From dc66571a66ab790c1ccc9f20082fc819d0961caf Mon Sep 17 00:00:00 2001 From: MilkBlock Date: Sun, 12 Oct 2025 23:46:00 +0800 Subject: [PATCH 1/5] struct LazyMapOfIndexSet: opt DashMap> by allowing lazily insert when read --- core-relations/src/containers/mod.rs | 121 ++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 4 deletions(-) diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index 63ca81f41..3751bfe22 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -12,6 +12,7 @@ use std::{ any::{Any, TypeId}, hash::{Hash, Hasher}, ops::Deref, + sync::{Arc, Mutex}, }; use crate::numeric_id::{DenseIdMap, IdVec, NumericId, define_id}; @@ -201,7 +202,120 @@ struct ContainerEnv { to_id: DashMap, to_container: DashMap, /// Map from a Value to the set of ids of containers that contain that value. + val_index: LazyMapOfIndexSet, +} +#[derive(Clone)] +struct LazyMapOfIndexSet { val_index: DashMap>, + // keys and value to insert + // if user want to insert same value for all keys in IndexSet, LazyMap will put them + // in pending_insert and do the insertion for single key and remove this key in pending_insert when user want to read LazyMap + pending_insert: Arc, Value)>>>, +} + +use dashmap::mapref::one::{Ref, RefMut}; +#[allow(dead_code)] +impl LazyMapOfIndexSet { + /// Creates a new, empty `LazyMapOfIndexSet`. + pub fn new() -> Self { + Self { + val_index: DashMap::default(), + pending_insert: Default::default(), + } + } + + /// Returns the number of elements in the map. + pub fn len(&self) -> usize { + self.val_index.len() + } + + /// Returns `true` if the map contains no elements. + pub fn is_empty(&self) -> bool { + self.val_index.is_empty() + } + + /// Returns the number of elements the map can hold without reallocating. + pub fn capacity(&self) -> usize { + self.val_index.capacity() + } + + /// Inserts a key-value pair into the map. + /// If the map did not have this key present, `None` is returned. + /// If the map did have this key present, the value is updated, and the old value is returned. + pub fn insert(&mut self, key: Value, value: IndexSet) -> Option> { + self.flush_pending_inserts_for_key(&key); + self.val_index.insert(key, value) + } + + /// Removes a key from the map, returning the value at the key if the key was previously in the map. + pub fn remove(&mut self, key: &Value) -> Option> { + self.flush_pending_inserts_for_key(key); + self.val_index.remove(key).map(|(_, v)| v) + } + + /// Removes a key from the map, returning the stored key and value if the key was previously in the map. + pub fn remove_entry(&mut self, key: &Value) -> Option<(Value, IndexSet)> { + self.flush_pending_inserts_for_key(key); + self.val_index.remove(key) + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + pub fn entry(&self, key: Value) -> dashmap::Entry<'_, Value, IndexSet> { + self.flush_pending_inserts_for_key(&key); + self.val_index.entry(key) + } + + /// Returns a reference to the value corresponding to the key. + pub fn get(&mut self, key: &Value) -> Option>> { + self.flush_pending_inserts_for_key(key); + self.val_index.get(key) + } + + /// Returns a mutable reference to the value corresponding to the key. + pub fn get_mut(&mut self, key: &Value) -> Option>> { + self.flush_pending_inserts_for_key(key); + self.val_index.get_mut(key) + } + + /// Returns `true` if the map contains a value for the specified key. + pub fn contains_key(&self, key: &Value) -> bool { + self.val_index.contains_key(key) + } + + /// Lazily inserts a value for all keys in the given index set. + /// The actual insertion will be performed when the map is next accessed. + pub fn lazy_insert_for_all_keys(&self, keys: IndexSet, value: Value) { + self.pending_insert.lock().unwrap().push((keys, value)); + } + + /// Flushes all pending lazy insertions to the underlying map. + fn flush_pending_inserts_for_key(&self, key: &Value) { + let mut pending_insert = self.pending_insert.lock().unwrap(); + if !pending_insert.is_empty() { + for (keys, value) in pending_insert.iter_mut() { + if keys.contains(key) { + self.val_index.entry(*key).or_default().insert(*value); + } + keys.swap_remove(key); + } + } + } + fn flush_pending_inserts(&self) { + let mut pending_insert = self.pending_insert.lock().unwrap(); + if !pending_insert.is_empty() { + for (keys, value) in pending_insert.drain(..) { + for key in keys { + self.val_index.entry(key).or_default().insert(value); + } + } + } + } +} + +impl Default for LazyMapOfIndexSet { + fn default() -> Self { + Self::new() + } } impl DynamicContainerEnv for ContainerEnv { @@ -242,7 +356,7 @@ impl ContainerEnv { counter, to_id: DashMap::default(), to_container: DashMap::default(), - val_index: DashMap::default(), + val_index: Default::default(), } } @@ -274,9 +388,8 @@ impl ContainerEnv { dashmap::Entry::Vacant(vac) => { // Common case: insert the mapping in to_id and update the index. vac.insert(value); - for val in container.iter() { - self.val_index.entry(val).or_default().insert(value); - } + self.val_index + .lazy_insert_for_all_keys(container.iter().collect(), value); value } dashmap::Entry::Occupied(occ) => { From 6d51fd59f0dbe8b887eede4c58208a8bbd9e74aa Mon Sep 17 00:00:00 2001 From: MilkBlock Date: Tue, 14 Oct 2025 11:09:36 +0800 Subject: [PATCH 2/5] support lazy remove --- core-relations/src/containers/mod.rs | 91 ++++++++++++++++------------ 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index 3751bfe22..a380bc784 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -210,7 +210,11 @@ struct LazyMapOfIndexSet { // keys and value to insert // if user want to insert same value for all keys in IndexSet, LazyMap will put them // in pending_insert and do the insertion for single key and remove this key in pending_insert when user want to read LazyMap - pending_insert: Arc, Value)>>>, + pending_operations: Arc, InsertOrRemove)>>>, +} +enum InsertOrRemove { + Insert(Value), + Remove(Value), } use dashmap::mapref::one::{Ref, RefMut}; @@ -220,7 +224,7 @@ impl LazyMapOfIndexSet { pub fn new() -> Self { Self { val_index: DashMap::default(), - pending_insert: Default::default(), + pending_operations: Default::default(), } } @@ -243,37 +247,37 @@ impl LazyMapOfIndexSet { /// If the map did not have this key present, `None` is returned. /// If the map did have this key present, the value is updated, and the old value is returned. pub fn insert(&mut self, key: Value, value: IndexSet) -> Option> { - self.flush_pending_inserts_for_key(&key); + self.flush_pending_operations_for_key(&key); self.val_index.insert(key, value) } /// Removes a key from the map, returning the value at the key if the key was previously in the map. pub fn remove(&mut self, key: &Value) -> Option> { - self.flush_pending_inserts_for_key(key); + self.flush_pending_operations_for_key(key); self.val_index.remove(key).map(|(_, v)| v) } /// Removes a key from the map, returning the stored key and value if the key was previously in the map. pub fn remove_entry(&mut self, key: &Value) -> Option<(Value, IndexSet)> { - self.flush_pending_inserts_for_key(key); + self.flush_pending_operations_for_key(key); self.val_index.remove(key) } /// Gets the given key's corresponding entry in the map for in-place manipulation. pub fn entry(&self, key: Value) -> dashmap::Entry<'_, Value, IndexSet> { - self.flush_pending_inserts_for_key(&key); + self.flush_pending_operations_for_key(&key); self.val_index.entry(key) } /// Returns a reference to the value corresponding to the key. pub fn get(&mut self, key: &Value) -> Option>> { - self.flush_pending_inserts_for_key(key); + self.flush_pending_operations_for_key(key); self.val_index.get(key) } /// Returns a mutable reference to the value corresponding to the key. pub fn get_mut(&mut self, key: &Value) -> Option>> { - self.flush_pending_inserts_for_key(key); + self.flush_pending_operations_for_key(key); self.val_index.get_mut(key) } @@ -285,31 +289,41 @@ impl LazyMapOfIndexSet { /// Lazily inserts a value for all keys in the given index set. /// The actual insertion will be performed when the map is next accessed. pub fn lazy_insert_for_all_keys(&self, keys: IndexSet, value: Value) { - self.pending_insert.lock().unwrap().push((keys, value)); + self.pending_operations + .lock() + .unwrap() + .push((keys, InsertOrRemove::Insert(value))); + } + + /// Lazily removes a value for all keys in the given index set. + pub fn lazy_remove_for_all_keys(&self, keys: IndexSet, value: Value) { + self.pending_operations + .lock() + .unwrap() + .push((keys, InsertOrRemove::Remove(value))); } /// Flushes all pending lazy insertions to the underlying map. - fn flush_pending_inserts_for_key(&self, key: &Value) { - let mut pending_insert = self.pending_insert.lock().unwrap(); - if !pending_insert.is_empty() { - for (keys, value) in pending_insert.iter_mut() { + fn flush_pending_operations_for_key(&self, key: &Value) { + let mut pending_ops = self.pending_operations.lock().unwrap(); + if !pending_ops.is_empty() { + for (keys, op) in pending_ops.iter_mut() { if keys.contains(key) { - self.val_index.entry(*key).or_default().insert(*value); + match op { + InsertOrRemove::Insert(v) => { + self.val_index.entry(*key).or_default().insert(*v); + } + InsertOrRemove::Remove(v) => { + if let Some(mut index) = self.val_index.get_mut(key) { + index.swap_remove(v); + } + } + } } keys.swap_remove(key); } } } - fn flush_pending_inserts(&self) { - let mut pending_insert = self.pending_insert.lock().unwrap(); - if !pending_insert.is_empty() { - for (keys, value) in pending_insert.drain(..) { - for key in keys { - self.val_index.entry(key).or_default().insert(value); - } - } - } - } } impl Default for LazyMapOfIndexSet { @@ -415,18 +429,16 @@ impl ContainerEnv { self.to_container.remove(&old_val); self.to_container.insert(result, (hc as usize, target_map)); *occ.get_mut() = result; - for val in occ.key().iter() { - let mut index = self.val_index.entry(val).or_default(); - index.swap_remove(&old_val); - index.insert(result); - } + self.val_index + .lazy_remove_for_all_keys(occ.key().iter().collect(), old_val); + self.val_index + .lazy_insert_for_all_keys(occ.key().iter().collect(), result); } } dashmap::Entry::Vacant(vacant_entry) => { self.to_container.insert(value, (hc as usize, target_map)); - for val in vacant_entry.key().iter() { - self.val_index.entry(val).or_default().insert(value); - } + self.val_index + .lazy_insert_for_all_keys(vacant_entry.key().iter().collect(), value); vacant_entry.insert(value); } } @@ -613,18 +625,17 @@ impl ContainerEnv { self.to_container.remove(&old_val); self.to_container.insert(result, (hc as usize, target_map)); *val_slot.get_mut() = result; - for val in container.iter() { - let mut index = self.val_index.entry(val).or_default(); - index.swap_remove(&old_val); - index.insert(result); - } + self.val_index + .lazy_remove_for_all_keys(container.iter().collect(), old_val); + self.val_index + .lazy_insert_for_all_keys(container.iter().collect(), result); } } Err(slot) => { self.to_container.insert(val, (hc as usize, target_map)); - for v in container.iter() { - self.val_index.entry(v).or_default().insert(val); - } + self.val_index + .lazy_insert_for_all_keys(container.iter().collect(), val); + // SAFETY: We just got this slot from `find_or_find_insert_slot` // and we have not mutated the map at all since then. unsafe { From 38a041da2f218be96d95b6ae97b99c895ae9f561 Mon Sep 17 00:00:00 2001 From: MilkBlock Date: Tue, 14 Oct 2025 12:48:44 +0800 Subject: [PATCH 3/5] with LAZY_BOUND --- core-relations/src/containers/mod.rs | 50 ++++++++++++++++++---------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index a380bc784..9c8e509bf 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -217,6 +217,7 @@ enum InsertOrRemove { Remove(Value), } +const LAZY_BOUND: usize = 30; use dashmap::mapref::one::{Ref, RefMut}; #[allow(dead_code)] impl LazyMapOfIndexSet { @@ -288,19 +289,34 @@ impl LazyMapOfIndexSet { /// Lazily inserts a value for all keys in the given index set. /// The actual insertion will be performed when the map is next accessed. - pub fn lazy_insert_for_all_keys(&self, keys: IndexSet, value: Value) { - self.pending_operations - .lock() - .unwrap() - .push((keys, InsertOrRemove::Insert(value))); + pub fn insert_for_all_keys(&self, keys: IndexSet, value: Value) { + println!("insert keys_len: {}", keys.len()); + if keys.len() < LAZY_BOUND { + for key in keys { + self.val_index.entry(key).or_default().insert(value); + } + } else { + self.pending_operations + .lock() + .unwrap() + .push((keys, InsertOrRemove::Insert(value))); + } } /// Lazily removes a value for all keys in the given index set. - pub fn lazy_remove_for_all_keys(&self, keys: IndexSet, value: Value) { - self.pending_operations - .lock() - .unwrap() - .push((keys, InsertOrRemove::Remove(value))); + pub fn remove_for_all_keys(&self, keys: IndexSet, value: Value) { + if keys.len() < LAZY_BOUND { + for key in keys { + if let Some(mut index) = self.val_index.get_mut(&key) { + index.swap_remove(&value); + } + } + } else { + self.pending_operations + .lock() + .unwrap() + .push((keys, InsertOrRemove::Remove(value))); + } } /// Flushes all pending lazy insertions to the underlying map. @@ -403,7 +419,7 @@ impl ContainerEnv { // Common case: insert the mapping in to_id and update the index. vac.insert(value); self.val_index - .lazy_insert_for_all_keys(container.iter().collect(), value); + .insert_for_all_keys(container.iter().collect(), value); value } dashmap::Entry::Occupied(occ) => { @@ -430,15 +446,15 @@ impl ContainerEnv { self.to_container.insert(result, (hc as usize, target_map)); *occ.get_mut() = result; self.val_index - .lazy_remove_for_all_keys(occ.key().iter().collect(), old_val); + .remove_for_all_keys(occ.key().iter().collect(), old_val); self.val_index - .lazy_insert_for_all_keys(occ.key().iter().collect(), result); + .insert_for_all_keys(occ.key().iter().collect(), result); } } dashmap::Entry::Vacant(vacant_entry) => { self.to_container.insert(value, (hc as usize, target_map)); self.val_index - .lazy_insert_for_all_keys(vacant_entry.key().iter().collect(), value); + .insert_for_all_keys(vacant_entry.key().iter().collect(), value); vacant_entry.insert(value); } } @@ -626,15 +642,15 @@ impl ContainerEnv { self.to_container.insert(result, (hc as usize, target_map)); *val_slot.get_mut() = result; self.val_index - .lazy_remove_for_all_keys(container.iter().collect(), old_val); + .remove_for_all_keys(container.iter().collect(), old_val); self.val_index - .lazy_insert_for_all_keys(container.iter().collect(), result); + .insert_for_all_keys(container.iter().collect(), result); } } Err(slot) => { self.to_container.insert(val, (hc as usize, target_map)); self.val_index - .lazy_insert_for_all_keys(container.iter().collect(), val); + .insert_for_all_keys(container.iter().collect(), val); // SAFETY: We just got this slot from `find_or_find_insert_slot` // and we have not mutated the map at all since then. From d10d939c76c42903eb412809e5016121d7da8d90 Mon Sep 17 00:00:00 2001 From: MilkBlock Date: Tue, 14 Oct 2025 12:51:39 +0800 Subject: [PATCH 4/5] nit --- core-relations/src/containers/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index 9c8e509bf..bbf88d59e 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -290,7 +290,6 @@ impl LazyMapOfIndexSet { /// Lazily inserts a value for all keys in the given index set. /// The actual insertion will be performed when the map is next accessed. pub fn insert_for_all_keys(&self, keys: IndexSet, value: Value) { - println!("insert keys_len: {}", keys.len()); if keys.len() < LAZY_BOUND { for key in keys { self.val_index.entry(key).or_default().insert(value); From 8d6167d25cc834b3d0c132d87363c59e177acb6b Mon Sep 17 00:00:00 2001 From: MilkBlock Date: Thu, 23 Oct 2025 19:19:24 +0800 Subject: [PATCH 5/5] rename & clear empty set & swith impl to hashset --- core-relations/src/containers/mod.rs | 112 ++++++++++----------------- 1 file changed, 40 insertions(+), 72 deletions(-) diff --git a/core-relations/src/containers/mod.rs b/core-relations/src/containers/mod.rs index bbf88d59e..41c9b112c 100644 --- a/core-relations/src/containers/mod.rs +++ b/core-relations/src/containers/mod.rs @@ -18,6 +18,7 @@ use std::{ use crate::numeric_id::{DenseIdMap, IdVec, NumericId, define_id}; use crossbeam_queue::SegQueue; use dashmap::SharedValue; +use hashbrown::HashSet; use rayon::{ iter::{ParallelBridge, ParallelIterator}, prelude::*, @@ -202,15 +203,15 @@ struct ContainerEnv { to_id: DashMap, to_container: DashMap, /// Map from a Value to the set of ids of containers that contain that value. - val_index: LazyMapOfIndexSet, + val_index: LazyContainerIdx, } #[derive(Clone)] -struct LazyMapOfIndexSet { - val_index: DashMap>, +struct LazyContainerIdx { + val_index: DashMap>, // keys and value to insert // if user want to insert same value for all keys in IndexSet, LazyMap will put them // in pending_insert and do the insertion for single key and remove this key in pending_insert when user want to read LazyMap - pending_operations: Arc, InsertOrRemove)>>>, + pending_operations: Arc, InsertOrRemove)>>>, } enum InsertOrRemove { Insert(Value), @@ -218,9 +219,8 @@ enum InsertOrRemove { } const LAZY_BOUND: usize = 30; -use dashmap::mapref::one::{Ref, RefMut}; -#[allow(dead_code)] -impl LazyMapOfIndexSet { +use dashmap::mapref::one::Ref; +impl LazyContainerIdx { /// Creates a new, empty `LazyMapOfIndexSet`. pub fn new() -> Self { Self { @@ -229,67 +229,15 @@ impl LazyMapOfIndexSet { } } - /// Returns the number of elements in the map. - pub fn len(&self) -> usize { - self.val_index.len() - } - - /// Returns `true` if the map contains no elements. - pub fn is_empty(&self) -> bool { - self.val_index.is_empty() - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - self.val_index.capacity() - } - - /// Inserts a key-value pair into the map. - /// If the map did not have this key present, `None` is returned. - /// If the map did have this key present, the value is updated, and the old value is returned. - pub fn insert(&mut self, key: Value, value: IndexSet) -> Option> { - self.flush_pending_operations_for_key(&key); - self.val_index.insert(key, value) - } - - /// Removes a key from the map, returning the value at the key if the key was previously in the map. - pub fn remove(&mut self, key: &Value) -> Option> { - self.flush_pending_operations_for_key(key); - self.val_index.remove(key).map(|(_, v)| v) - } - - /// Removes a key from the map, returning the stored key and value if the key was previously in the map. - pub fn remove_entry(&mut self, key: &Value) -> Option<(Value, IndexSet)> { - self.flush_pending_operations_for_key(key); - self.val_index.remove(key) - } - - /// Gets the given key's corresponding entry in the map for in-place manipulation. - pub fn entry(&self, key: Value) -> dashmap::Entry<'_, Value, IndexSet> { - self.flush_pending_operations_for_key(&key); - self.val_index.entry(key) - } - /// Returns a reference to the value corresponding to the key. - pub fn get(&mut self, key: &Value) -> Option>> { + pub fn get(&mut self, key: &Value) -> Option>> { self.flush_pending_operations_for_key(key); self.val_index.get(key) } - /// Returns a mutable reference to the value corresponding to the key. - pub fn get_mut(&mut self, key: &Value) -> Option>> { - self.flush_pending_operations_for_key(key); - self.val_index.get_mut(key) - } - - /// Returns `true` if the map contains a value for the specified key. - pub fn contains_key(&self, key: &Value) -> bool { - self.val_index.contains_key(key) - } - /// Lazily inserts a value for all keys in the given index set. /// The actual insertion will be performed when the map is next accessed. - pub fn insert_for_all_keys(&self, keys: IndexSet, value: Value) { + pub fn insert_for_all_keys(&self, keys: HashSet, value: Value) { if keys.len() < LAZY_BOUND { for key in keys { self.val_index.entry(key).or_default().insert(value); @@ -303,11 +251,11 @@ impl LazyMapOfIndexSet { } /// Lazily removes a value for all keys in the given index set. - pub fn remove_for_all_keys(&self, keys: IndexSet, value: Value) { + pub fn remove_for_all_keys(&self, keys: HashSet, value: Value) { if keys.len() < LAZY_BOUND { for key in keys { - if let Some(mut index) = self.val_index.get_mut(&key) { - index.swap_remove(&value); + if let Some(mut pending_keys) = self.val_index.get_mut(&key) { + pending_keys.remove(&value); } } } else { @@ -321,27 +269,47 @@ impl LazyMapOfIndexSet { /// Flushes all pending lazy insertions to the underlying map. fn flush_pending_operations_for_key(&self, key: &Value) { let mut pending_ops = self.pending_operations.lock().unwrap(); - if !pending_ops.is_empty() { - for (keys, op) in pending_ops.iter_mut() { - if keys.contains(key) { + let mut flush_whole_set = false; + for (keys, op) in pending_ops.iter_mut() { + if keys.contains(key) { + // if the length of keys set is less than LAZY_BOUND just flush the whole set. + if keys.len() < LAZY_BOUND { + // flush all keys in set + for key in keys.iter() { + match op { + InsertOrRemove::Insert(v) => { + self.val_index.entry(*key).or_default().insert(*v); + } + InsertOrRemove::Remove(v) => { + if let Some(mut pending_keys) = self.val_index.get_mut(key) { + pending_keys.remove(v); + } + } + } + } + flush_whole_set = true; + } else { match op { InsertOrRemove::Insert(v) => { self.val_index.entry(*key).or_default().insert(*v); } InsertOrRemove::Remove(v) => { - if let Some(mut index) = self.val_index.get_mut(key) { - index.swap_remove(v); + if let Some(mut pending_keys) = self.val_index.get_mut(key) { + pending_keys.remove(v); } } } + keys.remove(key); } - keys.swap_remove(key); } } + if flush_whole_set { + pending_ops.retain(|(keys, _ops)| !keys.is_empty()); + } } } -impl Default for LazyMapOfIndexSet { +impl Default for LazyContainerIdx { fn default() -> Self { Self::new() } @@ -489,7 +457,7 @@ impl ContainerEnv { let Some(ids) = self.val_index.get(&row[0]) else { continue; }; - to_rebuild.extend(&*ids); + to_rebuild.extend(ids.iter()); } for id in to_rebuild { let Some((hc, target_map)) = self.to_container.get(&id).map(|x| *x) else {