Skip to content

Commit 5c47d0b

Browse files
authored
Rollup merge of #152418 - asder8215:btreemap_merge_optimized, r=Mark-Simulacrum
`BTreeMap::merge` optimized This is an optimized version of #151981. See [ACP](rust-lang/libs-team#739 (comment)) for more information on `BTreeMap::merge` does. CC @programmerjake. Let me know what you think of how I'm using `CursorMut` and `IntoIter` here and whether the unsafe code here looks good. I decided to use `ptr::read()` and `ptr::write()` to grab the value from `CursorMut` as `V` than `&mut V`, use it within the `conflict` function, and overwrite the content of conflicting key afterward. I know this needs some polishing, especially with refactoring some redundant looking code in a nicer way, some of which could probably just be public API methods for `CursorMut`. It does pass all the tests that I currently have for `BTreeMap::merge` (inspired from `BTreeMap::append`) though, so that's good.
2 parents 5d3f80a + cbdcfca commit 5c47d0b

File tree

3 files changed

+338
-1
lines changed

3 files changed

+338
-1
lines changed

library/alloc/src/collections/btree/map.rs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,6 +1240,162 @@ impl<K, V, A: Allocator + Clone> BTreeMap<K, V, A> {
12401240
)
12411241
}
12421242

1243+
/// Moves all elements from `other` into `self`, leaving `other` empty.
1244+
///
1245+
/// If a key from `other` is already present in `self`, then the `conflict`
1246+
/// closure is used to return a value to `self`. The `conflict`
1247+
/// closure takes in a borrow of `self`'s key, `self`'s value, and `other`'s value
1248+
/// in that order.
1249+
///
1250+
/// An example of why one might use this method over [`append`]
1251+
/// is to combine `self`'s value with `other`'s value when their keys conflict.
1252+
///
1253+
/// Similar to [`insert`], though, the key is not overwritten,
1254+
/// which matters for types that can be `==` without being identical.
1255+
///
1256+
/// [`insert`]: BTreeMap::insert
1257+
/// [`append`]: BTreeMap::append
1258+
///
1259+
/// # Examples
1260+
///
1261+
/// ```
1262+
/// #![feature(btree_merge)]
1263+
/// use std::collections::BTreeMap;
1264+
///
1265+
/// let mut a = BTreeMap::new();
1266+
/// a.insert(1, String::from("a"));
1267+
/// a.insert(2, String::from("b"));
1268+
/// a.insert(3, String::from("c")); // Note: Key (3) also present in b.
1269+
///
1270+
/// let mut b = BTreeMap::new();
1271+
/// b.insert(3, String::from("d")); // Note: Key (3) also present in a.
1272+
/// b.insert(4, String::from("e"));
1273+
/// b.insert(5, String::from("f"));
1274+
///
1275+
/// // concatenate a's value and b's value
1276+
/// a.merge(b, |_, a_val, b_val| {
1277+
/// format!("{a_val}{b_val}")
1278+
/// });
1279+
///
1280+
/// assert_eq!(a.len(), 5); // all of b's keys in a
1281+
///
1282+
/// assert_eq!(a[&1], "a");
1283+
/// assert_eq!(a[&2], "b");
1284+
/// assert_eq!(a[&3], "cd"); // Note: "c" has been combined with "d".
1285+
/// assert_eq!(a[&4], "e");
1286+
/// assert_eq!(a[&5], "f");
1287+
/// ```
1288+
#[unstable(feature = "btree_merge", issue = "152152")]
1289+
pub fn merge(&mut self, mut other: Self, mut conflict: impl FnMut(&K, V, V) -> V)
1290+
where
1291+
K: Ord,
1292+
A: Clone,
1293+
{
1294+
// Do we have to append anything at all?
1295+
if other.is_empty() {
1296+
return;
1297+
}
1298+
1299+
// We can just swap `self` and `other` if `self` is empty.
1300+
if self.is_empty() {
1301+
mem::swap(self, &mut other);
1302+
return;
1303+
}
1304+
1305+
let mut other_iter = other.into_iter();
1306+
let (first_other_key, first_other_val) = other_iter.next().unwrap();
1307+
1308+
// find the first gap that has the smallest key greater than or equal to
1309+
// the first key from other
1310+
let mut self_cursor = self.lower_bound_mut(Bound::Included(&first_other_key));
1311+
1312+
if let Some((self_key, _)) = self_cursor.peek_next() {
1313+
match K::cmp(self_key, &first_other_key) {
1314+
Ordering::Equal => {
1315+
// if `f` unwinds, the next entry is already removed leaving
1316+
// the tree in valid state.
1317+
// FIXME: Once `MaybeDangling` is implemented, we can optimize
1318+
// this through using a drop handler and transmutating CursorMutKey<K, V>
1319+
// to CursorMutKey<ManuallyDrop<K>, ManuallyDrop<V>> (see PR #152418)
1320+
if let Some((k, v)) = self_cursor.remove_next() {
1321+
// SAFETY: we remove the K, V out of the next entry,
1322+
// apply 'f' to get a new (K, V), and insert it back
1323+
// into the next entry that the cursor is pointing at
1324+
let v = conflict(&k, v, first_other_val);
1325+
unsafe { self_cursor.insert_after_unchecked(k, v) };
1326+
}
1327+
}
1328+
Ordering::Greater =>
1329+
// SAFETY: we know our other_key's ordering is less than self_key,
1330+
// so inserting before will guarantee sorted order
1331+
unsafe {
1332+
self_cursor.insert_before_unchecked(first_other_key, first_other_val);
1333+
},
1334+
Ordering::Less => {
1335+
unreachable!("Cursor's peek_next should return None.");
1336+
}
1337+
}
1338+
} else {
1339+
// SAFETY: reaching here means our cursor is at the end
1340+
// self BTreeMap so we just insert other_key here
1341+
unsafe {
1342+
self_cursor.insert_before_unchecked(first_other_key, first_other_val);
1343+
}
1344+
}
1345+
1346+
for (other_key, other_val) in other_iter {
1347+
loop {
1348+
if let Some((self_key, _)) = self_cursor.peek_next() {
1349+
match K::cmp(self_key, &other_key) {
1350+
Ordering::Equal => {
1351+
// if `f` unwinds, the next entry is already removed leaving
1352+
// the tree in valid state.
1353+
// FIXME: Once `MaybeDangling` is implemented, we can optimize
1354+
// this through using a drop handler and transmutating CursorMutKey<K, V>
1355+
// to CursorMutKey<ManuallyDrop<K>, ManuallyDrop<V>> (see PR #152418)
1356+
if let Some((k, v)) = self_cursor.remove_next() {
1357+
// SAFETY: we remove the K, V out of the next entry,
1358+
// apply 'f' to get a new (K, V), and insert it back
1359+
// into the next entry that the cursor is pointing at
1360+
let v = conflict(&k, v, other_val);
1361+
unsafe { self_cursor.insert_after_unchecked(k, v) };
1362+
}
1363+
break;
1364+
}
1365+
Ordering::Greater => {
1366+
// SAFETY: we know our self_key's ordering is greater than other_key,
1367+
// so inserting before will guarantee sorted order
1368+
unsafe {
1369+
self_cursor.insert_before_unchecked(other_key, other_val);
1370+
}
1371+
break;
1372+
}
1373+
Ordering::Less => {
1374+
// FIXME: instead of doing a linear search here,
1375+
// this can be optimized to search the tree by starting
1376+
// from self_cursor and going towards the root and then
1377+
// back down to the proper node -- that should probably
1378+
// be a new method on Cursor*.
1379+
self_cursor.next();
1380+
}
1381+
}
1382+
} else {
1383+
// FIXME: If we get here, that means all of other's keys are greater than
1384+
// self's keys. For performance, this should really do a bulk insertion of items
1385+
// from other_iter into the end of self `BTreeMap`. Maybe this should be
1386+
// a method for Cursor*?
1387+
1388+
// SAFETY: reaching here means our cursor is at the end
1389+
// self BTreeMap so we just insert other_key here
1390+
unsafe {
1391+
self_cursor.insert_before_unchecked(other_key, other_val);
1392+
}
1393+
break;
1394+
}
1395+
}
1396+
}
1397+
}
1398+
12431399
/// Constructs a double-ended iterator over a sub-range of elements in the map.
12441400
/// The simplest way is to use the range syntax `min..max`, thus `range(min..max)` will
12451401
/// yield elements from min (inclusive) to max (exclusive).

library/alloc/src/collections/btree/map/tests.rs

Lines changed: 181 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
use core::assert_matches;
2-
use std::iter;
32
use std::ops::Bound::{Excluded, Included, Unbounded};
43
use std::panic::{AssertUnwindSafe, catch_unwind};
54
use std::sync::atomic::AtomicUsize;
65
use std::sync::atomic::Ordering::SeqCst;
6+
use std::{cmp, iter};
77

88
use super::*;
99
use crate::boxed::Box;
@@ -2128,6 +2128,86 @@ create_append_test!(test_append_239, 239);
21282128
#[cfg(not(miri))] // Miri is too slow
21292129
create_append_test!(test_append_1700, 1700);
21302130

2131+
// a inserts (0, 0)..(8, 8) to its own tree
2132+
// b inserts (5, 5 * 2)..($len, 2 * $len) to its own tree
2133+
// note that between a and b, there are duplicate keys
2134+
// between 5..min($len, 8), so on merge we add the values
2135+
// of these keys together
2136+
// we check that:
2137+
// - the merged tree 'a' has a length of max(8, $len)
2138+
// - all keys in 'a' have the correct value associated
2139+
// - removing and inserting an element into the merged
2140+
// tree 'a' still keeps it in valid tree form
2141+
macro_rules! create_merge_test {
2142+
($name:ident, $len:expr) => {
2143+
#[test]
2144+
fn $name() {
2145+
let mut a = BTreeMap::new();
2146+
for i in 0..8 {
2147+
a.insert(i, i);
2148+
}
2149+
2150+
let mut b = BTreeMap::new();
2151+
for i in 5..$len {
2152+
b.insert(i, 2 * i);
2153+
}
2154+
2155+
a.merge(b, |_, a_val, b_val| a_val + b_val);
2156+
2157+
assert_eq!(a.len(), cmp::max($len, 8));
2158+
2159+
for i in 0..cmp::max($len, 8) {
2160+
if i < 5 {
2161+
assert_eq!(a[&i], i);
2162+
} else {
2163+
if i < cmp::min($len, 8) {
2164+
assert_eq!(a[&i], i + 2 * i);
2165+
} else if i >= $len {
2166+
assert_eq!(a[&i], i);
2167+
} else {
2168+
assert_eq!(a[&i], 2 * i);
2169+
}
2170+
}
2171+
}
2172+
2173+
a.check();
2174+
assert_eq!(
2175+
a.remove(&($len - 1)),
2176+
if $len >= 5 && $len < 8 {
2177+
Some(($len - 1) + 2 * ($len - 1))
2178+
} else {
2179+
Some(2 * ($len - 1))
2180+
}
2181+
);
2182+
assert_eq!(a.insert($len - 1, 20), None);
2183+
a.check();
2184+
}
2185+
};
2186+
}
2187+
2188+
// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
2189+
// Single node, merge conflicting key values.
2190+
create_merge_test!(test_merge_7, 7);
2191+
// Single node.
2192+
create_merge_test!(test_merge_9, 9);
2193+
// Two leafs that don't need fixing.
2194+
create_merge_test!(test_merge_17, 17);
2195+
// Two leafs where the second one ends up underfull and needs stealing at the end.
2196+
create_merge_test!(test_merge_14, 14);
2197+
// Two leafs where the second one ends up empty because the insertion finished at the root.
2198+
create_merge_test!(test_merge_12, 12);
2199+
// Three levels; insertion finished at the root.
2200+
create_merge_test!(test_merge_144, 144);
2201+
// Three levels; insertion finished at leaf while there is an empty node on the second level.
2202+
create_merge_test!(test_merge_145, 145);
2203+
// Tests for several randomly chosen sizes.
2204+
create_merge_test!(test_merge_170, 170);
2205+
create_merge_test!(test_merge_181, 181);
2206+
#[cfg(not(miri))] // Miri is too slow
2207+
create_merge_test!(test_merge_239, 239);
2208+
#[cfg(not(miri))] // Miri is too slow
2209+
create_merge_test!(test_merge_1700, 1700);
2210+
21312211
#[test]
21322212
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
21332213
fn test_append_drop_leak() {
@@ -2169,6 +2249,84 @@ fn test_append_ord_chaos() {
21692249
map2.check();
21702250
}
21712251

2252+
#[test]
2253+
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
2254+
fn test_merge_drop_leak() {
2255+
let a = CrashTestDummy::new(0);
2256+
let b = CrashTestDummy::new(1);
2257+
let c = CrashTestDummy::new(2);
2258+
let mut left = BTreeMap::new();
2259+
let mut right = BTreeMap::new();
2260+
left.insert(a.spawn(Panic::Never), ());
2261+
left.insert(b.spawn(Panic::Never), ());
2262+
left.insert(c.spawn(Panic::Never), ());
2263+
right.insert(b.spawn(Panic::InDrop), ()); // first duplicate key, dropped during merge
2264+
right.insert(c.spawn(Panic::Never), ());
2265+
2266+
catch_unwind(move || left.merge(right, |_, _, _| ())).unwrap_err();
2267+
assert_eq!(a.dropped(), 1); // this should not be dropped
2268+
assert_eq!(b.dropped(), 2); // key is dropped on panic
2269+
assert_eq!(c.dropped(), 2); // key is dropped on panic
2270+
}
2271+
2272+
#[test]
2273+
#[cfg_attr(not(panic = "unwind"), ignore = "test requires unwinding support")]
2274+
fn test_merge_conflict_drop_leak() {
2275+
let a = CrashTestDummy::new(0);
2276+
let a_val_left = CrashTestDummy::new(0);
2277+
2278+
let b = CrashTestDummy::new(1);
2279+
let b_val_left = CrashTestDummy::new(1);
2280+
let b_val_right = CrashTestDummy::new(1);
2281+
2282+
let c = CrashTestDummy::new(2);
2283+
let c_val_left = CrashTestDummy::new(2);
2284+
let c_val_right = CrashTestDummy::new(2);
2285+
2286+
let mut left = BTreeMap::new();
2287+
let mut right = BTreeMap::new();
2288+
2289+
left.insert(a.spawn(Panic::Never), a_val_left.spawn(Panic::Never));
2290+
left.insert(b.spawn(Panic::Never), b_val_left.spawn(Panic::Never));
2291+
left.insert(c.spawn(Panic::Never), c_val_left.spawn(Panic::Never));
2292+
right.insert(b.spawn(Panic::Never), b_val_right.spawn(Panic::Never));
2293+
right.insert(c.spawn(Panic::Never), c_val_right.spawn(Panic::Never));
2294+
2295+
// First key that conflicts should
2296+
catch_unwind(move || {
2297+
left.merge(right, |_, _, _| panic!("Panic in conflict function"));
2298+
assert_eq!(left.len(), 1); // only 1 entry should be left
2299+
})
2300+
.unwrap_err();
2301+
assert_eq!(a.dropped(), 1); // should not panic
2302+
assert_eq!(a_val_left.dropped(), 1); // should not panic
2303+
assert_eq!(b.dropped(), 2); // should drop from panic (conflict)
2304+
assert_eq!(b_val_left.dropped(), 1); // should be 2 were it not for Rust issue #47949
2305+
assert_eq!(b_val_right.dropped(), 1); // should be 2 were it not for Rust issue #47949
2306+
assert_eq!(c.dropped(), 2); // should drop from panic (conflict)
2307+
assert_eq!(c_val_left.dropped(), 1); // should be 2 were it not for Rust issue #47949
2308+
assert_eq!(c_val_right.dropped(), 1); // should be 2 were it not for Rust issue #47949
2309+
}
2310+
2311+
#[test]
2312+
fn test_merge_ord_chaos() {
2313+
let mut map1 = BTreeMap::new();
2314+
map1.insert(Cyclic3::A, ());
2315+
map1.insert(Cyclic3::B, ());
2316+
let mut map2 = BTreeMap::new();
2317+
map2.insert(Cyclic3::A, ());
2318+
map2.insert(Cyclic3::B, ());
2319+
map2.insert(Cyclic3::C, ()); // lands first, before A
2320+
map2.insert(Cyclic3::B, ()); // lands first, before C
2321+
map1.check();
2322+
map2.check(); // keys are not unique but still strictly ascending
2323+
assert_eq!(map1.len(), 2);
2324+
assert_eq!(map2.len(), 4);
2325+
map1.merge(map2, |_, _, _| ());
2326+
assert_eq!(map1.len(), 5);
2327+
map1.check();
2328+
}
2329+
21722330
fn rand_data(len: usize) -> Vec<(u32, u32)> {
21732331
let mut rng = DeterministicRng::new();
21742332
Vec::from_iter((0..len).map(|_| (rng.next(), rng.next())))
@@ -2615,3 +2773,25 @@ fn test_id_based_append() {
26152773

26162774
assert_eq!(lhs.pop_first().unwrap().0.name, "lhs_k".to_string());
26172775
}
2776+
2777+
#[test]
2778+
fn test_id_based_merge() {
2779+
let mut lhs = BTreeMap::new();
2780+
let mut rhs = BTreeMap::new();
2781+
2782+
lhs.insert(IdBased { id: 0, name: "lhs_k".to_string() }, "1".to_string());
2783+
rhs.insert(IdBased { id: 0, name: "rhs_k".to_string() }, "2".to_string());
2784+
2785+
lhs.merge(rhs, |_, mut lhs_val, rhs_val| {
2786+
// confirming that lhs_val comes from lhs tree,
2787+
// rhs_val comes from rhs tree
2788+
assert_eq!(lhs_val, String::from("1"));
2789+
assert_eq!(rhs_val, String::from("2"));
2790+
lhs_val.push_str(&rhs_val);
2791+
lhs_val
2792+
});
2793+
2794+
let merged_kv_pair = lhs.pop_first().unwrap();
2795+
assert_eq!(merged_kv_pair.0.id, 0);
2796+
assert_eq!(merged_kv_pair.0.name, "lhs_k".to_string());
2797+
}

library/alloctests/tests/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![feature(allocator_api)]
22
#![feature(binary_heap_pop_if)]
3+
#![feature(btree_merge)]
34
#![feature(const_heap)]
45
#![feature(deque_extend_front)]
56
#![feature(iter_array_chunks)]

0 commit comments

Comments
 (0)