From 76ddbe79442b10c420d8a59f0b55b8ebac62ff84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Fiala?= Date: Tue, 8 Apr 2025 23:58:15 +0200 Subject: [PATCH 1/2] Add a bump interner --- Cargo.toml | 2 + src/bump.rs | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 6 +++ 3 files changed, 150 insertions(+) create mode 100644 src/bump.rs diff --git a/Cargo.toml b/Cargo.toml index a4da7da..4d3b2b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ maintenance = { status = "actively-developed" } [dependencies] ahash = { version = "0.8.5", optional = true } +bumpalo = { version = "3.17.0", optional = true } dashmap = { version = "5.4.0", optional = true } once_cell = { version = "1.4", optional = true } tinyset = { version = "0.4.2", optional = true } @@ -52,6 +53,7 @@ portable-atomic = { version = "1", default-features = false, optional = true } arc = ["std", "dep:ahash", "dep:dashmap", "dep:once_cell"] bench = ["arc", "arena", "_experimental-new-intern", "dep:memorable-wordlist"] arena = ["alloc"] +bump = ["alloc", "dep:bumpalo"] intern = ["alloc"] default = ["intern", "std"] _experimental-new-intern = ["alloc", "dep:append-only-vec"] diff --git a/src/bump.rs b/src/bump.rs new file mode 100644 index 0000000..3a2a17e --- /dev/null +++ b/src/bump.rs @@ -0,0 +1,142 @@ +use core::{ + borrow::Borrow, + cell::Cell, + hash::{BuildHasher, Hash, Hasher}, + ptr::NonNull, +}; +use hashbrown::{hash_map::RawEntryMut, HashMap}; + +#[cfg(test)] +use std::println; + +/// A bump-arena for storing interned data +/// +/// You can use a `Bump` to intern data of type `T`. This data is then +/// freed when the `Bump` is dropped. 
+/// +/// # Example +/// ```rust +/// use internment::Bump; +/// let arena: Bump<&'static str> = Bump::new(); +/// let x = arena.intern("hello"); +/// let y = arena.intern("world"); +/// assert_ne!(x, y); +/// println!("The conventional greeting is '{} {}'", x, y); +/// ``` + +#[cfg_attr(docsrs, doc(cfg(feature = "bump")))] +pub struct Bump { + arena: bumpalo::Bump, + interner: Cell, (), S>>, +} + +impl Bump { + /// Allocate a new `Bump` + #[inline] + pub fn new() -> Self { + Bump { + arena: bumpalo::Bump::new(), + interner: Default::default(), + } + } +} +impl Bump { + /// Intern a value. + /// + /// If this value has not previously been interned, then `intern` will + /// allocate a spot for the value on the heap. Otherwise, it will return a + /// pointer to the object previously allocated. + pub fn intern(&self, val: T) -> &T { + let mut interner = self.interner.take(); + let entry = interner.raw_entry_mut().from_key(&val); + let r = match entry { + RawEntryMut::Vacant(v) => { + let r = &*self.arena.alloc(val); + v.insert(Interned(NonNull::from(r)), ()); + r + } + RawEntryMut::Occupied(o) => { + let key = o.key(); + // SAFETY: We are creating a ref with the same lifetime as + // `&self` (the enclosing `Bump`). + unsafe { key.deref() } + } + }; + self.interner.set(interner); + r + } +} + +impl Default for Bump { + #[inline] + fn default() -> Self { + Self::new() + } +} + +// Essentially a `&'static T` reference to a value allocated in the `Bump` +// arena. Always safe to deref, but any `&'a T` reference lifetime must be +// linked to the lifetime of the `Bump` arena (i.e. the lifetime of this +// `Interned`). +struct Interned(NonNull); + +impl Interned { + /// SAFETY: always safe to call, however the lifetime of the resulting + /// reference must be shorter than the lifetime of the enclosing `Bump`. 
+ unsafe fn deref<'a>(&self) -> &'a T { + unsafe { self.0.as_ref() } + } + + fn borrow(&self) -> &T { + // SAFETY: The `self: Interned` only exists in the `interner` field. Any + // (lifetime) reference to it must live as long as the containing + // `Bump`. This means that the `arena` field must also be live for this + // duration. Therefore it is safe to hand out a ref to a `T` in that + // arena. + unsafe { self.deref() } + } +} + +impl Borrow for Interned { + fn borrow(&self) -> &T { + self.borrow() + } +} + +impl PartialEq for Interned { + fn eq(&self, other: &Self) -> bool { + self.borrow() == other.borrow() + } +} +impl Eq for Interned {} + +impl Hash for Interned { + fn hash(&self, state: &mut H) { + self.borrow().hash(state); + } +} + +#[test] +fn eq_string() { + let arena = Bump::<&'static str>::new(); + assert_eq!( + arena.intern("hello") as *const _, + arena.intern("hello") as *const _ + ); + assert_ne!( + arena.intern("goodbye") as *const _, + arena.intern("farewell") as *const _ + ); +} +#[test] +fn display() { + let arena = Bump::<&'static str>::new(); + let world = arena.intern("world"); + println!("Hello {}", world); +} +#[test] +fn debug() { + let arena = Bump::<&'static str>::new(); + let world = arena.intern("world"); + println!("Hello {:?}", world); +} diff --git a/src/lib.rs b/src/lib.rs index 0854f6f..89f398b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,6 +79,12 @@ pub use arena::Arena; #[cfg(feature = "arena")] pub use arena::ArenaIntern; +#[cfg(feature = "bump")] +mod bump; + +#[cfg(feature = "bump")] +pub use bump::Bump; + #[cfg(feature = "arc")] mod arc; #[cfg(feature = "arc")] From 447adcf45502cc7f39f674d8b8b398c6d22dd806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jon=C3=A1=C5=A1=20Fiala?= Date: Wed, 9 Apr 2025 01:21:33 +0200 Subject: [PATCH 2/2] Add `intern_ref` --- src/bump.rs | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/bump.rs b/src/bump.rs index 3a2a17e..11854dd 100644 --- 
a/src/bump.rs +++ b/src/bump.rs @@ -47,11 +47,23 @@ impl Bump { /// allocate a spot for the value on the heap. Otherwise, it will return a /// pointer to the object previously allocated. pub fn intern(&self, val: T) -> &T { + self.intern_inner(val) + } + + /// Same as [`intern`](Self::intern), but does not clone if the value is already interned. + pub fn intern_ref(&self, val: &T) -> &T + where + T: Clone, + { + self.intern_inner(val) + } + + fn intern_inner>(&self, val: Q) -> &T { let mut interner = self.interner.take(); - let entry = interner.raw_entry_mut().from_key(&val); + let entry = interner.raw_entry_mut().from_key(val.borrow()); let r = match entry { RawEntryMut::Vacant(v) => { - let r = &*self.arena.alloc(val); + let r = &*self.arena.alloc(val.to_owned()); v.insert(Interned(NonNull::from(r)), ()); r } @@ -74,6 +86,24 @@ impl Default for Bump { } } +/// Private trait to allow for generic interning implementation which works with +/// both `&T` and `T`. +trait Internable: Borrow { + fn to_owned(self) -> T; +} + +impl Internable for T { + fn to_owned(self) -> T { + self + } +} + +impl Internable for &T { + fn to_owned(self) -> T { + self.clone() + } +} + // Essentially a `&'static T` reference to a value allocated in the `Bump` // arena. Always safe to deref, but any `&'a T` reference lifetime must be // linked to the lifetime of the `Bump` arena (i.e. the lifetime of this