Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 37 additions & 16 deletions builtin/hasher.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -72,30 +72,51 @@ struct Hasher {
/// inspect(h1.finalize() != h2.finalize(), content="true") // Different seeds produce different hashes
/// }
/// ```
pub fn Hasher::new(seed? : Int = seed) -> Hasher {
pub fn Hasher::new(seed? : Int = get_hash_seed()) -> Hasher {
{ acc: seed.reinterpret_as_uint() + GPRIME5 }
}

///|
#cfg(not(target="js"))
let seed : Int = 0
let global_hash_seed : Ref[Int] = { val: 0 }

///|
#cfg(target="js")
let seed : Int = random_seed()
let hash_seed_counter : Ref[UInt64] = { val: 0UL }

///|
/// Produces a random seed on the JavaScript target.
///
/// When `globalThis.crypto.getRandomValues` is available it fills a
/// one-element `Uint32Array` and returns that element; otherwise it falls back
/// to `Math.random()` scaled by 2^32 and floored. Both branches apply `| 0`
/// to convert the result to a signed 32-bit value.
#cfg(target="js")
extern "js" fn random_seed() -> Int =
#|() => {
#| if (globalThis.crypto?.getRandomValues) {
#| const array = new Uint32Array(1);
#| globalThis.crypto.getRandomValues(array);
#| return array[0] | 0; // Convert to signed 32
#| } else {
#| return Math.floor(Math.random() * 0x100000000) | 0; // Fallback to Math.random
#| }
#|}
/// Reads the value currently stored in `global_hash_seed`.
fn get_hash_seed() -> Int {
  let current = global_hash_seed.val
  current
}

///|
/// Stores `seed` in `global_hash_seed` and resets `hash_seed_counter` to zero.
///
/// Parameters:
///
/// * `seed` : the new global hash seed.
pub fn set_hash_seed(seed : Int) -> Unit {
  // The counter is restarted together with the seed, so the values derived
  // from them afterwards start again from the first counter step.
  hash_seed_counter.val = 0UL
  global_hash_seed.val = seed
}

///|
/// Derives a fresh seed from the global seed and a running counter.
///
/// Each call increments `hash_seed_counter`, adds the new counter value to the
/// global seed (reinterpreted as unsigned and widened to `UInt64`), passes the
/// sum through `splitmix64`, and folds the 64-bit result into an `Int`.
fn next_hash_seed() -> Int {
  let tick = hash_seed_counter.val + 1UL
  hash_seed_counter.val = tick
  let base = global_hash_seed.val.reinterpret_as_uint().to_uint64()
  let mixed = splitmix64(base + tick)
  // XOR the upper half into the lower half before converting to `UInt`.
  let folded = mixed ^ (mixed >> 32)
  folded.to_uint().reinterpret_as_int()
}

///|
/// Hashes `value` with a `Hasher` created from the explicit `seed`.
///
/// Returns the finalized hash after combining `value` into the hasher.
fn[T : Hash] hash_with_seed(value : T, seed : Int) -> Int {
  let seeded = Hasher::new(seed=seed)
  seeded.combine(value)
  let digest = seeded.finalize()
  digest
}

///|
/// One SplitMix64-style mixing step over a 64-bit state.
///
/// Adds the increment `0x9E3779B97F4A7C15` to `state`, applies two
/// xor-shift-multiply rounds (shifts 30 and 27), and finishes with an
/// xor-shift by 31. Returns the mixed value; `state` itself is not stored.
fn splitmix64(state : UInt64) -> UInt64 {
  let advanced = state + 0x9E3779B97F4A7C15UL
  let round1 = (advanced ^ (advanced >> 30)) * 0xBF58476D1CE4E5B9UL
  let round2 = (round1 ^ (round1 >> 27)) * 0x94D049BB133111EBUL
  round2 ^ (round2 >> 31)
}

///|
/// Combines a hashable value with the current state of the hasher. This is
Expand Down
13 changes: 13 additions & 0 deletions builtin/hasher_test.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,16 @@ test "hash_combine for UInt" {
Hash::hash_combine(value, hasher)
inspect(hasher.finalize(), content="1161967057")
}

///|
test "set_hash_seed changes default Hash::hash seed" {
  set_hash_seed(0)
  let h0 = Hash::hash("moonbit")
  set_hash_seed(2026)
  let h1 = Hash::hash("moonbit")
  // Reference value from a hasher that is given the seed explicitly.
  let hasher = Hasher::new(seed=2026)
  hasher.combine("moonbit")
  let expected = hasher.finalize()
  // Restore the global seed before asserting: a failing `inspect` ends the
  // test early, and a reset placed after it would be skipped, leaving the
  // modified seed in place for whatever runs next.
  set_hash_seed(0)
  inspect(h0 == h1, content="false")
  inspect(h1 == expected, content="true")
}
67 changes: 51 additions & 16 deletions builtin/linked_hash_map.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct Map[K, V] {
mut capacity : Int // current capacity
mut capacity_mask : Int // capacity_mask = capacity - 1, used to find idx
mut grow_at : Int // threshold that triggers grow
mut hash_seed : Int
mut head : Entry[K, V]? // head of linked list
mut tail : Int // tail of linked list
}
Expand All @@ -61,6 +62,7 @@ pub fn[K, V] Map::new(capacity? : Int = 8) -> Map[K, V] {
capacity,
capacity_mask: capacity - 1,
grow_at: calc_grow_threshold(capacity),
hash_seed: next_hash_seed(),
entries: FixedArray::make(capacity, None),
head: None,
tail: -1,
Expand Down Expand Up @@ -106,7 +108,12 @@ pub fn[K : Hash + Eq, V] Map::from_array(arr : ArrayView[(K, V)]) -> Map[K, V] {
/// ```
#alias("_[_]=_")
pub fn[K : Hash + Eq, V] Map::set(self : Map[K, V], key : K, value : V) -> Unit {
self.set_with_hash(key, value, key.hash())
self.set_with_hash(key, value, self.key_hash(key))
}

///|
/// Hashes `key` using this map's own `hash_seed`.
fn[K : Hash, V] Map::key_hash(self : Map[K, V], key : K) -> Int {
  let seed = self.hash_seed
  hash_with_seed(key, seed)
}

///|
Expand Down Expand Up @@ -214,7 +221,7 @@ fn[K, V] Map::set_entry(
/// }
/// ```
pub fn[K : Hash + Eq, V] Map::get(self : Map[K, V], key : K) -> V? {
let hash = key.hash()
let hash = self.key_hash(key)
for i = 0, idx = hash & self.capacity_mask {
guard self.entries[idx] is Some(entry) else { break None }
if entry.hash == hash && entry.key == key {
Expand All @@ -230,7 +237,7 @@ pub fn[K : Hash + Eq, V] Map::get(self : Map[K, V], key : K) -> V? {
///|
#alias("_[_]")
pub fn[K : Hash + Eq, V] Map::at(self : Map[K, V], key : K) -> V {
let hash = key.hash()
let hash = self.key_hash(key)
for i = 0, idx = hash & self.capacity_mask {
guard self.entries[idx] is Some(entry)
if entry.hash == hash && entry.key == key {
Expand Down Expand Up @@ -269,7 +276,7 @@ pub fn[K : Hash + Eq, V] Map::get_or_default(
key : K,
default : V,
) -> V {
let hash = key.hash()
let hash = self.key_hash(key)
for i = 0, idx = hash & self.capacity_mask {
match self.entries[idx] {
Some(entry) => {
Expand All @@ -293,7 +300,7 @@ pub fn[K : Hash + Eq, V] Map::get_or_init(
key : K,
default : () -> V,
) -> V {
let hash = key.hash()
let hash = self.key_hash(key)
let (idx, psl, new_value, push_away) = for psl = 0, idx = hash &
self.capacity_mask {
match self.entries[idx] {
Expand Down Expand Up @@ -338,7 +345,7 @@ pub fn[K : Hash + Eq, V] Map::get_or_init(
/// Check if the hash map contains a key.
pub fn[K : Hash + Eq, V] Map::contains(self : Map[K, V], key : K) -> Bool {
// inline Map::get to avoid boxing
let hash = key.hash()
let hash = self.key_hash(key)
for i = 0, idx = hash & self.capacity_mask {
guard self.entries[idx] is Some(entry) else { break false }
if entry.hash == hash && entry.key == key {
Expand Down Expand Up @@ -379,7 +386,7 @@ pub fn[K : Hash + Eq, V : Eq] Map::contains_kv(
value : V,
) -> Bool {
// inline Map::get to avoid boxing
let hash = key.hash()
let hash = self.key_hash(key)
for i = 0, idx = hash & self.capacity_mask {
guard self.entries[idx] is Some(entry) else { break false }
if entry.hash == hash && entry.key == key && entry.value == value {
Expand Down Expand Up @@ -414,7 +421,7 @@ pub fn[K : Hash + Eq, V : Eq] Map::contains_kv(
/// }
/// ```
pub fn[K : Hash + Eq, V] Map::remove(self : Map[K, V], key : K) -> Unit {
self.remove_with_hash(key, key.hash())
self.remove_with_hash(key, self.key_hash(key))
}

///|
Expand Down Expand Up @@ -498,6 +505,29 @@ fn[K : Eq, V] Map::grow(self : Map[K, V]) -> Unit {
}
}

///|
/// Re-inserts every entry of the map under a new hash seed.
///
/// Does nothing when `seed` equals the map's current `hash_seed`. Otherwise
/// the slot array is replaced by an empty one of the same capacity, the size
/// and the linked-list head/tail are reset, and each entry of the previous
/// linked list is inserted again, starting from the old head, with its key
/// hashed under `seed`.
///
/// Parameters:
///
/// * `self` : the map to rehash in place.
/// * `seed` : the hash seed the map should use from now on.
pub fn[K : Hash + Eq, V] Map::rehash_with_seed(
  self : Map[K, V],
  seed : Int,
) -> Unit {
  guard self.hash_seed != seed else { return }
  // Keep a handle on the old chain before the map is emptied.
  let previous_head = self.head
  self.hash_seed = seed
  self.entries = FixedArray::make(self.capacity, None)
  self.size = 0
  self.head = None
  self.tail = -1
  loop previous_head {
    None => break
    Some({ key, value, next, .. }) => {
      self.set_with_hash(key, value, self.key_hash(key))
      continue next
    }
  }
}

///|
fn calc_grow_threshold(capacity : Int) -> Int {
capacity * 13 / 16
Expand Down Expand Up @@ -656,7 +686,7 @@ pub impl[K : Hash + Eq, V : Eq] Eq for Map[K, V] with equal(
guard self.size == that.size else { return false }
for k, v in self {
guard that.contains_kv(k, v) else { return false }
} else {
} nobreak {
true
}
}
Expand Down Expand Up @@ -702,6 +732,7 @@ pub fn[K, V, V2] Map::map(self : Map[K, V], f : (K, V) -> V2) -> Map[K, V2] {
size: self.size,
capacity_mask: self.capacity_mask,
grow_at: self.grow_at,
hash_seed: self.hash_seed,
head: None,
tail: self.tail,
}
Expand Down Expand Up @@ -734,6 +765,7 @@ pub fn[K, V] Map::copy(self : Map[K, V]) -> Map[K, V] {
size: self.size,
capacity_mask: self.capacity_mask,
grow_at: self.grow_at,
hash_seed: self.hash_seed,
head: None,
tail: self.tail,
}
Expand Down Expand Up @@ -781,7 +813,10 @@ pub fn[K, V] Map::copy(self : Map[K, V]) -> Map[K, V] {
/// @json.json_inspect(merged, content={ "a": 1, "b": 3, "c": 4 })
/// }
/// ```
pub fn[K : Eq, V] Map::merge(self : Map[K, V], other : Map[K, V]) -> Map[K, V] {
pub fn[K : Hash + Eq, V] Map::merge(
self : Map[K, V],
other : Map[K, V],
) -> Map[K, V] {
let result = self.copy()
result.merge_in_place(other)
result
Expand Down Expand Up @@ -811,16 +846,16 @@ pub fn[K : Eq, V] Map::merge(self : Map[K, V], other : Map[K, V]) -> Map[K, V] {
/// @json.json_inspect(map1, content={ "a": 1, "b": 3, "c": 4 })
/// }
/// ```
pub fn[K : Eq, V] Map::merge_in_place(
pub fn[K : Hash + Eq, V] Map::merge_in_place(
self : Map[K, V],
other : Map[K, V],
) -> Unit {
if physical_equal(self, other) {
return
}
loop other.head {
Some({ key, value, next, hash, .. }) => {
self.set_with_hash(key, value, hash)
Some({ key, value, next, .. }) => {
self.set_with_hash(key, value, self.key_hash(key))
continue next
}
None => break
Expand Down Expand Up @@ -945,7 +980,7 @@ pub fn[K : Hash + Eq, V] Map::update(
key : K,
f : (V?) -> V?,
) -> Unit {
let hash = key.hash()
let hash = self.key_hash(key)
let (idx, psl, new_value, push_away) = for psl = 0, idx = hash &
self.capacity_mask {
match self.entries[idx] {
Expand Down Expand Up @@ -1051,7 +1086,7 @@ fn BytesView::equal_to_bytes(self : Self, other : Bytes) -> Bool {
/// }
/// ```
pub fn[V] Map::get_from_bytes(map : Self[Bytes, V], key : BytesView) -> V? {
let hash = key.hash()
let hash = hash_with_seed(key, map.hash_seed)
for i = 0, idx = hash & map.capacity_mask {
guard map.entries[idx] is Some(entry) else { break None }
if entry.hash == hash && key.equal_to_bytes(entry.key) {
Expand Down Expand Up @@ -1088,7 +1123,7 @@ pub fn[V] Map::get_from_bytes(map : Self[Bytes, V], key : BytesView) -> V? {
/// }
/// ```
pub fn[V] Map::get_from_string(map : Self[String, V], key : StringView) -> V? {
let hash = key.hash()
let hash = hash_with_seed(key, map.hash_seed)
for i = 0, idx = hash & map.capacity_mask {
guard map.entries[idx] is Some(entry) else { break None }
if entry.hash == hash && key.equal_to_string(entry.key) {
Expand Down
17 changes: 17 additions & 0 deletions builtin/linked_hash_map_test.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -661,3 +661,20 @@ test "Map::set updating existing key should not grow" {
assert_eq(map.get(i.to_string()), Some(i * 10))
}
}

///|
test "Map merge and rehash across different seeds" {
  // Two maps created under different global seeds.
  set_hash_seed(1)
  let map1 : Map[String, Int] = {}
  map1.set("a", 1)
  set_hash_seed(2)
  let map2 : Map[String, Int] = {}
  map2.set("b", 2)
  map1.merge_in_place(map2)
  let merged_a = map1.get("a")
  let merged_b = map1.get("b")
  map1.rehash_with_seed(999)
  let rehashed_a = map1.get("a")
  let rehashed_b = map1.get("b")
  // Restore the global seed before asserting: a failing `inspect` ends the
  // test early, and a reset placed after it would be skipped, leaving the
  // modified seed in place for whatever runs next.
  set_hash_seed(0)
  inspect(merged_a, content="Some(1)")
  inspect(merged_b, content="Some(2)")
  inspect(rehashed_a, content="Some(1)")
  inspect(rehashed_b, content="Some(2)")
}
4 changes: 2 additions & 2 deletions builtin/linked_hash_map_wbtest.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ test "remove_entry_head" {
assert_eq(head.prev, -1)
assert_true(head.next is None)
assert_eq(head.psl, 0)
assert_eq(head.hash, (2).hash())
assert_eq(head.hash, map.key_hash(2))
assert_eq(head.key, 2)
assert_eq(head.value, 2)
}
Expand All @@ -564,7 +564,7 @@ test "remove_entry_tail" {
assert_eq(tail.prev, -1)
assert_true(tail.next is None)
assert_eq(tail.psl, 0)
assert_eq(tail.hash, (1).hash())
assert_eq(tail.hash, map.key_hash(1))
assert_eq(tail.key, 1)
assert_eq(tail.value, 1)
}
Expand Down
7 changes: 5 additions & 2 deletions builtin/pkg.generated.mbti
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ pub fn[T : Show] println(T) -> Unit

pub fn[T : Show] repr(T) -> String

pub fn set_hash_seed(Int) -> Unit

// Errors
pub(all) suberror BenchError {
BenchError(String)
Expand Down Expand Up @@ -377,10 +379,11 @@ pub fn[K, V] Map::keys(Self[K, V]) -> Iter[K]
#alias(size, deprecated)
pub fn[K, V] Map::length(Self[K, V]) -> Int
pub fn[K, V, V2] Map::map(Self[K, V], (K, V) -> V2) -> Self[K, V2]
pub fn[K : Eq, V] Map::merge(Self[K, V], Self[K, V]) -> Self[K, V]
pub fn[K : Eq, V] Map::merge_in_place(Self[K, V], Self[K, V]) -> Unit
pub fn[K : Hash + Eq, V] Map::merge(Self[K, V], Self[K, V]) -> Self[K, V]
pub fn[K : Hash + Eq, V] Map::merge_in_place(Self[K, V], Self[K, V]) -> Unit
pub fn[K, V] Map::new(capacity? : Int) -> Self[K, V]
pub fn[K : Hash + Eq, V] Map::of(FixedArray[(K, V)]) -> Self[K, V]
pub fn[K : Hash + Eq, V] Map::rehash_with_seed(Self[K, V], Int) -> Unit
pub fn[K : Hash + Eq, V] Map::remove(Self[K, V], K) -> Unit
pub fn[K, V] Map::retain(Self[K, V], (K, V) -> Bool) -> Unit
#alias("_[_]=_")
Expand Down
Loading
Loading