-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
63 lines (54 loc) · 2.11 KB
/
main.py
File metadata and controls
63 lines (54 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import hashlib
class BloomFilter:
def __init__(self, size=1000, hash_count=5):
"""
:param size: The size of the bit array (larger means fewer false positives).
:param hash_count: Number of different hash functions to apply to each item.
"""
self.size = size
self.hash_count = hash_count
# Python integer to store bits; could also use bitarray or bytearray
self.bit_array = 0
def _hashes(self, item):
"""
Generate hash_count different hashes of 'item'.
Each hash is turned into an integer index in [0, size).
"""
# Convert item to bytes, if not already
if isinstance(item, str):
encoded_item = item.encode('utf-8')
else:
encoded_item = item
# Generate multiple hash values by salting with i
for i in range(self.hash_count):
# Use hashlib with a different salt each iteration
hasher = hashlib.sha256(encoded_item + i.to_bytes(2, byteorder='big'))
# Convert to int, then mod to get an index
yield int(hasher.hexdigest(), 16) % self.size
def add(self, item):
"""
Add an item to the Bloom filter.
Sets the bits corresponding to the multiple hashes of 'item'.
"""
for index in self._hashes(item):
self.bit_array |= (1 << index)
def check(self, item):
"""
Check membership in the Bloom filter.
Returns True if 'item' is *probably* in the filter; False if definitely not.
"""
for index in self._hashes(item):
# If the bit at position 'index' is not set, return False immediately
if not (self.bit_array & (1 << index)):
return False
return True
# --- Example Usage ---
if __name__ == "__main__":
bf = BloomFilter(size=1000, hash_count=5)
# Add some items
bf.add("apple")
bf.add("banana")
# Check if items are in the filter
print(bf.check("apple")) # Expect True
print(bf.check("banana")) # Expect True
print(bf.check("cherry")) # Expect False (most likely)