diff --git a/BloomFilter.py b/BloomFilter.py index 0c7852f..5425c14 100644 --- a/BloomFilter.py +++ b/BloomFilter.py @@ -3,7 +3,6 @@ class BloomFilter: - # To create bit array of size n ''' size - size of bit array calc based the formula @@ -13,56 +12,59 @@ class BloomFilter: def __init__(self, n): self.p = 0.05 - self.size = math.ceil(-n*math.log(self.p)/(math.log(2)**2)) - self.k = math.ceil(self.size/n*math.log(2)) - self.bit_array = [0] * self.size - self.validate_array = [] + self.size = math.ceil(-n * math.log(self.p) / (math.log(2) ** 2)) + self.k = math.ceil(self.size / n * math.log(2)) + self.bit_array = bytearray(math.ceil(self.size / 8)) + + def __set_bit__(self, index): + byte_index = int(index / 8) + self.bit_array[byte_index] = self.bit_array[byte_index] | (1 << (7 - index % 8)) + + def __get_bit__(self, index): + byte_index = int(index / 8) + return self.bit_array[byte_index] & (1 << (7 - index % 8)) # Func to insert values into BF def insert(self, value, freq=1): - line_hash = str(mmh3.hash(value,freq)) + line_hash = str(mmh3.hash(value, freq)) for i in range(self.k): - index = mmh3.hash(line_hash,i) % self.size - self.bit_array[index] = 1 + index = mmh3.hash(line_hash, i) % self.size + self.__set_bit__(index) # To check if the value is present in BF or not def validate(self, value, freq=1): - line_hash = str(mmh3.hash(value,freq)) + line_hash = str(mmh3.hash(value, freq)) for i in range(self.k): - check_at_index = mmh3.hash(line_hash,i) % self.size - if self.validate_array[check_at_index] == 1: + check_at_index = mmh3.hash(line_hash, i) % self.size + if self.__get_bit__(check_at_index): continue else: return False return True - def readBloomFilterFromFile(self,filename): + def readBloomFilterFromFile(self, filename): f = open(filename, "rb") - self.validate_array = list(f.read()) - for i in range(0, len(self.validate_array)): - self.validate_array[i] -= 48 - print(self.validate_array) + self.bit_array = bytearray(f.read()) + print(self.bit_array) f.close() - - def readBloomFilterFromBytes(self,bf_as_bytes): - self.validate_array = list(bf_as_bytes) - for i in range(0, len(self.validate_array)): - self.validate_array[i] -= 48 + + def readBloomFilterFromBytes(self, bf_as_bytes): + self.bit_array = bytearray(bf_as_bytes) # Returns the bit array def getBloomFilter(self): return self.bit_array - # Returns the size of the bit arry + # Returns the size of the bit array def getSize(self): return self.size - def getNFromSize(self,size): - return(math.floor(size*-1*(math.log(2)**2)/math.log(self.p))) + def getNFromSize(self, size): + return math.floor(size * -1 * (math.log(2) ** 2) / math.log(self.p)) # Returns the # of Hash Functions ie. h1(k), h2(k) ... def getNumberOfHashFunctions(self): return self.k def getAsBytes(self): - return str.encode(''.join([str(i) for i in self.bit_array])) + return self.bit_array diff --git a/P2P/utils.py b/P2P/utils.py index 54d60c6..e909bdd 100644 --- a/P2P/utils.py +++ b/P2P/utils.py @@ -1,9 +1,8 @@ - class Request: - REQUEST_TYPE_BLOOMFILTER = 2 + REQUEST_TYPE_BLOOMFILTER = 2 REQUEST_TYPE_REPLY_SLAVE_BLOOMFILTER = 3 REQUEST_SEND_ACTUAL_LINES = 4 REQUEST_SEND_ENTIRE_FILE_HASH = 5 @@ -14,7 +13,7 @@ def __init__(self, request_type, message): self.message = message if isinstance(message, str): self.byte_message = bytes(message, 'utf-8') - elif isinstance(message, bytes) : + elif isinstance(message, bytes): self.byte_message = message else: self.byte_message = bytes(message) @@ -26,17 +25,13 @@ def get_type(self): return self.type def get_message_size(self): - return len(self.actual_message()) - - def actual_message(self): - return self.byte_message.decode('utf-8') + return len(self.byte_message) def get_message_bytes(self): return self.byte_message def __str__(self): - return "" - + return "" def parse_received_data(data): @@ -50,4 +45,4 @@ def parse_received_data(data): str_message = bloom_filter type_int = int(bytes.hex(type_specifying_byte), 16) req = Request(type_int, str_message) - return req \ No newline at end of file + return req diff --git a/main.py b/main.py index c786dfa..e9a1915 100644 --- a/main.py +++ b/main.py @@ -46,7 +46,7 @@ def handle_request(self, request): # We send it now print("\n\nThe other user has modified his file, syncing...") print("Received the bloom filter") - my_missing_content = getMissingContent(getNFromSize( + my_missing_content = getMissingContent(getNFromByteSize( request.get_message_size()), request.get_message_bytes()) print("Acknowleding and transmitting the bloom filter...") bf = computeBloomFilter() @@ -57,7 +57,7 @@ def handle_request(self, request): elif(request.get_type() == utils.Request.REQUEST_TYPE_REPLY_SLAVE_BLOOMFILTER): print( "Request was acknowledged by the other peer and has given the other bloom filter") - my_missing_content = getMissingContent(getNFromSize( + my_missing_content = getMissingContent(getNFromByteSize( request.get_message_size()), request.get_message_bytes()) # Send the missing contents computed to the other user @@ -68,7 +68,7 @@ def handle_request(self, request): elif(request.get_type() == utils.Request.REQUEST_SEND_ACTUAL_LINES): print("Received the actual missing lines...") - missing_dict = eval(request.actual_message()) + missing_dict = eval(request.get_message_bytes()) should_trigger_modified = False print("Syncing the file...") Synchronizer.syncFile( @@ -97,10 +97,10 @@ def handle_request(self, request): p2p.send_request(req) print("Done.") elif(request.get_type() == utils.Request.REQUEST_SEND_ENTIRE_FILE): - file_content_from_other_user = request.actual_message() + file_content_from_other_user = request.get_message_bytes() should_trigger_modified = False - with open(input_path, 'w') as f: + with open(input_path, 'wb') as f: f.write(file_content_from_other_user) time.sleep(1) should_trigger_modified = True @@ -138,12 +138,11 @@ def on_modified(self, event): self.last_modified = time.time() return super().on_modified(event) -# Use this func to find n required for BloomFilter -# Size is the len of bloomfilter bit array - -def getNFromSize(size): - return(floor(size*-1*(log(2)**2)/log(0.05))) +# Use this func to find n required for BloomFilter +# byte_size is the len of bloomfilter bit array in bytes +def getNFromByteSize(byte_size): + return floor((byte_size * 8)*-1*(log(2)**2)/log(0.05)) def main(): @@ -251,7 +250,7 @@ def initiateSync(): def read_entire_file(): - with open(input_path) as f: + with open(input_path, "rb") as f: content = f.read() return content