@@ -16,26 +16,22 @@ final class FastSet
1616
1717 private bool $ isInitialized = false ;
1818
19- private string $ blob = '' ;
19+ private string $ hashesBlob = '' ;
2020
2121 /**
2222 * Prefix → start-offset lookup table for the sorted fingerprint blob.
2323 *
24- * Fingerprints (16 bytes each) are stored sorted (binary order) in `hashes.bin`.
24+ * Fingerprints are stored sorted (binary order) in `hashes.bin`.
2525 * We bucket them by their first 2 bytes (a 16-bit prefix key in the range 0..65535).
2626 *
27- * For each prefix key `p`, this array stores the starting index (not the byte offset)
27+ * For each prefix key `p`, this blob stores the starting index (not the byte offset)
2828 * of that bucket within the sorted fingerprint list. This is the bucket's lower bound;
2929 * the (exclusive) upper bound is the entry stored for `p + 1`.
3030 *
3131 * This is why the table has 65537 entries: one extra "placeholder" entry at the end
3232 * containing the index just past the last fingerprint, so `p + 1` is always defined.
33- *
34- * Values are indices of 16-byte fingerprints (so byte position = index * 16).
35- *
36- * @var array<int, int>
3733 */
38- private array $ prefixOffsets = [] ;
34+ private string $ indexBlob = '' ;
3935
4036 public function __construct (
4137 private readonly string $ directory ,
@@ -73,8 +69,10 @@ public function has(string $entry): bool
7369 $ prefixKey = $ this ->getPrefixKey ($ fingerprint );
7470
7571 // Restrict search to the bucket range [startIndex, endIndex]
76- $ startIndex = $ this ->prefixOffsets [$ prefixKey ];
77- $ endIndex = $ this ->prefixOffsets [$ prefixKey + 1 ];
72+ $ byteOffset = $ prefixKey * 4 ;
73+ $ values = unpack ('V2 ' , substr ($ this ->indexBlob , $ byteOffset , 8 ));
74+ $ startIndex = $ values [1 ];
75+ $ endIndex = $ values [2 ];
7876
7977 // Empty bucket -> definitely not present
8078 if ($ startIndex >= $ endIndex ) {
@@ -91,7 +89,7 @@ public function has(string $entry): bool
9189 $ mid = $ low + $ high >> 1 ;
9290
9391 $ middleTailByteOffset = $ mid * $ this ->storedTailByteLength ;
94- $ middleFingerprintTailBytes = substr ($ this ->blob , $ middleTailByteOffset , $ this ->storedTailByteLength );
92+ $ middleFingerprintTailBytes = substr ($ this ->hashesBlob , $ middleTailByteOffset , $ this ->storedTailByteLength );
9593
9694 $ cmp = strcmp ($ middleFingerprintTailBytes , $ queryFingerprintTailBytes );
9795 if (0 === $ cmp ) {
@@ -164,15 +162,15 @@ public function initialize(): void
164162 return ;
165163 }
166164
167- $ blob = @file_get_contents ($ this ->hashesPath );
168- $ indexBytes = @file_get_contents ($ this ->indexPath );
165+ $ hashesBlob = @file_get_contents ($ this ->hashesPath );
166+ $ indexBlob = @file_get_contents ($ this ->indexPath );
169167
170- if (false === $ blob || false === $ indexBytes ) {
168+ if (false === $ hashesBlob || false === $ indexBlob ) {
171169 throw new \RuntimeException ('Hashes or index files do not exist. ' );
172170 }
173171
174- $ this ->blob = $ blob ;
175- $ this ->prefixOffsets = array_values ( unpack ( ' V* ' , $ indexBytes )) ;
172+ $ this ->hashesBlob = $ hashesBlob ;
173+ $ this ->indexBlob = $ indexBlob ;
176174 $ this ->isInitialized = true ;
177175 }
178176
0 commit comments