Skip to content

Commit e8ef5f9

Browse files
committed
Do not use a native PHP array for index lookups
1 parent e4a87e9 commit e8ef5f9

1 file changed

Lines changed: 14 additions & 16 deletions

File tree

src/FastSet.php

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,22 @@ final class FastSet
1616

1717
private bool $isInitialized = false;
1818

19-
private string $blob = '';
19+
private string $hashesBlob = '';
2020

2121
/**
2222
* Prefix → start-offset lookup table for the sorted fingerprint blob.
2323
*
24-
* Fingerprints (16 bytes each) are stored sorted (binary order) in `hashes.bin`.
24+
* Fingerprints are stored sorted (binary order) in `hashes.bin`.
2525
* We bucket them by their first 2 bytes (a 16-bit prefix key in the range 0..65535).
2626
*
27-
* For each prefix key `p`, this array stores the starting index (not the byte offset)
27+
* For each prefix key `p`, this blob stores the starting index (not the byte offset)
2828
* of that bucket within the sorted fingerprint list. This is the lower bound of the
2929
* bucket; its (exclusive) upper bound is the entry stored for `p + 1`.
3030
*
3131
* This is why the table has 65537 entries: one extra "placeholder" entry at the end
3232
* containing the offset after the last fingerprint, so `p + 1` is always defined.
33-
*
34-
* Values are indices of 16-byte fingerprints (so byte position = index * 16).
35-
*
36-
* @var array<int, int>
3733
*/
38-
private array $prefixOffsets = [];
34+
private string $indexBlob = '';
3935

4036
public function __construct(
4137
private readonly string $directory,
@@ -73,8 +69,10 @@ public function has(string $entry): bool
7369
$prefixKey = $this->getPrefixKey($fingerprint);
7470

7571
// Restrict search to the half-open bucket range [startIndex, endIndex)
76-
$startIndex = $this->prefixOffsets[$prefixKey];
77-
$endIndex = $this->prefixOffsets[$prefixKey + 1];
72+
$byteOffset = $prefixKey * 4;
73+
$values = unpack('V2', substr($this->indexBlob, $byteOffset, 8));
74+
$startIndex = $values[1];
75+
$endIndex = $values[2];
7876

7977
// Empty bucket -> definitely not present
8078
if ($startIndex >= $endIndex) {
@@ -91,7 +89,7 @@ public function has(string $entry): bool
9189
$mid = $low + $high >> 1;
9290

9391
$middleTailByteOffset = $mid * $this->storedTailByteLength;
94-
$middleFingerprintTailBytes = substr($this->blob, $middleTailByteOffset, $this->storedTailByteLength);
92+
$middleFingerprintTailBytes = substr($this->hashesBlob, $middleTailByteOffset, $this->storedTailByteLength);
9593

9694
$cmp = strcmp($middleFingerprintTailBytes, $queryFingerprintTailBytes);
9795
if (0 === $cmp) {
@@ -164,15 +162,15 @@ public function initialize(): void
164162
return;
165163
}
166164

167-
$blob = @file_get_contents($this->hashesPath);
168-
$indexBytes = @file_get_contents($this->indexPath);
165+
$hashesBlob = @file_get_contents($this->hashesPath);
166+
$indexBlob = @file_get_contents($this->indexPath);
169167

170-
if (false === $blob || false === $indexBytes) {
168+
if (false === $hashesBlob || false === $indexBlob) {
171169
throw new \RuntimeException('Hashes or index files do not exist.');
172170
}
173171

174-
$this->blob = $blob;
175-
$this->prefixOffsets = array_values(unpack('V*', $indexBytes));
172+
$this->hashesBlob = $hashesBlob;
173+
$this->indexBlob = $indexBlob;
176174
$this->isInitialized = true;
177175
}
178176

0 commit comments

Comments
 (0)