From e7ab1881470e4d4f10c27f2d451fb29732e9e3fa Mon Sep 17 00:00:00 2001 From: yethee Date: Tue, 10 Mar 2026 22:45:30 +0300 Subject: [PATCH] Prevent race condition in vocabulary cache update --- src/Vocab/Loader/DefaultVocabLoader.php | 23 +++++++++++++++++++++-- tests/EncoderProviderTest.php | 6 +++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/Vocab/Loader/DefaultVocabLoader.php b/src/Vocab/Loader/DefaultVocabLoader.php index 393ea55..f2381f0 100644 --- a/src/Vocab/Loader/DefaultVocabLoader.php +++ b/src/Vocab/Loader/DefaultVocabLoader.php @@ -22,9 +22,12 @@ use function is_resource; use function is_writable; use function mkdir; +use function rename; use function sha1; use function sprintf; use function stream_copy_to_stream; +use function uniqid; +use function unlink; use const DIRECTORY_SEPARATOR; @@ -68,10 +71,11 @@ public function loadFile(string $uri, string|null $checksum = null): string } try { - $cacheStream = fopen($cacheFile, 'w+'); + $tmpFile = $cacheFile . '_' . uniqid('tmp', more_entropy: true); + $cacheStream = fopen($tmpFile, 'w+'); if ($cacheStream === false) { - throw new IOError(sprintf('Could not open file for write: %s', $cacheFile)); + throw new IOError(sprintf('Could not open file for write: %s', $tmpFile)); } try { @@ -86,6 +90,17 @@ public function loadFile(string $uri, string|null $checksum = null): string throw new IOError($message); } + if (rename($tmpFile, $cacheFile) === false) { + $message = 'Could not rename file'; + $lastError = error_get_last(); + + if ($lastError !== null) { + $message .= ': ' . $lastError['message']; + } + + throw new IOError($message); + } + if ($checksum !== null) { if (! $this->checkHash($cacheFile, $checksum)) { throw new IOError(sprintf( @@ -96,6 +111,10 @@ public function loadFile(string $uri, string|null $checksum = null): string } } finally { fclose($cacheStream); + + if (file_exists($tmpFile)) { + unlink($tmpFile); + } } } finally { fclose($stream); diff --git a/tests/EncoderProviderTest.php b/tests/EncoderProviderTest.php index 2c7be18..bf0f3c8 100644 --- a/tests/EncoderProviderTest.php +++ b/tests/EncoderProviderTest.php @@ -5,6 +5,7 @@ namespace Yethee\Tiktoken\Tests; use org\bovigo\vfs\vfsStream; +use org\bovigo\vfs\vfsStreamFile; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; use Yethee\Tiktoken\EncoderProvider; @@ -46,7 +47,7 @@ public function testUseHashWhenLoadVocab(): void $cache = vfsStream::setup('cache'); $vocabCacheFilename = hash('sha1', EncoderProvider::ENCODINGS['p50k_base']['vocab']); - $cacheFile = vfsStream::newFile($vocabCacheFilename) + vfsStream::newFile($vocabCacheFilename) ->withContent('broken cache') ->at($cache); @@ -56,6 +57,9 @@ public function testUseHashWhenLoadVocab(): void $provider->get('p50k_base'); + $cacheFile = $cache->getChild($vocabCacheFilename); + + self::assertInstanceOf(vfsStreamFile::class, $cacheFile); self::assertNotEquals('broken cache', $cacheFile->getContent()); }