From 22c182d6c0e54ac68b45619c5cace7af0b2d151b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 13:35:34 +0200 Subject: [PATCH 01/25] createCache(): Take CacheInfo as argument --- src/libstore-tests/nar-info-disk-cache.cc | 12 +++++++----- src/libstore/http-binary-cache-store.cc | 3 ++- .../include/nix/store/nar-info-disk-cache.hh | 15 +++++++++------ src/libstore/nar-info-disk-cache.cc | 10 +++++----- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/libstore-tests/nar-info-disk-cache.cc b/src/libstore-tests/nar-info-disk-cache.cc index aebefc775675..7612250c661d 100644 --- a/src/libstore-tests/nar-info-disk-cache.cc +++ b/src/libstore-tests/nar-info-disk-cache.cc @@ -30,15 +30,16 @@ TEST(NarInfoDiskCacheImpl, create_and_read) // Set up "background noise" and check that different caches receive different ids { - auto bc1 = cache->createCache("https://bar", "/nix/storedir", wantMassQuery, prio); - auto bc2 = cache->createCache("https://xyz", "/nix/storedir", false, 12); + auto bc1 = + cache->createCache("https://bar", "/nix/storedir", {.wantMassQuery = wantMassQuery, .priority = prio}); + auto bc2 = cache->createCache("https://xyz", "/nix/storedir", {.priority = 12}); ASSERT_NE(bc1, bc2); barId = bc1; } // Check that the fields are saved and returned correctly. This does not test // the select statement yet, because of in-memory caching. 
- savedId = cache->createCache("http://foo", "/nix/storedir", wantMassQuery, prio); + savedId = cache->createCache("http://foo", "/nix/storedir", {.wantMassQuery = wantMassQuery, .priority = prio}); ; { auto r = cache->upToDateCacheExists("http://foo"); @@ -84,7 +85,7 @@ TEST(NarInfoDiskCacheImpl, create_and_read) } // "Update", same data, check that the id number is reused - cache2->createCache("http://foo", "/nix/storedir", wantMassQuery, prio); + cache2->createCache("http://foo", "/nix/storedir", {.wantMassQuery = wantMassQuery, .priority = prio}); { auto r = cache2->upToDateCacheExists("http://foo"); @@ -107,7 +108,8 @@ TEST(NarInfoDiskCacheImpl, create_and_read) auto r0 = cache2->upToDateCacheExists("https://bar"); ASSERT_FALSE(r0); - cache2->createCache("https://bar", "/nix/storedir", !wantMassQuery, prio + 10); + cache2->createCache( + "https://bar", "/nix/storedir", {.wantMassQuery = !wantMassQuery, .priority = prio + 10}); auto r = cache2->upToDateCacheExists("https://bar"); ASSERT_EQ(r->wantMassQuery, !wantMassQuery); ASSERT_EQ(r->priority, prio + 10); diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index b3678ae4fdf1..2b4ccfcaef3e 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -75,7 +75,8 @@ void HttpBinaryCacheStore::init() } catch (UploadToHTTP &) { throw Error("'%s' does not appear to be a binary cache", config->cacheUri.to_string()); } - diskCache->createCache(cacheKey, config->storeDir, config->wantMassQuery, config->priority); + diskCache->createCache( + cacheKey, config->storeDir, {.wantMassQuery = config->wantMassQuery, .priority = config->priority}); } } diff --git a/src/libstore/include/nix/store/nar-info-disk-cache.hh b/src/libstore/include/nix/store/nar-info-disk-cache.hh index a30c5a553b96..37d1f1b10e25 100644 --- a/src/libstore/include/nix/store/nar-info-disk-cache.hh +++ b/src/libstore/include/nix/store/nar-info-disk-cache.hh @@ -25,16 
+25,19 @@ struct NarInfoDiskCache virtual ~NarInfoDiskCache() {} - virtual int - createCache(const std::string & uri, const std::string & storeDir, bool wantMassQuery, int priority) = 0; - struct CacheInfo { - int id; - bool wantMassQuery; - int priority; + int id = 0; + bool wantMassQuery = false; + int priority = 0; }; + /** + * Create or update the cached nix-cache-info for the binary cache at `uri`. + * Note that `info.id` is ignored. This function returns the id of the cache entry. + */ + virtual int createCache(const std::string & uri, const std::string & storeDir, const CacheInfo & info) = 0; + virtual std::optional upToDateCacheExists(const std::string & uri) = 0; virtual std::pair> diff --git a/src/libstore/nar-info-disk-cache.cc b/src/libstore/nar-info-disk-cache.cc index 5c69561bebf0..b168e7c50279 100644 --- a/src/libstore/nar-info-disk-cache.cc +++ b/src/libstore/nar-info-disk-cache.cc @@ -203,7 +203,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache } public: - int createCache(const std::string & uri, const std::string & storeDir, bool wantMassQuery, int priority) override + int createCache(const std::string & uri, const std::string & storeDir, const CacheInfo & info) override { return retrySQLite([&]() { auto state(_state.lock()); @@ -219,8 +219,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache Cache ret{ .id = -1, // set below .storeDir = storeDir, - .wantMassQuery = wantMassQuery, - .priority = priority, + .wantMassQuery = info.wantMassQuery, + .priority = info.priority, }; { @@ -228,8 +228,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache .apply(uri) .apply(time(nullptr)) .apply(storeDir) - .apply(wantMassQuery) - .apply(priority)); + .apply(info.wantMassQuery) + .apply(info.priority)); if (!r.next()) { unreachable(); } From d94497aa0a97aac4e415afeb81784be60242cf73 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 15:29:28 +0200 Subject: [PATCH 02/25] Deduplicate Cache/CacheInfo --- src/libstore/nar-info-disk-cache.cc | 44 
++++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/libstore/nar-info-disk-cache.cc b/src/libstore/nar-info-disk-cache.cc index b168e7c50279..fa345764eed3 100644 --- a/src/libstore/nar-info-disk-cache.cc +++ b/src/libstore/nar-info-disk-cache.cc @@ -67,10 +67,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache struct Cache { - int id; std::string storeDir; - bool wantMassQuery; - int priority; + CacheInfo info; }; struct State @@ -192,11 +190,12 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache if (!queryCache.next()) return std::nullopt; auto cache = Cache{ - .id = (int) queryCache.getInt(0), .storeDir = queryCache.getStr(1), - .wantMassQuery = queryCache.getInt(2) != 0, - .priority = (int) queryCache.getInt(3), - }; + .info = { + .id = (int) queryCache.getInt(0), + .wantMassQuery = queryCache.getInt(2) != 0, + .priority = (int) queryCache.getInt(3), + }}; state.caches.emplace(uri, cache); } return getCache(state, uri); @@ -214,14 +213,9 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache auto cache(queryCacheRaw(*state, uri)); if (cache) - return cache->id; + return cache->info.id; - Cache ret{ - .id = -1, // set below - .storeDir = storeDir, - .wantMassQuery = info.wantMassQuery, - .priority = info.priority, - }; + Cache ret{.storeDir = storeDir, .info = info}; { auto r(state->insertCache.use() @@ -233,13 +227,13 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache if (!r.next()) { unreachable(); } - ret.id = (int) r.getInt(0); + ret.info.id = (int) r.getInt(0); } state->caches[uri] = ret; txn.commit(); - return ret.id; + return ret.info.id; }); } @@ -250,7 +244,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache auto cache(queryCacheRaw(*state, uri)); if (!cache) return std::nullopt; - return CacheInfo{.id = cache->id, .wantMassQuery = cache->wantMassQuery, .priority = cache->priority}; + return cache->info; }); } @@ -266,7 +260,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache auto now = time(nullptr); auto 
queryNAR(state->queryNAR.use() - .apply(cache.id) + .apply(cache.info.id) .apply(hashPart) .apply(now - settings.ttlNegative) .apply(now - settings.ttlPositive)); @@ -312,7 +306,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache auto now = time(nullptr); auto queryRealisation(state->queryRealisation.use() - .apply(cache.id) + .apply(cache.info.id) .apply(id.to_string()) .apply(now - settings.ttlNegative) .apply(now - settings.ttlPositive)); @@ -350,7 +344,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache // assert(hashPart == storePathToHash(info->path)); state->insertNAR.use() - .apply(cache.id) + .apply(cache.info.id) .apply(hashPart) .apply(std::string(info->path.name())) .apply(narInfo ? narInfo->url : "", narInfo != 0) @@ -372,7 +366,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache .exec(); } else { - state->insertMissingNAR.use().apply(cache.id).apply(hashPart).apply(time(nullptr)).exec(); + state->insertMissingNAR.use().apply(cache.info.id).apply(hashPart).apply(time(nullptr)).exec(); } }); } @@ -385,7 +379,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache auto & cache(getCache(*state, uri)); state->insertRealisation.use() - .apply(cache.id) + .apply(cache.info.id) .apply(realisation.id.to_string()) .apply(static_cast(realisation).dump()) .apply(time(nullptr)) @@ -399,7 +393,11 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache auto state(_state.lock()); auto & cache(getCache(*state, uri)); - state->insertMissingRealisation.use().apply(cache.id).apply(id.to_string()).apply(time(nullptr)).exec(); + state->insertMissingRealisation.use() + .apply(cache.info.id) + .apply(id.to_string()) + .apply(time(nullptr)) + .exec(); }); } }; From 343b7b1bb3ce62150fa7237106607a6fef70e249 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sat, 6 Jun 2026 23:33:46 +0200 Subject: [PATCH 03/25] nix serve: publish a bloom filter of valid store paths Add a /bloom-filter endpoint to `nix serve` that returns a Bloom filter of every valid store path's 32-character Nix32 hash 
part, so clients can rule out definite cache misses without issuing a .narinfo request per candidate path. The filter uses Kirsch-Mitzenmacher double hashing over the decoded 20-byte hash part (no extra hashing needed: the hash part is already cryptographic). Size and number of hash functions are derived from the valid-path count and a 1% target false-positive rate, compiled in as a constant for now. The URL is advertised through a new `BloomFilter` field in nix-cache-info (absolute or relative). The wire format is self-describing: an 8-byte "NixBloom" magic, then version, k, m and the bit array, all little-endian. Both the format and the new nix-cache-info field are documented under doc/manual/source/protocols. Co-Authored-By: Claude Opus 4.7 (1M context) --- doc/manual/source/SUMMARY.md.in | 1 + .../protocols/binary-cache-bloom-filter.md | 76 +++++++++++++++++++ doc/manual/source/protocols/nix-cache-info.md | 15 ++++ src/nix/serve.cc | 67 +++++++++++++++- 4 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 doc/manual/source/protocols/binary-cache-bloom-filter.md diff --git a/doc/manual/source/SUMMARY.md.in b/doc/manual/source/SUMMARY.md.in index b1bf1c7da391..c75cbbb9a762 100644 --- a/doc/manual/source/SUMMARY.md.in +++ b/doc/manual/source/SUMMARY.md.in @@ -129,6 +129,7 @@ - [Store Path Specification](protocols/store-path.md) - [Nix Archive (NAR) Format](protocols/nix-archive/index.md) - [Nix Cache Info Format](protocols/nix-cache-info.md) + - [Binary Cache Bloom Filter Format](protocols/binary-cache-bloom-filter.md) - [Derivation "ATerm" file format](protocols/derivation-aterm.md) - [Nix32 Encoding](protocols/nix32.md) - [`builtins.wasm` Host Interface](protocols/wasm.md) diff --git a/doc/manual/source/protocols/binary-cache-bloom-filter.md b/doc/manual/source/protocols/binary-cache-bloom-filter.md new file mode 100644 index 000000000000..985a10203566 --- /dev/null +++ b/doc/manual/source/protocols/binary-cache-bloom-filter.md @@ -0,0 +1,76 @@ 
+# Binary Cache Bloom Filter Format + +A [binary cache](@docroot@/package-management/binary-cache-substituter.md) may publish a Bloom filter of all store paths it contains. +The filter's URL is announced through the [`BloomFilter`](@docroot@/protocols/nix-cache-info.md#bloomfilter) field of the cache's [`nix-cache-info`](@docroot@/protocols/nix-cache-info.md) file — either as an absolute URL or as a path relative to the cache root. +A cache that does not advertise the field does not provide a Bloom filter; clients must not probe for one at a default path. + +A Bloom filter lets a client decide that a store path is **definitely not** in the cache without issuing a `.narinfo` request. +Membership tests are one-sided: a "not present" answer is authoritative, while a "possibly present" answer must still be confirmed by fetching the `.narinfo`. +False positives occur at a configurable rate; false negatives do not. + +MIME type: `application/octet-stream` + +## Format + +The response is binary, little-endian, with a fixed 24-byte header followed by the raw bit array: + +| Offset | Size | Field | Description | +|-------:|-----------:|-----------|----------------------------------------------------------| +| 0 | 8 | `magic` | ASCII bytes `NixBloom` (no terminating NUL). | +| 8 | 4 | `version` | `uint32` format version. Currently `1`. | +| 12 | 4 | `k` | `uint32` number of hash functions. | +| 16 | 8 | `m` | `uint64` size of the bit array, in bits. Multiple of 8. | +| 24 | `m / 8` | `bits` | The bit array. Bit at position `p` is `bits[p / 8] >> (p % 8)` masked with `1`. | + +The total response size is `24 + m / 8` bytes. + +## Membership test + +A client tests whether a store path *might* be in the cache as follows: + +1. Take the path's [hash part](@docroot@/protocols/store-path.md) — the first 32 [Nix32](@docroot@/protocols/nix32.md) characters of its base name. +2. Decode it into a 20-byte (160-bit) sequence using Nix32 decoding. +3. 
Read two 64-bit unsigned values from the decoded bytes, little-endian: + - `h1` from bytes `0..8` + - `h2` from bytes `8..16` + (The trailing 4 bytes are unused.) +4. For each `i` in `0, 1, …, k − 1`, compute the bit position + ``` + pos = ((h1 + i * h2) mod 2^64) mod m + ``` + The intermediate addition and multiplication wrap modulo 2^64 (standard unsigned 64-bit overflow) before the modulo by `m`. +5. If every `bits[pos / 8] >> (pos % 8)` has its low bit set, the path is *possibly* present; otherwise it is *definitely not* present. + +This is the standard Kirsch-Mitzenmacher double-hashing scheme. +Because a store path's hash part is already a cryptographic hash, no further hashing is required. + +## Server-side construction + +The server populates the filter by performing the same membership procedure for every valid store path and OR-ing in the resulting bits. + +Parameters are chosen from the count `n` of valid paths and a target false-positive rate `p`: + +``` +m = ceil(-n * ln(p) / (ln 2)^2), rounded up to a multiple of 8 +k = max(1, round((m / n) * ln 2)) +``` + +If `n` is zero, the server may emit a minimal filter (e.g., `m = 8`, `k = 1`, all bits zero), which correctly reports every query as "not present". + +The choice of `p` is server-defined and not advertised separately: a client can infer the asymptotic FPR from `m` and the number of paths in the cache, but does not need to in order to use the filter. + +## Caching + +The bloom filter changes whenever the cache's path set changes. +Clients should refetch periodically; an HTTP cache lifetime on the order of minutes-to-hours is typically appropriate. + +## Example + +A cache containing roughly 500 000 paths, with a 1% target false-positive rate, produces a filter with `k = 7` and `m ≈ 4.8 × 10^6` bits — roughly 600 KB on the wire including the header. 
+ +## See Also + +- [Nix Cache Info Format](@docroot@/protocols/nix-cache-info.md) +- [Store Path Specification](@docroot@/protocols/store-path.md) +- [Nix32 Encoding](@docroot@/protocols/nix32.md) +- [HTTP Binary Cache Store](@docroot@/store/types/http-binary-cache-store.md) diff --git a/doc/manual/source/protocols/nix-cache-info.md b/doc/manual/source/protocols/nix-cache-info.md index e8351e1cebe8..60ed0bfc9842 100644 --- a/doc/manual/source/protocols/nix-cache-info.md +++ b/doc/manual/source/protocols/nix-cache-info.md @@ -36,12 +36,27 @@ error: binary cache 'https://example.com' is for Nix stores with prefix '/nix/st Integer. Sets the default for [`priority`](@docroot@/store/types/http-binary-cache-store.md#store-http-binary-cache-store-priority). +### `BloomFilter` + +URL of a [Bloom filter](@docroot@/protocols/binary-cache-bloom-filter.md) that summarizes the set of store paths held by this cache. +Clients may use it to skip `.narinfo` requests for paths the filter rules out. + +The value is either an absolute URL or a path relative to the cache root: + +``` +BloomFilter: /bloom-filter +BloomFilter: https://filters.example.com/cache-abc.bloom +``` + +If absent, the cache does not publish a Bloom filter and clients must not assume one is available at any default location. 
+ ## Example ``` StoreDir: /nix/store WantMassQuery: 1 Priority: 30 +BloomFilter: /bloom-filter ``` ## Caching Behavior diff --git a/src/nix/serve.cc b/src/nix/serve.cc index 7206411fd3da..2a76e3c991c1 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -3,11 +3,16 @@ #include "nix/util/serialise.hh" #include "nix/util/signals.hh" #include "nix/util/deleter.hh" +#include "nix/util/base-nix-32.hh" +#include "nix/util/util.hh" #include "nix/store/nar-info.hh" #include "nix/store/binary-cache-store.hh" #include "nix/store/log-store.hh" #include "nix/util/environment-variables.hh" +#include +#include +#include #include #include @@ -18,6 +23,59 @@ using namespace nix; using Response = std::unique_ptr>; +static constexpr double bloomFalsePositiveRate = 0.01; + +static std::string buildBloomFilter(Store & store) +{ + auto paths = store.queryAllValidPaths(); + size_t n = paths.size(); + + uint64_t mBits; + uint32_t k; + if (n == 0) { + mBits = 8; + k = 1; + } else { + constexpr double ln2 = 0.6931471805599453; + double mF = -double(n) * std::log(bloomFalsePositiveRate) / (ln2 * ln2); + mBits = ((uint64_t(std::ceil(mF)) + 7) / 8) * 8; + long kL = std::lround((double(mBits) / double(n)) * ln2); + k = uint32_t(std::max(1, kL)); + } + + const size_t headerLen = 8 + 4 + 4 + 8; + std::string out(headerLen + mBits / 8, '\0'); + + std::memcpy(out.data(), "NixBloom", 8); + auto writeU32 = [&](size_t off, uint32_t v) { + for (int i = 0; i < 4; ++i) + out[off + i] = char((v >> (8 * i)) & 0xff); + }; + auto writeU64 = [&](size_t off, uint64_t v) { + for (int i = 0; i < 8; ++i) + out[off + i] = char((v >> (8 * i)) & 0xff); + }; + writeU32(8, 1); + writeU32(12, k); + writeU64(16, mBits); + + char * bits = out.data() + headerLen; + + for (auto & path : paths) { + auto raw = BaseNix32::decode(std::string(path.hashPart())); + assert(raw.size() == 20); + auto * rawBytes = reinterpret_cast(raw.data()); + uint64_t h1 = readLittleEndian(rawBytes); + uint64_t h2 = readLittleEndian(rawBytes 
+ 8); + for (uint32_t i = 0; i < k; ++i) { + uint64_t pos = (h1 + uint64_t(i) * h2) % mBits; + bits[pos / 8] |= uint8_t(1) << (pos % 8); + } + } + + return out; +} + struct CmdServe : StoreCommand { uint16_t port = 8080; @@ -110,7 +168,8 @@ struct CmdServe : StoreCommand auto body = std::make_unique( "StoreDir: " + store.storeDir + "\n" "WantMassQuery: " + (store.config.wantMassQuery ? "1" : "0") + "\n" - "Priority: " + std::to_string(priority.value_or(store.config.priority)) + "\n"); + "Priority: " + std::to_string(priority.value_or(store.config.priority)) + "\n" + "BloomFilter: /bloom-filter\n"); response.reset(MHD_create_response_from_buffer(body->size(), body->data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "text/x-nix-cache-info"); @@ -211,6 +270,12 @@ struct CmdServe : StoreCommand response.reset(MHD_create_response_from_buffer(log->size(), log->data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "text/plain; charset=utf-8"); + } + + else if (url == "/bloom-filter") { + auto body = std::make_unique(buildBloomFilter(store)); + response.reset(MHD_create_response_from_buffer(body->size(), body->data(), MHD_RESPMEM_MUST_COPY)); + MHD_add_response_header(response.get(), "Content-Type", "application/octet-stream"); } else return notFound(); From 6658cb97ca4e819cc7ef14fd174118f3a129b2bb Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 01:31:00 +0200 Subject: [PATCH 04/25] BinaryCacheStore: consult bloom filter to skip definite misses Read the cache's bloom filter (advertised via the new BloomFilter: field in nix-cache-info) and consult it from queryPathInfoUncached and isValidPathUncached. A "definitely missing" answer short-circuits the .narinfo round trip; "possibly present" falls through to the existing fetch. The filter is cached on disk in NarInfoDiskCache as a blob and probed via sqlite3_blob_read so we only touch ~k bytes per query. 
Reuse of the negative-narinfo TTL keeps the cadence familiar. ETag/304 is plumbed through a new BinaryCacheStore::getFileConditional hook (HTTP overrides it, file:// falls back to a plain GET). Concurrent first probes across processes are serialised on a $XDG_CACHE_HOME lockfile, mirroring the fetcher pattern in libfetchers. Fetch failures log a warning and disable the filter for the rest of the process. Bumps the on-disk cache file to v2 because both BinaryCaches gains a bloomFilterUrl column and a new BloomFilters table is added; v1 files are orphaned and refilled on first use. A debug log records every "definitely missing" decision so the behaviour is easy to inspect. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libstore/binary-cache-store.cc | 167 ++++++++++++++++++ src/libstore/http-binary-cache-store.cc | 27 ++- .../include/nix/store/binary-cache-store.hh | 54 ++++++ .../nix/store/http-binary-cache-store.hh | 2 + .../include/nix/store/nar-info-disk-cache.hh | 41 +++++ src/libstore/nar-info-disk-cache.cc | 118 ++++++++++++- 6 files changed, 402 insertions(+), 7 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index a7f636f12311..c9bc19d4da58 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -14,11 +14,17 @@ #include "nix/util/signals.hh" #include "nix/util/archive.hh" #include "nix/util/util.hh" +#include "nix/util/base-nix-32.hh" +#include "nix/util/users.hh" +#include "nix/store/pathlocks.hh" +#include #include +#include #include #include #include +#include #include #include @@ -68,11 +74,167 @@ void BinaryCacheStore::init() config.wantMassQuery.setDefault(value == "1"); } else if (name == "Priority") { config.priority.setDefault(std::stoi(value)); + } else if (name == "BloomFilter") { + bloomFilterUrl = value; } } } } +ConditionalGetResult +BinaryCacheStore::getFileConditional(const std::string & path, const std::string & /*expectedETag*/) +{ + /* Default: no 
ETag support; just do an ordinary fetch. */ + auto data = getFile(path); + return ConditionalGetResult{.data = std::move(data), .etag = "", .notModified = false}; +} + +static std::vector bloomBitPositions(const StorePath & path, uint32_t k, uint64_t mBits) +{ + auto raw = BaseNix32::decode(std::string(path.hashPart())); + assert(raw.size() == 20); + auto * b = reinterpret_cast(raw.data()); + uint64_t h1 = readLittleEndian(b); + uint64_t h2 = readLittleEndian(b + 8); + std::vector out(k); + for (uint32_t i = 0; i < k; ++i) + out[i] = (h1 + uint64_t(i) * h2) % mBits; + return out; +} + +bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept +{ + try { + if (!diskCache || !bloomFilterUrl) + return false; + + const auto uri = config.getReference().render(/*withParams=*/false); + + auto probe = [&](uint32_t k, uint64_t mBits) { + auto positions = bloomBitPositions(storePath, k, mBits); + bool definitelyMissing = !diskCache->probeBloomFilter(uri, positions); + if (definitelyMissing) + debug("bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); + return definitelyMissing; + }; + + /* Fast path: filter already loaded or known disabled. */ + { + auto state(bloomState.lock()); + if (state->status == BloomState::Disabled) + return false; + if (state->status == BloomState::Ready) + return probe(state->k, state->mBits); + } + + /* Slow path: acquire a cross-process file lock so concurrent first-probers + don't race on the network. */ + auto lockDir = getCacheDir() / "bloom-filter-locks"; + std::filesystem::create_directories(lockDir); + auto lockFile = + lockDir / hashString(HashAlgorithm::SHA256, uri).to_string(HashFormat::Base16, /*includePrefix=*/false); + PathLocks fetchLock( + {lockFile.string()}, fmt("waiting for another Nix process to fetch bloom filter for '%s'...", uri)); + + /* Check disk cache while holding the lock: another process may have + just refreshed it. 
*/ + NarInfoDiskCache::BloomFilterMeta meta; + bool haveMeta = false; + std::string expectedETag; + if (auto m = diskCache->lookupBloomFilter(uri)) { + auto ttl = (time_t) settings.getNarInfoDiskCacheSettings().ttlNegative.get(); + if (time(nullptr) - m->timestamp <= ttl) { + meta = *m; + haveMeta = true; + } else { + expectedETag = m->etag; + } + } + + if (!haveMeta) { + const auto & url = *bloomFilterUrl; + if (hasPrefix(url, "http://") || hasPrefix(url, "https://")) { + warn( + "bloom filter at absolute URL '%s' is not yet supported; disabling bloom filter for cache '%s'", + url, + uri); + bloomState.lock()->status = BloomState::Disabled; + return false; + } + std::string path = url; + while (!path.empty() && path[0] == '/') + path.erase(0, 1); + + ConditionalGetResult res; + try { + res = getFileConditional(path, expectedETag); + } catch (Error & e) { + warn("failed to fetch bloom filter from cache '%s': %s; disabling for this process", uri, e.message()); + bloomState.lock()->status = BloomState::Disabled; + return false; + } + + if (res.notModified) { + diskCache->touchBloomFilter(uri, res.etag.empty() ? 
expectedETag : res.etag); + auto m = diskCache->lookupBloomFilter(uri); + if (!m) { + warn("bloom filter cache row missing after 304 for '%s'; disabling", uri); + bloomState.lock()->status = BloomState::Disabled; + return false; + } + meta = *m; + } else if (!res.data) { + warn("bloom filter at '%s' returned 404; disabling for this process", uri); + bloomState.lock()->status = BloomState::Disabled; + return false; + } else { + const auto & body = *res.data; + if (body.size() < 24 || std::memcmp(body.data(), "NixBloom", 8) != 0) { + warn("bloom filter from cache '%s' has invalid magic; disabling", uri); + bloomState.lock()->status = BloomState::Disabled; + return false; + } + auto readU32 = [&](size_t off) { + return uint32_t((unsigned char) body[off]) + | (uint32_t((unsigned char) body[off + 1]) << 8) + | (uint32_t((unsigned char) body[off + 2]) << 16) + | (uint32_t((unsigned char) body[off + 3]) << 24); + }; + auto readU64 = [&](size_t off) { + uint64_t v = 0; + for (int i = 0; i < 8; ++i) + v |= uint64_t((unsigned char) body[off + i]) << (8 * i); + return v; + }; + uint32_t version = readU32(8); + uint32_t k = readU32(12); + uint64_t mBits = readU64(16); + if (version != 1 || mBits == 0 || mBits % 8 != 0 || body.size() != 24 + mBits / 8) { + warn("bloom filter from cache '%s' has invalid header; disabling", uri); + bloomState.lock()->status = BloomState::Disabled; + return false; + } + std::span bits( + reinterpret_cast(body.data() + 24), (size_t) (mBits / 8)); + diskCache->upsertBloomFilter(uri, res.etag, k, mBits, bits); + meta = {.k = k, .mBits = mBits, .etag = res.etag, .timestamp = time(nullptr)}; + } + } + + { + auto state(bloomState.lock()); + state->status = BloomState::Ready; + state->k = meta.k; + state->mBits = meta.mBits; + } + + return probe(meta.k, meta.mBits); + } catch (...) 
{ + ignoreExceptionExceptInterrupt(); + return false; + } +} + std::optional BinaryCacheStore::getNixCacheInfo() { return getFile(cacheInfoFile); @@ -527,6 +689,8 @@ StorePath BinaryCacheStore::addToStoreFromDump( bool BinaryCacheStore::isValidPathUncached(const StorePath & storePath) { + if (isDefinitelyMissing(storePath)) + return false; // FIXME: this only checks whether a .narinfo with a matching hash // part exists. So ‘f4kb...-foo’ matches ‘f4kb...-bar’, even // though they shouldn't. Not easily fixed. @@ -580,6 +744,9 @@ void BinaryCacheStore::queryPathInfoUncached( auto callbackPtr = std::make_shared(std::move(callback)); try { + if (isDefinitelyMissing(storePath)) + return (*callbackPtr)({}); + auto uri = config.getReference().render(/*FIXME withParams=*/false); auto storePathS = printStorePath(storePath); auto act = std::make_shared( diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index 2b4ccfcaef3e..ae180c63fa98 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -69,6 +69,7 @@ void HttpBinaryCacheStore::init() if (auto cacheInfo = diskCache->upToDateCacheExists(cacheKey)) { config->wantMassQuery.setDefault(cacheInfo->wantMassQuery); config->priority.setDefault(cacheInfo->priority); + bloomFilterUrl = cacheInfo->bloomFilterUrl; } else { try { BinaryCacheStore::init(); @@ -76,7 +77,9 @@ void HttpBinaryCacheStore::init() throw Error("'%s' does not appear to be a binary cache", config->cacheUri.to_string()); } diskCache->createCache( - cacheKey, config->storeDir, {.wantMassQuery = config->wantMassQuery, .priority = config->priority}); + cacheKey, + config->storeDir, + {.wantMassQuery = config->wantMassQuery, .priority = config->priority, .bloomFilterUrl = bloomFilterUrl}); } } @@ -260,6 +263,28 @@ void HttpBinaryCacheStore::getFile(const std::string & path, Callbackdownload(request); + return ConditionalGetResult{ + .data = result.cached ? 
std::optional(std::string{}) + : std::optional(std::move(result.data)), + .etag = std::move(result.etag), + .notModified = result.cached, + }; + } catch (FileTransferError & e) { + if (e.error == FileTransfer::NotFound || e.error == FileTransfer::Forbidden) + return ConditionalGetResult{.data = std::nullopt, .etag = "", .notModified = false}; + maybeDisable(); + throw; + } +} + std::optional HttpBinaryCacheStore::getNixCacheInfo() { try { diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 6a66e901a883..37ea20074726 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -6,6 +6,7 @@ #include "nix/store/log-store.hh" #include "nix/util/pool.hh" +#include "nix/util/sync.hh" #include @@ -68,6 +69,22 @@ struct BinaryCacheStoreConfig : virtual StoreConfig )"}; }; +/** + * Result of a conditional HTTP-style GET. Returned by + * `BinaryCacheStore::getFileConditional`. + */ +struct ConditionalGetResult +{ + /** Response body. Empty if `notModified`. `nullopt` if the file does not exist (404). */ + std::optional data; + + /** ETag returned by the server. Empty if no ETag was sent. */ + std::string etag; + + /** True if the server replied 304 Not Modified to our If-None-Match. */ + bool notModified = false; +}; + /** * @note subclasses must implement at least one of the two * virtual getFile() methods. @@ -84,9 +101,28 @@ struct alignas(8) /* Work around ASAN failures on i686-linux. */ */ Config & config; + /** + * URL of the bloom filter advertised by this cache (from the + * `BloomFilter:` field in `nix-cache-info`), as written by the server. + * Absolute URL or path relative to the cache root. `nullopt` if the + * cache doesn't advertise a bloom filter. Populated by `init()` on + * the cold path or restored from the disk-cache by subclasses on the + * warm path. 
+ */ + std::optional bloomFilterUrl; + private: std::vector> signers; + struct BloomState + { + enum Status { Pending, Ready, Disabled }; + Status status = Pending; + uint32_t k = 0; + uint64_t mBits = 0; + }; + Sync bloomState; + protected: /** @@ -153,10 +189,28 @@ public: std::optional getFile(const std::string & path); + /** + * Fetch a file with an HTTP-style conditional GET. The default + * implementation just forwards to `getFile()` (no ETag support). + * `HttpBinaryCacheStore` overrides this to use `If-None-Match` and + * to surface 304 responses. + */ + virtual ConditionalGetResult + getFileConditional(const std::string & path, const std::string & expectedETag); + public: virtual void init() override; + /** + * Return true if this cache definitely does not contain `storePath`. + * Consults the bloom filter advertised by the cache; lazily fetches + * and caches the filter on first call. Returns false in every other + * case (no filter advertised, filter disabled after a failure, + * filter says "possibly present"). Never throws. 
+ */ + bool isDefinitelyMissing(const StorePath & storePath) noexcept; + private: std::string narMagic; diff --git a/src/libstore/include/nix/store/http-binary-cache-store.hh b/src/libstore/include/nix/store/http-binary-cache-store.hh index 765eb6dd5135..e91d48a0a315 100644 --- a/src/libstore/include/nix/store/http-binary-cache-store.hh +++ b/src/libstore/include/nix/store/http-binary-cache-store.hh @@ -121,6 +121,8 @@ protected: void getFile(const std::string & path, Callback> callback) noexcept override; + ConditionalGetResult getFileConditional(const std::string & path, const std::string & expectedETag) override; + std::optional getNixCacheInfo() override; std::optional isTrustedClient() override; diff --git a/src/libstore/include/nix/store/nar-info-disk-cache.hh b/src/libstore/include/nix/store/nar-info-disk-cache.hh index 37d1f1b10e25..a2c28377bf8b 100644 --- a/src/libstore/include/nix/store/nar-info-disk-cache.hh +++ b/src/libstore/include/nix/store/nar-info-disk-cache.hh @@ -5,6 +5,8 @@ #include "nix/store/nar-info.hh" #include "nix/store/realisation.hh" +#include + namespace nix { struct SQLiteSettings; @@ -30,6 +32,7 @@ struct NarInfoDiskCache int id = 0; bool wantMassQuery = false; int priority = 0; + std::optional bloomFilterUrl; }; /** @@ -51,6 +54,44 @@ struct NarInfoDiskCache virtual std::pair> lookupRealisation(const std::string & uri, const DrvOutput & id) = 0; + struct BloomFilterMeta + { + uint32_t k; + uint64_t mBits; + std::string etag; + time_t timestamp; + }; + + /** + * Return the metadata for a cached bloom filter, or nullopt if none is cached. + * Does not check the TTL; the caller decides whether to refresh. + */ + virtual std::optional lookupBloomFilter(const std::string & uri) = 0; + + /** + * Store a freshly fetched bloom filter blob (just the bit array, no header). 
+ */ + virtual void upsertBloomFilter( + const std::string & uri, + const std::string & etag, + uint32_t k, + uint64_t mBits, + std::span bits) = 0; + + /** + * Refresh the timestamp (and optionally the etag) of an existing bloom filter + * after a successful conditional GET returned 304 Not Modified. + */ + virtual void touchBloomFilter(const std::string & uri, const std::string & etag) = 0; + + /** + * Probe `bitPositions` against the cached bloom filter via random-access + * blob reads. Returns true if every position has its bit set (i.e. the + * bloom filter says "possibly present"), false otherwise (definitely + * not present, OR no filter is cached). + */ + virtual bool probeBloomFilter(const std::string & uri, std::span bitPositions) = 0; + /** * Return a singleton cache object that can be used concurrently by * multiple threads. diff --git a/src/libstore/nar-info-disk-cache.cc b/src/libstore/nar-info-disk-cache.cc index fa345764eed3..c736e2c998ac 100644 --- a/src/libstore/nar-info-disk-cache.cc +++ b/src/libstore/nar-info-disk-cache.cc @@ -1,6 +1,7 @@ #include "nix/store/nar-info-disk-cache.hh" #include "nix/util/users.hh" #include "nix/util/sync.hh" +#include "nix/util/finally.hh" #include "nix/store/sqlite.hh" #include "nix/store/globals.hh" #include "nix/store/provenance.hh" @@ -20,7 +21,18 @@ create table if not exists BinaryCaches ( timestamp integer not null, storeDir text not null, wantMassQuery integer not null, - priority integer not null + priority integer not null, + bloomFilterUrl text -- NULL if the cache doesn't advertise a bloom filter +); + +create table if not exists BloomFilters ( + cache integer primary key not null, + timestamp integer not null, + etag text, + k integer not null, + mBits integer not null, + bits blob not null, + foreign key (cache) references BinaryCaches(id) on delete cascade ); create table if not exists NARs ( @@ -75,7 +87,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache { SQLite db; SQLiteStmt insertCache, 
queryCache, insertNAR, insertMissingNAR, queryNAR, insertRealisation, - insertMissingRealisation, queryRealisation, purgeCache; + insertMissingRealisation, queryRealisation, purgeCache, queryBloomFilter, insertBloomFilter, + touchBloomFilter, queryBloomFilterRowId; std::map caches; }; @@ -84,7 +97,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache NarInfoDiskCacheImpl( const Settings & settings, SQLiteSettings sqliteSettings, - std::filesystem::path dbPath = getCacheDir() / "binary-cache-detsys-v1.sqlite") + std::filesystem::path dbPath = getCacheDir() / "binary-cache-detsys-v2.sqlite") : NarInfoDiskCache{settings} { auto state(_state.lock()); @@ -99,11 +112,21 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache state->insertCache.create( state->db, - "insert into BinaryCaches(url, timestamp, storeDir, wantMassQuery, priority) values (?1, ?2, ?3, ?4, ?5) on conflict (url) do update set timestamp = ?2, storeDir = ?3, wantMassQuery = ?4, priority = ?5 returning id;"); + "insert into BinaryCaches(url, timestamp, storeDir, wantMassQuery, priority, bloomFilterUrl) values (?1, ?2, ?3, ?4, ?5, ?6) on conflict (url) do update set timestamp = ?2, storeDir = ?3, wantMassQuery = ?4, priority = ?5, bloomFilterUrl = ?6 returning id;"); state->queryCache.create( state->db, - "select id, storeDir, wantMassQuery, priority from BinaryCaches where url = ? and timestamp > ?"); + "select id, storeDir, wantMassQuery, priority, bloomFilterUrl from BinaryCaches where url = ? and timestamp > ?"); + + state->queryBloomFilter.create(state->db, "select timestamp, etag, k, mBits from BloomFilters where cache = ?"); + + state->queryBloomFilterRowId.create(state->db, "select rowid from BloomFilters where cache = ?"); + + state->insertBloomFilter.create( + state->db, + "insert or replace into BloomFilters(cache, timestamp, etag, k, mBits, bits) values (?, ?, ?, ?, ?, ?)"); + + state->touchBloomFilter.create(state->db, "update BloomFilters set timestamp = ?, etag = ? 
where cache = ?"); state->insertNAR.create( state->db, @@ -195,6 +218,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache .id = (int) queryCache.getInt(0), .wantMassQuery = queryCache.getInt(2) != 0, .priority = (int) queryCache.getInt(3), + .bloomFilterUrl = queryCache.isNull(4) ? std::nullopt : std::optional(queryCache.getStr(4)), }}; state.caches.emplace(uri, cache); } @@ -223,7 +247,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache .apply(time(nullptr)) .apply(storeDir) .apply(info.wantMassQuery) - .apply(info.priority)); + .apply(info.priority) + .apply(info.bloomFilterUrl.value_or(""), info.bloomFilterUrl.has_value())); if (!r.next()) { unreachable(); } @@ -400,6 +425,87 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache .exec(); }); } + + std::optional lookupBloomFilter(const std::string & uri) override + { + return retrySQLite>([&]() -> std::optional { + auto state(_state.lock()); + auto & cache(getCache(*state, uri)); + auto q(state->queryBloomFilter.use().apply(cache.info.id)); + if (!q.next()) + return std::nullopt; + return BloomFilterMeta{ + .k = (uint32_t) q.getInt(2), + .mBits = (uint64_t) q.getInt(3), + .etag = q.isNull(1) ? 
std::string{} : q.getStr(1), + .timestamp = (time_t) q.getInt(0), + }; + }); + } + + void upsertBloomFilter( + const std::string & uri, + const std::string & etag, + uint32_t k, + uint64_t mBits, + std::span bits) override + { + retrySQLite([&]() { + auto state(_state.lock()); + auto & cache(getCache(*state, uri)); + state->insertBloomFilter.use() + .apply(cache.info.id) + .apply(time(nullptr)) + .apply(etag, !etag.empty()) + .apply((uint64_t) k) + .apply(mBits) + .apply(reinterpret_cast(bits.data()), bits.size()) + .exec(); + }); + } + + void touchBloomFilter(const std::string & uri, const std::string & etag) override + { + retrySQLite([&]() { + auto state(_state.lock()); + auto & cache(getCache(*state, uri)); + state->touchBloomFilter.use().apply(time(nullptr)).apply(etag, !etag.empty()).apply(cache.info.id).exec(); + }); + } + + bool probeBloomFilter(const std::string & uri, std::span bitPositions) override + { + return retrySQLite([&]() -> bool { + auto state(_state.lock()); + auto & cache(getCache(*state, uri)); + + int64_t rowid; + { + auto q(state->queryBloomFilterRowId.use().apply(cache.info.id)); + if (!q.next()) + return false; // no cached filter + rowid = q.getInt(0); + } + + sqlite3_blob * blob = nullptr; + if (sqlite3_blob_open(state->db, "main", "BloomFilters", "bits", rowid, /*write=*/0, &blob) != SQLITE_OK) + SQLiteError::throw_(state->db, "opening bloom-filter blob"); + Finally _closeBlob([&] { + if (blob) + sqlite3_blob_close(blob); + }); + + for (auto pos : bitPositions) { + unsigned char byte = 0; + int rc = sqlite3_blob_read(blob, &byte, 1, (int) (pos / 8)); + if (rc != SQLITE_OK) + SQLiteError::throw_(state->db, "reading bloom-filter blob"); + if (!((byte >> (pos % 8)) & 1)) + return false; + } + return true; + }); + } }; ref NarInfoDiskCache::get(const Settings & settings, SQLiteSettings sqliteSettings) From 885295d4c31c8358763ac30f65825e166e43a233 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 01:51:11 +0200 Subject: 
[PATCH 05/25] libstore: factor bloom-filter bit positions into a shared helper The server-side filter builder in `nix serve` and the client-side probe in BinaryCacheStore were doing the same Nix32-decode + double hashing dance with two slightly different bodies. Hoist the iteration into a templated `forEachBloomBitPosition` in a new `nix/store/bloom-filter.hh` so the wire-format math lives in exactly one place; callers supply a body that either sets a bit or pushes a position. Pure refactor: the served filter bytes and the client probe results are unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libstore/binary-cache-store.cc | 14 +++----- .../include/nix/store/bloom-filter.hh | 36 +++++++++++++++++++ src/libstore/include/nix/store/meson.build | 1 + src/nix/serve.cc | 18 +++------- 4 files changed, 45 insertions(+), 24 deletions(-) create mode 100644 src/libstore/include/nix/store/bloom-filter.hh diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index c9bc19d4da58..a9443f4e23a0 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -14,11 +14,10 @@ #include "nix/util/signals.hh" #include "nix/util/archive.hh" #include "nix/util/util.hh" -#include "nix/util/base-nix-32.hh" #include "nix/util/users.hh" +#include "nix/store/bloom-filter.hh" #include "nix/store/pathlocks.hh" -#include #include #include #include @@ -91,14 +90,9 @@ BinaryCacheStore::getFileConditional(const std::string & path, const std::string static std::vector bloomBitPositions(const StorePath & path, uint32_t k, uint64_t mBits) { - auto raw = BaseNix32::decode(std::string(path.hashPart())); - assert(raw.size() == 20); - auto * b = reinterpret_cast(raw.data()); - uint64_t h1 = readLittleEndian(b); - uint64_t h2 = readLittleEndian(b + 8); - std::vector out(k); - for (uint32_t i = 0; i < k; ++i) - out[i] = (h1 + uint64_t(i) * h2) % mBits; + std::vector out; + out.reserve(k); + forEachBloomBitPosition(path, k, mBits, 
[&](uint64_t pos) { out.push_back(pos); }); return out; } diff --git a/src/libstore/include/nix/store/bloom-filter.hh b/src/libstore/include/nix/store/bloom-filter.hh new file mode 100644 index 000000000000..5edbcf400a5b --- /dev/null +++ b/src/libstore/include/nix/store/bloom-filter.hh @@ -0,0 +1,36 @@ +#pragma once +///@file + +#include "nix/store/path.hh" +#include "nix/util/base-nix-32.hh" +#include "nix/util/util.hh" + +#include +#include +#include + +namespace nix { + +/** + * Invoke `f(uint64_t pos)` for each of the `k` bit positions in an + * `mBits`-sized bloom filter that correspond to `path`. + * + * Kirsch-Mitzenmacher double hashing over the 160 bits of the path's + * `hashPart`; intermediate arithmetic wraps modulo 2^64 before the + * final modulo by `mBits`. See + * `doc/manual/source/protocols/binary-cache-bloom-filter.md` for the + * full specification. + */ +template +void forEachBloomBitPosition(const StorePath & path, uint32_t k, uint64_t mBits, F && f) +{ + auto raw = BaseNix32::decode(std::string(path.hashPart())); + assert(raw.size() == 20); + auto * b = reinterpret_cast(raw.data()); + uint64_t h1 = readLittleEndian(b); + uint64_t h2 = readLittleEndian(b + 8); + for (uint32_t i = 0; i < k; ++i) + f((h1 + uint64_t(i) * h2) % mBits); +} + +} // namespace nix diff --git a/src/libstore/include/nix/store/meson.build b/src/libstore/include/nix/store/meson.build index 9900e64c67a0..d59fc3a5ce43 100644 --- a/src/libstore/include/nix/store/meson.build +++ b/src/libstore/include/nix/store/meson.build @@ -14,6 +14,7 @@ headers = [ config_pub_h ] + files( 'async-path-writer.hh', 'aws-creds.hh', 'binary-cache-store.hh', + 'bloom-filter.hh', 'build-result.hh', 'build/build-log.hh', 'build/derivation-builder.hh', diff --git a/src/nix/serve.cc b/src/nix/serve.cc index 2a76e3c991c1..dfdf035c6805 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -3,14 +3,12 @@ #include "nix/util/serialise.hh" #include "nix/util/signals.hh" #include "nix/util/deleter.hh" 
-#include "nix/util/base-nix-32.hh" -#include "nix/util/util.hh" #include "nix/store/nar-info.hh" #include "nix/store/binary-cache-store.hh" +#include "nix/store/bloom-filter.hh" #include "nix/store/log-store.hh" #include "nix/util/environment-variables.hh" -#include #include #include #include @@ -61,17 +59,9 @@ static std::string buildBloomFilter(Store & store) char * bits = out.data() + headerLen; - for (auto & path : paths) { - auto raw = BaseNix32::decode(std::string(path.hashPart())); - assert(raw.size() == 20); - auto * rawBytes = reinterpret_cast(raw.data()); - uint64_t h1 = readLittleEndian(rawBytes); - uint64_t h2 = readLittleEndian(rawBytes + 8); - for (uint32_t i = 0; i < k; ++i) { - uint64_t pos = (h1 + uint64_t(i) * h2) % mBits; - bits[pos / 8] |= uint8_t(1) << (pos % 8); - } - } + for (auto & path : paths) + forEachBloomBitPosition( + path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); return out; } From 0d82d91f52b374705df206c9d45c889d4578dd28 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 11:12:33 +0200 Subject: [PATCH 06/25] Formatting --- src/libstore/binary-cache-store.cc | 3 +-- src/libstore/include/nix/store/binary-cache-store.hh | 5 +++-- src/nix/serve.cc | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index a9443f4e23a0..8b7722c22757 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -189,8 +189,7 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept return false; } auto readU32 = [&](size_t off) { - return uint32_t((unsigned char) body[off]) - | (uint32_t((unsigned char) body[off + 1]) << 8) + return uint32_t((unsigned char) body[off]) | (uint32_t((unsigned char) body[off + 1]) << 8) | (uint32_t((unsigned char) body[off + 2]) << 16) | (uint32_t((unsigned char) body[off + 3]) << 24); }; diff --git 
a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 37ea20074726..26dc4b8f7978 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -117,10 +117,12 @@ private: struct BloomState { enum Status { Pending, Ready, Disabled }; + Status status = Pending; uint32_t k = 0; uint64_t mBits = 0; }; + Sync bloomState; protected: @@ -195,8 +197,7 @@ public: * `HttpBinaryCacheStore` overrides this to use `If-None-Match` and * to surface 304 responses. */ - virtual ConditionalGetResult - getFileConditional(const std::string & path, const std::string & expectedETag); + virtual ConditionalGetResult getFileConditional(const std::string & path, const std::string & expectedETag); public: diff --git a/src/nix/serve.cc b/src/nix/serve.cc index dfdf035c6805..f0c86bc2ac32 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -60,8 +60,7 @@ static std::string buildBloomFilter(Store & store) char * bits = out.data() + headerLen; for (auto & path : paths) - forEachBloomBitPosition( - path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); + forEachBloomBitPosition(path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); return out; } From 4bf9be508b59c9f9475019495d357b87a6f0d26f Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 11:16:19 +0200 Subject: [PATCH 07/25] libstore: hoist buildBloomFilter and add nix store generate-bloom-filter Move the bloom-filter blob builder out of `nix serve`'s TU and into `src/libstore/bloom-filter.cc` so the same code can drive a new undocumented CLI command. `nix store generate-bloom-filter` takes the store's valid path set (or a newline-separated file via `--from-file`) and writes the same wire-format blob the `/bloom-filter` endpoint serves. `--false-positive-rate` overrides the 1% default. 
The command refuses to write to a terminal (mirroring `nix nario export`) and reports the resulting size + path count to stderr. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libstore/bloom-filter.cc | 49 ++++++++++++ .../include/nix/store/bloom-filter.hh | 7 ++ src/libstore/meson.build | 1 + src/nix/generate-bloom-filter.cc | 76 +++++++++++++++++++ src/nix/meson.build | 1 + src/nix/serve.cc | 48 +----------- 6 files changed, 135 insertions(+), 47 deletions(-) create mode 100644 src/libstore/bloom-filter.cc create mode 100644 src/nix/generate-bloom-filter.cc diff --git a/src/libstore/bloom-filter.cc b/src/libstore/bloom-filter.cc new file mode 100644 index 000000000000..64f35be341f0 --- /dev/null +++ b/src/libstore/bloom-filter.cc @@ -0,0 +1,49 @@ +#include "nix/store/bloom-filter.hh" + +#include +#include + +namespace nix { + +std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate) +{ + size_t n = paths.size(); + + uint64_t mBits; + uint32_t k; + if (n == 0) { + mBits = 8; + k = 1; + } else { + constexpr double ln2 = 0.6931471805599453; + double mF = -double(n) * std::log(falsePositiveRate) / (ln2 * ln2); + mBits = ((uint64_t(std::ceil(mF)) + 7) / 8) * 8; + long kL = std::lround((double(mBits) / double(n)) * ln2); + k = uint32_t(std::max(1, kL)); + } + + const size_t headerLen = 8 + 4 + 4 + 8; + std::string out(headerLen + mBits / 8, '\0'); + + std::memcpy(out.data(), "NixBloom", 8); + auto writeU32 = [&](size_t off, uint32_t v) { + for (int i = 0; i < 4; ++i) + out[off + i] = char((v >> (8 * i)) & 0xff); + }; + auto writeU64 = [&](size_t off, uint64_t v) { + for (int i = 0; i < 8; ++i) + out[off + i] = char((v >> (8 * i)) & 0xff); + }; + writeU32(8, 1); + writeU32(12, k); + writeU64(16, mBits); + + char * bits = out.data() + headerLen; + + for (auto & path : paths) + forEachBloomBitPosition(path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); + + return out; +} + +} // namespace nix diff --git 
a/src/libstore/include/nix/store/bloom-filter.hh b/src/libstore/include/nix/store/bloom-filter.hh index 5edbcf400a5b..241e28e7865e 100644 --- a/src/libstore/include/nix/store/bloom-filter.hh +++ b/src/libstore/include/nix/store/bloom-filter.hh @@ -11,6 +11,13 @@ namespace nix { +/** + * Build a bloom-filter blob (24-byte header + raw bit array, see + * `doc/manual/source/protocols/binary-cache-bloom-filter.md`) from a + * set of store paths. + */ +std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate = 0.01); + /** * Invoke `f(uint64_t pos)` for each of the `k` bit positions in an * `mBits`-sized bloom filter that correspond to `path`. diff --git a/src/libstore/meson.build b/src/libstore/meson.build index 47c7d20d59c4..10131ab495c5 100644 --- a/src/libstore/meson.build +++ b/src/libstore/meson.build @@ -295,6 +295,7 @@ sources = files( 'active-builds.cc', 'async-path-writer.cc', 'binary-cache-store.cc', + 'bloom-filter.cc', 'build-result.cc', 'build/build-log.cc', 'build/derivation-builder.cc', diff --git a/src/nix/generate-bloom-filter.cc b/src/nix/generate-bloom-filter.cc new file mode 100644 index 000000000000..0d5bc50cae6c --- /dev/null +++ b/src/nix/generate-bloom-filter.cc @@ -0,0 +1,76 @@ +#include "nix/cmd/command.hh" +#include "nix/store/bloom-filter.hh" +#include "nix/store/store-api.hh" +#include "nix/util/file-system.hh" +#include "nix/util/serialise.hh" +#include "nix/util/strings.hh" + +#include + +using namespace nix; + +struct CmdGenerateBloomFilter : StoreCommand +{ + std::optional fromFile; + double falsePositiveRate = 0.01; + + CmdGenerateBloomFilter() + { + addFlag({ + .longName = "from-file", + .description = "Read newline-separated store paths from *file* instead of " + "enumerating every valid path in the store.", + .labels = {"file"}, + .handler = {[this](std::string s) { fromFile = s; }}, + }); + addFlag({ + .longName = "false-positive-rate", + .description = "Target false-positive rate (default: 0.01).", + 
.labels = {"rate"}, + .handler = {[this](std::string s) { falsePositiveRate = std::stod(s); }}, + }); + } + + std::string description() override + { + return "build a bloom filter from the store's valid paths"; + } + + Category category() override + { + return catUndocumented; + } + + void run(ref store) override + { + auto fd = getStandardOutput(); + if (isatty(fd)) + throw UsageError("refusing to write bloom filter to a terminal"); + + StorePathSet paths; + if (fromFile) { + for (auto & line : tokenizeString(readFile(*fromFile), "\n")) { + auto trimmed = trim(line); + if (trimmed.empty()) + continue; + paths.insert(store->parseStorePath(trimmed)); + } + } else { + paths = store->queryAllValidPaths(); + } + + auto blob = buildBloomFilter(paths, falsePositiveRate); + + FdSink sink(std::move(fd)); + sink(blob); + sink.flush(); + + notice( + "Wrote bloom filter (%d bytes) for %d store paths (%f false positive rate).", + blob.size(), + paths.size(), + falsePositiveRate); + } +}; + +static auto rCmdGenerateBloomFilter = registerCommand2({"store", "generate-bloom-filter"}); diff --git a/src/nix/meson.build b/src/nix/meson.build index b7ddcc8eec44..5b452f809fa7 100644 --- a/src/nix/meson.build +++ b/src/nix/meson.build @@ -96,6 +96,7 @@ nix_sources = [ config_priv_h ] + files( 'flake-prefetch-inputs.cc', 'flake.cc', 'formatter.cc', + 'generate-bloom-filter.cc', 'hash.cc', 'log.cc', 'ls.cc', diff --git a/src/nix/serve.cc b/src/nix/serve.cc index f0c86bc2ac32..b77ce87967cb 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -9,8 +9,6 @@ #include "nix/store/log-store.hh" #include "nix/util/environment-variables.hh" -#include -#include #include #include @@ -21,50 +19,6 @@ using namespace nix; using Response = std::unique_ptr>; -static constexpr double bloomFalsePositiveRate = 0.01; - -static std::string buildBloomFilter(Store & store) -{ - auto paths = store.queryAllValidPaths(); - size_t n = paths.size(); - - uint64_t mBits; - uint32_t k; - if (n == 0) { - mBits = 8; - 
k = 1; - } else { - constexpr double ln2 = 0.6931471805599453; - double mF = -double(n) * std::log(bloomFalsePositiveRate) / (ln2 * ln2); - mBits = ((uint64_t(std::ceil(mF)) + 7) / 8) * 8; - long kL = std::lround((double(mBits) / double(n)) * ln2); - k = uint32_t(std::max(1, kL)); - } - - const size_t headerLen = 8 + 4 + 4 + 8; - std::string out(headerLen + mBits / 8, '\0'); - - std::memcpy(out.data(), "NixBloom", 8); - auto writeU32 = [&](size_t off, uint32_t v) { - for (int i = 0; i < 4; ++i) - out[off + i] = char((v >> (8 * i)) & 0xff); - }; - auto writeU64 = [&](size_t off, uint64_t v) { - for (int i = 0; i < 8; ++i) - out[off + i] = char((v >> (8 * i)) & 0xff); - }; - writeU32(8, 1); - writeU32(12, k); - writeU64(16, mBits); - - char * bits = out.data() + headerLen; - - for (auto & path : paths) - forEachBloomBitPosition(path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); - - return out; -} - struct CmdServe : StoreCommand { uint16_t port = 8080; @@ -262,7 +216,7 @@ struct CmdServe : StoreCommand } else if (url == "/bloom-filter") { - auto body = std::make_unique(buildBloomFilter(store)); + auto body = std::make_unique(buildBloomFilter(store.queryAllValidPaths())); response.reset(MHD_create_response_from_buffer(body->size(), body->data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "application/octet-stream"); } else From c41ff96ce1da7ba30adc8b30e322a8dbe047a175 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 11:23:05 +0200 Subject: [PATCH 08/25] Test false-positive rate --- src/nix/generate-bloom-filter.cc | 38 ++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/nix/generate-bloom-filter.cc b/src/nix/generate-bloom-filter.cc index 0d5bc50cae6c..1e6fcdc1527a 100644 --- a/src/nix/generate-bloom-filter.cc +++ b/src/nix/generate-bloom-filter.cc @@ -70,6 +70,44 @@ struct CmdGenerateBloomFilter : StoreCommand blob.size(), paths.size(), 
falsePositiveRate); + +#if 0 + /* Self-check the empirical false-positive rate by probing the + just-built filter with 1 000 000 random store paths. */ + auto readU32 = [&](size_t off) { + return uint32_t((unsigned char) blob[off]) | (uint32_t((unsigned char) blob[off + 1]) << 8) + | (uint32_t((unsigned char) blob[off + 2]) << 16) + | (uint32_t((unsigned char) blob[off + 3]) << 24); + }; + auto readU64 = [&](size_t off) { + uint64_t v = 0; + for (int i = 0; i < 8; ++i) + v |= uint64_t((unsigned char) blob[off + i]) << (8 * i); + return v; + }; + uint32_t k = readU32(12); + uint64_t mBits = readU64(16); + const char * bits = blob.data() + 24; + + constexpr size_t numSamples = 1000000; + size_t falsePositives = 0; + for (size_t i = 0; i < numSamples; ++i) { + auto p = StorePath::random("nix-bloom-fpr-probe"); + bool allSet = true; + forEachBloomBitPosition(p, k, mBits, [&](uint64_t pos) { + if (!((uint8_t(bits[pos / 8]) >> (pos % 8)) & 1)) + allSet = false; + }); + if (allSet) + ++falsePositives; + } + notice( + "Empirical false-positive rate over %d random probes: %d (%f, target %f).", + numSamples, + falsePositives, + double(falsePositives) / double(numSamples), + falsePositiveRate); +#endif } }; From bc713631ba9e6b7ac74a8d1319bd10af57d21b1d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 11:38:07 +0200 Subject: [PATCH 09/25] tests: cover bloom-filter rule-out and disk-cache reuse via nix serve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have `startNixServe` redirect server output to `$TEST_ROOT/nix-serve.log` so tests can inspect what the server actually received, then add two assertions to the existing HTTP-cache section of binary-cache.sh: 1. A `--debug nix path-info` for a fake store path against `nix serve` should log "bloom filter for … ruled out …", proving the client short-circuited without a `.narinfo` round trip. 2.
A second `--debug nix path-info` for a different fake path should reuse the disk-cached filter — the server should record exactly one `GET /bloom-filter` across the two probes. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/functional/binary-cache.sh | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/functional/binary-cache.sh b/tests/functional/binary-cache.sh index ee6185a07563..b4c2cc138316 100755 --- a/tests/functional/binary-cache.sh +++ b/tests/functional/binary-cache.sh @@ -63,7 +63,9 @@ stopNixServe() { startNixServe() { local portFile="$TEST_ROOT/nix-serve-port" rm -f "$portFile" - nix serve --port 0 --port-file "$portFile" "$@" & + nixServeLog="$TEST_ROOT/nix-serve.log" + rm -f "$nixServeLog" + nix serve --port 0 --port-file "$portFile" "$@" > "$nixServeLog" 2>&1 & nixServePid="$!" while [[ ! -e "$portFile" ]]; do if ! kill -0 "$nixServePid" 2>/dev/null; then @@ -145,6 +147,21 @@ nix path-info -vvvv --store "$httpBinaryCacheUrl" "$bigFile" 2> "$TEST_ROOT/log" [[ $(grep -c "downloading.*narinfo'" "$TEST_ROOT/log") -eq 1 ]] +# Bloom filter advertised by `nix serve` should rule out random store paths. +clearCacheCache +restartNixServe +fake="$NIX_STORE_DIR/00000000000000000000000000000000-fake-not-in-cache" +nix path-info --debug --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log" || true +grepQuiet "bloom filter for.*ruled out.*$fake" "$TEST_ROOT/bloom-log" + + +# A second probe with a different fake path should reuse the cached filter +# rather than fetching /bloom-filter again. +fake2="$NIX_STORE_DIR/11111111111111111111111111111111-fake-also-not-in-cache" +nix path-info --debug --store "$httpBinaryCacheUrl" "$fake2" 2> "$TEST_ROOT/bloom-log2" || true +[[ $(grep -c "url=/bloom-filter" "$nixServeLog") -eq 1 ]] + + # Test that multiple concurrent substitutions do only one download. 
clearStore nix-store --init # needed because concurrent creation of the store can give SQLite errors From e12c37ec390e607f6362e9eebf9e6872959d0ff3 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 11:58:38 +0200 Subject: [PATCH 10/25] nix serve: Implement ETag for the bloom filter Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libstore/binary-cache-store.cc | 3 ++- src/nix/serve.cc | 14 ++++++++++++++ tests/functional/binary-cache.sh | 17 +++++++++++++++-- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 8b7722c22757..7a3174d8d3cd 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -137,7 +137,7 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept std::string expectedETag; if (auto m = diskCache->lookupBloomFilter(uri)) { auto ttl = (time_t) settings.getNarInfoDiskCacheSettings().ttlNegative.get(); - if (time(nullptr) - m->timestamp <= ttl) { + if (time(nullptr) - m->timestamp < ttl) { meta = *m; haveMeta = true; } else { @@ -169,6 +169,7 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept } if (res.notModified) { + debug("bloom filter for '%s' unchanged (304 Not Modified)", uri); diskCache->touchBloomFilter(uri, res.etag.empty() ? 
expectedETag : res.etag); auto m = diskCache->lookupBloomFilter(uri); if (!m) { diff --git a/src/nix/serve.cc b/src/nix/serve.cc index b77ce87967cb..200473b8fcd2 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -1,5 +1,6 @@ #include "nix/cmd/command.hh" #include "nix/util/file-system.hh" +#include "nix/util/hash.hh" #include "nix/util/serialise.hh" #include "nix/util/signals.hh" #include "nix/util/deleter.hh" @@ -217,8 +218,21 @@ struct CmdServe : StoreCommand else if (url == "/bloom-filter") { auto body = std::make_unique(buildBloomFilter(store.queryAllValidPaths())); + auto etag = "\"" + + hashString(HashAlgorithm::SHA512, *body) + .to_string(HashFormat::Base16, /*includePrefix=*/false) + + "\""; + + if (auto * inm = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "If-None-Match"); + inm && etag == inm) { + response.reset(MHD_create_response_from_buffer(0, (void *) "", MHD_RESPMEM_PERSISTENT)); + MHD_add_response_header(response.get(), "ETag", etag.c_str()); + return MHD_queue_response(connection, MHD_HTTP_NOT_MODIFIED, response.get()); + } + response.reset(MHD_create_response_from_buffer(body->size(), body->data(), MHD_RESPMEM_MUST_COPY)); MHD_add_response_header(response.get(), "Content-Type", "application/octet-stream"); + MHD_add_response_header(response.get(), "ETag", etag.c_str()); } else return notFound(); diff --git a/tests/functional/binary-cache.sh b/tests/functional/binary-cache.sh index b4c2cc138316..36568fe120bf 100755 --- a/tests/functional/binary-cache.sh +++ b/tests/functional/binary-cache.sh @@ -148,20 +148,33 @@ nix path-info -vvvv --store "$httpBinaryCacheUrl" "$bigFile" 2> "$TEST_ROOT/log" # Bloom filter advertised by `nix serve` should rule out random store paths. +# The hashparts below are chosen so their 7 bloom positions hit several +# bits that the deterministic test-cache filter leaves unset. 
clearCacheCache restartNixServe -fake="$NIX_STORE_DIR/00000000000000000000000000000000-fake-not-in-cache" +fake="$NIX_STORE_DIR/0123456789abcdfghijklmnpqrsvwxyz-fake-not-in-cache" nix path-info --debug --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log" || true grepQuiet "bloom filter for.*ruled out.*$fake" "$TEST_ROOT/bloom-log" # A second probe with a different fake path should reuse the cached filter # rather than fetching /bloom-filter again. -fake2="$NIX_STORE_DIR/11111111111111111111111111111111-fake-also-not-in-cache" +fake2="$NIX_STORE_DIR/abcdfghijklmnpqrsvwxyz0123456789-fake-also-not-in-cache" nix path-info --debug --store "$httpBinaryCacheUrl" "$fake2" 2> "$TEST_ROOT/bloom-log2" || true [[ $(grep -c "url=/bloom-filter" "$nixServeLog") -eq 1 ]] +# `--refresh` should force the cached filter to be treated as stale; the +# client must re-fetch with `If-None-Match` and the server should reply 304 +# Not Modified instead of resending the body. +prev=$(grep -c "url=/bloom-filter" "$nixServeLog") +nix path-info --debug --refresh --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log3" || true +# One additional /bloom-filter request was made. +[[ $(grep -c "url=/bloom-filter" "$nixServeLog") -eq $((prev + 1)) ]] +# And the client logged the 304 Not Modified branch. +grepQuiet "bloom filter for.*unchanged.*304 Not Modified" "$TEST_ROOT/bloom-log3" + + # Test that multiple concurrent substitutions do only one download. 
clearStore nix-store --init # needed because concurrent creation of the store can give SQLite errors From 696e27e0fe7077656d23379a24768d0212e8af78 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 12:07:54 +0200 Subject: [PATCH 11/25] tests: loop fake hashparts until one is ruled out by the bloom filter A fixed fake hashpart can still hit a false positive against the deterministic test-fixture filter, so loop generating fakes (varying the last 6 chars of the hashpart, which is where Nix32 decode's reverse-iteration places the bytes that feed h1/h2) until one is ruled out. The disk-cache-reuse assertion folds in for free since the bloom is fetched at most once across all loop iterations. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/functional/binary-cache.sh | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/functional/binary-cache.sh b/tests/functional/binary-cache.sh index 36568fe120bf..1f967e2536bf 100755 --- a/tests/functional/binary-cache.sh +++ b/tests/functional/binary-cache.sh @@ -148,19 +148,27 @@ nix path-info -vvvv --store "$httpBinaryCacheUrl" "$bigFile" 2> "$TEST_ROOT/log" # Bloom filter advertised by `nix serve` should rule out random store paths. -# The hashparts below are chosen so their 7 bloom positions hit several -# bits that the deterministic test-cache filter leaves unset. +# Any fixed hashpart can hit a false positive, so loop generating fakes +# until one is ruled out. The counter sits in the last 6 chars of the +# hashpart because Nix32 decode reverse-iterates: low-order chars feed the +# bytes that drive `h1` in the double-hashing scheme; varying the leading +# chars wouldn't change `h1` or `h2` at all. 
clearCacheCache restartNixServe -fake="$NIX_STORE_DIR/0123456789abcdfghijklmnpqrsvwxyz-fake-not-in-cache" -nix path-info --debug --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log" || true -grepQuiet "bloom filter for.*ruled out.*$fake" "$TEST_ROOT/bloom-log" +ruledOut=0 +for n in $(seq 0 100); do + fake="$NIX_STORE_DIR/00000000000000000000000000$(printf '%06d' "$n")-fake-not-in-cache" + nix path-info --debug --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log" || true + if grep -q "bloom filter for.*ruled out.*$fake" "$TEST_ROOT/bloom-log"; then + ruledOut=1 + break + fi +done +[[ $ruledOut -eq 1 ]] -# A second probe with a different fake path should reuse the cached filter -# rather than fetching /bloom-filter again. -fake2="$NIX_STORE_DIR/abcdfghijklmnpqrsvwxyz0123456789-fake-also-not-in-cache" -nix path-info --debug --store "$httpBinaryCacheUrl" "$fake2" 2> "$TEST_ROOT/bloom-log2" || true +# The bloom filter should have been fetched exactly once across all the +# loop iterations, proving the disk-cache reuse path works. [[ $(grep -c "url=/bloom-filter" "$nixServeLog") -eq 1 ]] From 1dbe07a4371900124fee6f6822d1da72eb18ab17 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 12:11:23 +0200 Subject: [PATCH 12/25] nix serve: Add --false-positive-rate flag for the bloom filter Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libstore/include/nix/store/bloom-filter.hh | 2 +- src/nix/serve.cc | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/libstore/include/nix/store/bloom-filter.hh b/src/libstore/include/nix/store/bloom-filter.hh index 241e28e7865e..1f01e555f41b 100644 --- a/src/libstore/include/nix/store/bloom-filter.hh +++ b/src/libstore/include/nix/store/bloom-filter.hh @@ -16,7 +16,7 @@ namespace nix { * `doc/manual/source/protocols/binary-cache-bloom-filter.md`) from a * set of store paths. 
*/ -std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate = 0.01); +std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate); /** * Invoke `f(uint64_t pos)` for each of the `k` bit positions in an diff --git a/src/nix/serve.cc b/src/nix/serve.cc index 200473b8fcd2..6241ce139c2c 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -26,6 +26,7 @@ struct CmdServe : StoreCommand std::string listenAddress = "127.0.0.1"; std::optional priority; std::optional portFile; + double bloomFalsePositiveRate = 0.01; CmdServe() { @@ -55,6 +56,13 @@ struct CmdServe : StoreCommand .labels = {"priority"}, .handler = {[this](std::string s) { priority = std::stoi(s); }}, }); + addFlag({ + .longName = "false-positive-rate", + .description = "Target false-positive rate for the bloom filter " + "served at `/bloom-filter` (default: 0.01).", + .labels = {"rate"}, + .handler = {[this](std::string s) { bloomFalsePositiveRate = std::stod(s); }}, + }); } std::string description() override @@ -217,7 +225,8 @@ struct CmdServe : StoreCommand } else if (url == "/bloom-filter") { - auto body = std::make_unique(buildBloomFilter(store.queryAllValidPaths())); + auto body = std::make_unique( + buildBloomFilter(store.queryAllValidPaths(), bloomFalsePositiveRate)); auto etag = "\"" + hashString(HashAlgorithm::SHA512, *body) .to_string(HashFormat::Base16, /*includePrefix=*/false) From 8781bbea41ab32b5d1a7b796ecbb18c0ff67decc Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 12:12:00 +0200 Subject: [PATCH 13/25] Formatting --- src/nix/serve.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/nix/serve.cc b/src/nix/serve.cc index 6241ce139c2c..0cf1d7dc4ea9 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -225,12 +225,11 @@ struct CmdServe : StoreCommand } else if (url == "/bloom-filter") { - auto body = std::make_unique( - buildBloomFilter(store.queryAllValidPaths(), bloomFalsePositiveRate)); - 
auto etag = "\"" - + hashString(HashAlgorithm::SHA512, *body) - .to_string(HashFormat::Base16, /*includePrefix=*/false) - + "\""; + auto body = + std::make_unique(buildBloomFilter(store.queryAllValidPaths(), bloomFalsePositiveRate)); + auto etag = + "\"" + hashString(HashAlgorithm::SHA512, *body).to_string(HashFormat::Base16, /*includePrefix=*/false) + + "\""; if (auto * inm = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "If-None-Match"); inm && etag == inm) { From 4b4ff7feafa4998a229fe47a78bb8f93eca81c98 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 12:28:59 +0200 Subject: [PATCH 14/25] bloom filter: switch wire integers to u64 and use StringSink/StringSource Replace the hand-rolled byte twiddling in buildBloomFilter with a StringSink and the `sink << n` idiom; replace the matching reader in BinaryCacheStore::isDefinitelyMissing with a StringSource and `source >> ...`. The wire format becomes uniformly little-endian u64 for all integer fields, growing the header from 24 to 32 bytes (negligible vs. the bit array). The Source-based parse also reports out-of-range values via SerialisationError rather than silently truncating. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../protocols/binary-cache-bloom-filter.md | 12 +++--- src/libstore/binary-cache-store.cc | 32 +++++++-------- src/libstore/bloom-filter.cc | 39 +++++++------------ src/nix/generate-bloom-filter.cc | 11 ++---- 4 files changed, 39 insertions(+), 55 deletions(-) diff --git a/doc/manual/source/protocols/binary-cache-bloom-filter.md b/doc/manual/source/protocols/binary-cache-bloom-filter.md index 985a10203566..095790635c60 100644 --- a/doc/manual/source/protocols/binary-cache-bloom-filter.md +++ b/doc/manual/source/protocols/binary-cache-bloom-filter.md @@ -12,17 +12,17 @@ MIME type: `application/octet-stream` ## Format -The response is binary, little-endian, with a fixed 24-byte header followed by the raw bit array: +The response is binary, little-endian, with a fixed 32-byte header followed by the raw bit array: | Offset | Size | Field | Description | |-------:|-----------:|-----------|----------------------------------------------------------| | 0 | 8 | `magic` | ASCII bytes `NixBloom` (no terminating NUL). | -| 8 | 4 | `version` | `uint32` format version. Currently `1`. | -| 12 | 4 | `k` | `uint32` number of hash functions. | -| 16 | 8 | `m` | `uint64` size of the bit array, in bits. Multiple of 8. | -| 24 | `m / 8` | `bits` | The bit array. Bit at position `p` is `bits[p / 8] >> (p % 8)` masked with `1`. | +| 8 | 8 | `version` | `uint64` format version. Currently `1`. | +| 16 | 8 | `k` | `uint64` number of hash functions. | +| 24 | 8 | `m` | `uint64` size of the bit array, in bits. Multiple of 8. | +| 32 | `m / 8` | `bits` | The bit array. Bit at position `p` is `bits[p / 8] >> (p % 8)` masked with `1`. | -The total response size is `24 + m / 8` bytes. +The total response size is `32 + m / 8` bytes. 
## Membership test diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 7a3174d8d3cd..61f5b2391512 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -184,32 +184,30 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept return false; } else { const auto & body = *res.data; - if (body.size() < 24 || std::memcmp(body.data(), "NixBloom", 8) != 0) { + constexpr size_t headerLen = 8 + 8 + 8 + 8; + if (body.size() < headerLen || std::memcmp(body.data(), "NixBloom", 8) != 0) { warn("bloom filter from cache '%s' has invalid magic; disabling", uri); bloomState.lock()->status = BloomState::Disabled; return false; } - auto readU32 = [&](size_t off) { - return uint32_t((unsigned char) body[off]) | (uint32_t((unsigned char) body[off + 1]) << 8) - | (uint32_t((unsigned char) body[off + 2]) << 16) - | (uint32_t((unsigned char) body[off + 3]) << 24); - }; - auto readU64 = [&](size_t off) { - uint64_t v = 0; - for (int i = 0; i < 8; ++i) - v |= uint64_t((unsigned char) body[off + i]) << (8 * i); - return v; - }; - uint32_t version = readU32(8); - uint32_t k = readU32(12); - uint64_t mBits = readU64(16); - if (version != 1 || mBits == 0 || mBits % 8 != 0 || body.size() != 24 + mBits / 8) { + StringSource source(std::string_view(body).substr(8)); + uint64_t version; + uint32_t k; + uint64_t mBits; + try { + source >> version >> k >> mBits; + } catch (SerialisationError &) { + warn("bloom filter from cache '%s' has invalid header; disabling", uri); + bloomState.lock()->status = BloomState::Disabled; + return false; + } + if (version != 1 || mBits == 0 || mBits % 8 != 0 || body.size() != headerLen + mBits / 8) { warn("bloom filter from cache '%s' has invalid header; disabling", uri); bloomState.lock()->status = BloomState::Disabled; return false; } std::span bits( - reinterpret_cast(body.data() + 24), (size_t) (mBits / 8)); + reinterpret_cast(body.data() + headerLen), (size_t) 
(mBits / 8)); diskCache->upsertBloomFilter(uri, res.etag, k, mBits, bits); meta = {.k = k, .mBits = mBits, .etag = res.etag, .timestamp = time(nullptr)}; } diff --git a/src/libstore/bloom-filter.cc b/src/libstore/bloom-filter.cc index 64f35be341f0..4c2700be8e61 100644 --- a/src/libstore/bloom-filter.cc +++ b/src/libstore/bloom-filter.cc @@ -1,7 +1,7 @@ #include "nix/store/bloom-filter.hh" +#include "nix/util/serialise.hh" #include -#include namespace nix { @@ -9,12 +9,9 @@ std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRat { size_t n = paths.size(); - uint64_t mBits; - uint32_t k; - if (n == 0) { - mBits = 8; - k = 1; - } else { + uint64_t mBits = 8; + uint32_t k = 1; + if (n) { constexpr double ln2 = 0.6931471805599453; double mF = -double(n) * std::log(falsePositiveRate) / (ln2 * ln2); mBits = ((uint64_t(std::ceil(mF)) + 7) / 8) * 8; @@ -22,28 +19,22 @@ std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRat k = uint32_t(std::max(1, kL)); } - const size_t headerLen = 8 + 4 + 4 + 8; - std::string out(headerLen + mBits / 8, '\0'); + constexpr size_t headerLen = 8 + 8 + 8 + 8; + StringSink sink(headerLen + mBits / 8); - std::memcpy(out.data(), "NixBloom", 8); - auto writeU32 = [&](size_t off, uint32_t v) { - for (int i = 0; i < 4; ++i) - out[off + i] = char((v >> (8 * i)) & 0xff); - }; - auto writeU64 = [&](size_t off, uint64_t v) { - for (int i = 0; i < 8; ++i) - out[off + i] = char((v >> (8 * i)) & 0xff); - }; - writeU32(8, 1); - writeU32(12, k); - writeU64(16, mBits); - - char * bits = out.data() + headerLen; + using namespace std::string_view_literals; + sink("NixBloom"sv); + sink << 1; // version + sink << k; + sink << mBits; + assert(sink.s.size() == headerLen); + sink.s.resize(headerLen + mBits / 8); + char * bits = sink.s.data() + headerLen; for (auto & path : paths) forEachBloomBitPosition(path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); - return out; + return 
std::move(sink.s); } } // namespace nix diff --git a/src/nix/generate-bloom-filter.cc b/src/nix/generate-bloom-filter.cc index 1e6fcdc1527a..2e5c1507464b 100644 --- a/src/nix/generate-bloom-filter.cc +++ b/src/nix/generate-bloom-filter.cc @@ -74,20 +74,15 @@ struct CmdGenerateBloomFilter : StoreCommand #if 0 /* Self-check the empirical false-positive rate by probing the just-built filter with 10 000 random store paths. */ - auto readU32 = [&](size_t off) { - return uint32_t((unsigned char) blob[off]) | (uint32_t((unsigned char) blob[off + 1]) << 8) - | (uint32_t((unsigned char) blob[off + 2]) << 16) - | (uint32_t((unsigned char) blob[off + 3]) << 24); - }; auto readU64 = [&](size_t off) { uint64_t v = 0; for (int i = 0; i < 8; ++i) v |= uint64_t((unsigned char) blob[off + i]) << (8 * i); return v; }; - uint32_t k = readU32(12); - uint64_t mBits = readU64(16); - const char * bits = blob.data() + 24; + uint32_t k = uint32_t(readU64(16)); + uint64_t mBits = readU64(24); + const char * bits = blob.data() + 32; constexpr size_t numSamples = 1000000; size_t falsePositives = 0; From 55577fa9fb3803b16fe2a7256cbc853f7d828453 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sun, 7 Jun 2026 15:18:25 +0200 Subject: [PATCH 15/25] Move ConditionalGetResult --- src/libstore/binary-cache-store.cc | 2 +- src/libstore/http-binary-cache-store.cc | 2 +- .../include/nix/store/binary-cache-store.hh | 32 +++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 61f5b2391512..2283217d33b9 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -80,7 +80,7 @@ void BinaryCacheStore::init() } } -ConditionalGetResult +BinaryCacheStore::ConditionalGetResult BinaryCacheStore::getFileConditional(const std::string & path, const std::string & /*expectedETag*/) { /* Default: no ETag support; just do an ordinary fetch. 
*/ diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index ae180c63fa98..8322b26f4ab7 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -263,7 +263,7 @@ void HttpBinaryCacheStore::getFile(const std::string & path, Callback data; - - /** ETag returned by the server. Empty if no ETag was sent. */ - std::string etag; - - /** True if the server replied 304 Not Modified to our If-None-Match. */ - bool notModified = false; -}; - /** * @note subclasses must implement at least one of the two * virtual getFile() methods. @@ -191,6 +175,22 @@ public: std::optional getFile(const std::string & path); + /** + * Result of a conditional HTTP-style GET. Returned by + * `BinaryCacheStore::getFileConditional`. + */ + struct ConditionalGetResult + { + /** Response body. Empty if `notModified`. `nullopt` if the file does not exist (404). */ + std::optional data; + + /** ETag returned by the server. Empty if no ETag was sent. */ + std::string etag; + + /** True if the server replied 304 Not Modified to our If-None-Match. */ + bool notModified = false; + }; + /** * Fetch a file with an HTTP-style conditional GET. The default * implementation just forwards to `getFile()` (no ETag support). 
From 4f410291ba5520b6fe653bf03aad448e79ec7243 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 11:44:34 +0200 Subject: [PATCH 16/25] bloom filter -> Bloom filter --- .../protocols/binary-cache-bloom-filter.md | 2 +- src/libstore/binary-cache-store.cc | 20 +++++++++---------- .../include/nix/store/binary-cache-store.hh | 6 +++--- .../include/nix/store/bloom-filter.hh | 2 +- .../include/nix/store/nar-info-disk-cache.hh | 10 +++++----- src/libstore/nar-info-disk-cache.cc | 2 +- src/nix/generate-bloom-filter.cc | 6 +++--- src/nix/serve.cc | 2 +- tests/functional/binary-cache.sh | 6 +++--- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/doc/manual/source/protocols/binary-cache-bloom-filter.md b/doc/manual/source/protocols/binary-cache-bloom-filter.md index 095790635c60..b27265c16bd2 100644 --- a/doc/manual/source/protocols/binary-cache-bloom-filter.md +++ b/doc/manual/source/protocols/binary-cache-bloom-filter.md @@ -61,7 +61,7 @@ The choice of `p` is server-defined and not advertised separately: a client can ## Caching -The bloom filter changes whenever the cache's path set changes. +The Bloom filter changes whenever the cache's path set changes. Clients should refetch periodically; an HTTP cache lifetime on the order of minutes-to-hours is typically appropriate. 
## Example diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 2283217d33b9..e190225b39be 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -108,7 +108,7 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept auto positions = bloomBitPositions(storePath, k, mBits); bool definitelyMissing = !diskCache->probeBloomFilter(uri, positions); if (definitelyMissing) - debug("bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); + debug("Bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); return definitelyMissing; }; @@ -128,7 +128,7 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept auto lockFile = lockDir / hashString(HashAlgorithm::SHA256, uri).to_string(HashFormat::Base16, /*includePrefix=*/false); PathLocks fetchLock( - {lockFile.string()}, fmt("waiting for another Nix process to fetch bloom filter for '%s'...", uri)); + {lockFile.string()}, fmt("waiting for another Nix process to fetch Bloom filter for '%s'...", uri)); /* Check disk cache while holding the lock: another process may have just refreshed it. 
*/ @@ -149,7 +149,7 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept const auto & url = *bloomFilterUrl; if (hasPrefix(url, "http://") || hasPrefix(url, "https://")) { warn( - "bloom filter at absolute URL '%s' is not yet supported; disabling bloom filter for cache '%s'", + "Bloom filter at absolute URL '%s' is not yet supported; disabling Bloom filter for cache '%s'", url, uri); bloomState.lock()->status = BloomState::Disabled; @@ -163,30 +163,30 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept try { res = getFileConditional(path, expectedETag); } catch (Error & e) { - warn("failed to fetch bloom filter from cache '%s': %s; disabling for this process", uri, e.message()); + warn("failed to fetch Bloom filter from cache '%s': %s; disabling for this process", uri, e.message()); bloomState.lock()->status = BloomState::Disabled; return false; } if (res.notModified) { - debug("bloom filter for '%s' unchanged (304 Not Modified)", uri); + debug("Bloom filter for '%s' unchanged (304 Not Modified)", uri); diskCache->touchBloomFilter(uri, res.etag.empty() ? 
expectedETag : res.etag); auto m = diskCache->lookupBloomFilter(uri); if (!m) { - warn("bloom filter cache row missing after 304 for '%s'; disabling", uri); + warn("Bloom filter cache row missing after 304 for '%s'; disabling", uri); bloomState.lock()->status = BloomState::Disabled; return false; } meta = *m; } else if (!res.data) { - warn("bloom filter at '%s' returned 404; disabling for this process", uri); + warn("Bloom filter at '%s' returned 404; disabling for this process", uri); bloomState.lock()->status = BloomState::Disabled; return false; } else { const auto & body = *res.data; constexpr size_t headerLen = 8 + 8 + 8 + 8; if (body.size() < headerLen || std::memcmp(body.data(), "NixBloom", 8) != 0) { - warn("bloom filter from cache '%s' has invalid magic; disabling", uri); + warn("Bloom filter from cache '%s' has invalid magic; disabling", uri); bloomState.lock()->status = BloomState::Disabled; return false; } @@ -197,12 +197,12 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept try { source >> version >> k >> mBits; } catch (SerialisationError &) { - warn("bloom filter from cache '%s' has invalid header; disabling", uri); + warn("Bloom filter from cache '%s' has invalid header; disabling", uri); bloomState.lock()->status = BloomState::Disabled; return false; } if (version != 1 || mBits == 0 || mBits % 8 != 0 || body.size() != headerLen + mBits / 8) { - warn("bloom filter from cache '%s' has invalid header; disabling", uri); + warn("Bloom filter from cache '%s' has invalid header; disabling", uri); bloomState.lock()->status = BloomState::Disabled; return false; } diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index d22350a9b313..245f32d651a5 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -86,10 +86,10 @@ struct alignas(8) /* Work around ASAN failures on i686-linux. 
*/ Config & config; /** - * URL of the bloom filter advertised by this cache (from the + * URL of the Bloom filter advertised by this cache (from the * `BloomFilter:` field in `nix-cache-info`), as written by the server. * Absolute URL or path relative to the cache root. `nullopt` if the - * cache doesn't advertise a bloom filter. Populated by `init()` on + * cache doesn't advertise a Bloom filter. Populated by `init()` on * the cold path or restored from the disk-cache by subclasses on the * warm path. */ @@ -205,7 +205,7 @@ public: /** * Return true if this cache definitely does not contain `storePath`. - * Consults the bloom filter advertised by the cache; lazily fetches + * Consults the Bloom filter advertised by the cache; lazily fetches * and caches the filter on first call. Returns false in every other * case (no filter advertised, filter disabled after a failure, * filter says "possibly present"). Never throws. diff --git a/src/libstore/include/nix/store/bloom-filter.hh b/src/libstore/include/nix/store/bloom-filter.hh index 1f01e555f41b..9f9f26ad33ff 100644 --- a/src/libstore/include/nix/store/bloom-filter.hh +++ b/src/libstore/include/nix/store/bloom-filter.hh @@ -20,7 +20,7 @@ std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRat /** * Invoke `f(uint64_t pos)` for each of the `k` bit positions in an - * `mBits`-sized bloom filter that correspond to `path`. + * `mBits`-sized Bloom filter that correspond to `path`. 
* * Kirsch-Mitzenmacher double hashing over the 160 bits of the path's * `hashPart`; intermediate arithmetic wraps modulo 2^64 before the diff --git a/src/libstore/include/nix/store/nar-info-disk-cache.hh b/src/libstore/include/nix/store/nar-info-disk-cache.hh index a2c28377bf8b..4b9e93b157b1 100644 --- a/src/libstore/include/nix/store/nar-info-disk-cache.hh +++ b/src/libstore/include/nix/store/nar-info-disk-cache.hh @@ -63,13 +63,13 @@ struct NarInfoDiskCache }; /** - * Return the metadata for a cached bloom filter, or nullopt if none is cached. + * Return the metadata for a cached Bloom filter, or nullopt if none is cached. * Does not check the TTL; the caller decides whether to refresh. */ virtual std::optional lookupBloomFilter(const std::string & uri) = 0; /** - * Store a freshly fetched bloom filter blob (just the bit array, no header). + * Store a freshly fetched Bloom filter blob (just the bit array, no header). */ virtual void upsertBloomFilter( const std::string & uri, @@ -79,15 +79,15 @@ struct NarInfoDiskCache std::span bits) = 0; /** - * Refresh the timestamp (and optionally the etag) of an existing bloom filter + * Refresh the timestamp (and optionally the etag) of an existing Bloom filter * after a successful conditional GET returned 304 Not Modified. */ virtual void touchBloomFilter(const std::string & uri, const std::string & etag) = 0; /** - * Probe `bitPositions` against the cached bloom filter via random-access + * Probe `bitPositions` against the cached Bloom filter via random-access * blob reads. Returns true if every position has its bit set (i.e. the - * bloom filter says "possibly present"), false otherwise (definitely + * Bloom filter says "possibly present"), false otherwise (definitely * not present, OR no filter is cached). 
*/ virtual bool probeBloomFilter(const std::string & uri, std::span bitPositions) = 0; diff --git a/src/libstore/nar-info-disk-cache.cc b/src/libstore/nar-info-disk-cache.cc index c736e2c998ac..152929e6d8b7 100644 --- a/src/libstore/nar-info-disk-cache.cc +++ b/src/libstore/nar-info-disk-cache.cc @@ -22,7 +22,7 @@ create table if not exists BinaryCaches ( storeDir text not null, wantMassQuery integer not null, priority integer not null, - bloomFilterUrl text -- NULL if the cache doesn't advertise a bloom filter + bloomFilterUrl text -- NULL if the cache doesn't advertise a Bloom filter ); create table if not exists BloomFilters ( diff --git a/src/nix/generate-bloom-filter.cc b/src/nix/generate-bloom-filter.cc index 2e5c1507464b..17bd27581718 100644 --- a/src/nix/generate-bloom-filter.cc +++ b/src/nix/generate-bloom-filter.cc @@ -33,7 +33,7 @@ struct CmdGenerateBloomFilter : StoreCommand std::string description() override { - return "build a bloom filter from the store's valid paths"; + return "build a Bloom filter from the store's valid paths"; } Category category() override @@ -45,7 +45,7 @@ struct CmdGenerateBloomFilter : StoreCommand { auto fd = getStandardOutput(); if (isatty(fd)) - throw UsageError("refusing to write bloom filter to a terminal"); + throw UsageError("refusing to write Bloom filter to a terminal"); StorePathSet paths; if (fromFile) { @@ -66,7 +66,7 @@ struct CmdGenerateBloomFilter : StoreCommand sink.flush(); notice( - "Wrote bloom filter (%d bytes) for %d store paths (%f false positive rate).", + "Wrote Bloom filter (%d bytes) for %d store paths (%f false positive rate).", blob.size(), paths.size(), falsePositiveRate); diff --git a/src/nix/serve.cc b/src/nix/serve.cc index 0cf1d7dc4ea9..b97463aac3b6 100644 --- a/src/nix/serve.cc +++ b/src/nix/serve.cc @@ -58,7 +58,7 @@ struct CmdServe : StoreCommand }); addFlag({ .longName = "false-positive-rate", - .description = "Target false-positive rate for the bloom filter " + .description = "Target 
false-positive rate for the Bloom filter " "served at `/bloom-filter` (default: 0.01).", .labels = {"rate"}, .handler = {[this](std::string s) { bloomFalsePositiveRate = std::stod(s); }}, diff --git a/tests/functional/binary-cache.sh b/tests/functional/binary-cache.sh index 1f967e2536bf..b8bb38c0088d 100755 --- a/tests/functional/binary-cache.sh +++ b/tests/functional/binary-cache.sh @@ -159,7 +159,7 @@ ruledOut=0 for n in $(seq 0 100); do fake="$NIX_STORE_DIR/00000000000000000000000000$(printf '%06d' "$n")-fake-not-in-cache" nix path-info --debug --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log" || true - if grep -q "bloom filter for.*ruled out.*$fake" "$TEST_ROOT/bloom-log"; then + if grep -q "Bloom filter for.*ruled out.*$fake" "$TEST_ROOT/bloom-log"; then ruledOut=1 break fi @@ -167,7 +167,7 @@ done [[ $ruledOut -eq 1 ]] -# The bloom filter should have been fetched exactly once across all the +# The Bloom filter should have been fetched exactly once across all the # loop iterations, proving the disk-cache reuse path works. [[ $(grep -c "url=/bloom-filter" "$nixServeLog") -eq 1 ]] @@ -180,7 +180,7 @@ nix path-info --debug --refresh --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ # One additional /bloom-filter request was made. [[ $(grep -c "url=/bloom-filter" "$nixServeLog") -eq $((prev + 1)) ]] # And the client logged the 304 Not Modified branch. -grepQuiet "bloom filter for.*unchanged.*304 Not Modified" "$TEST_ROOT/bloom-log3" +grepQuiet "Bloom filter for.*unchanged.*304 Not Modified" "$TEST_ROOT/bloom-log3" # Test that multiple concurrent substitutions do only one download. 
From 38d11a1f85d4c820c3b4581b8351c66775708514 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 11:52:03 +0200 Subject: [PATCH 17/25] bloom filter: validate falsePositiveRate and floor mBits at 8 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/libstore/bloom-filter.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/libstore/bloom-filter.cc b/src/libstore/bloom-filter.cc index 4c2700be8e61..95b0f85c38e3 100644 --- a/src/libstore/bloom-filter.cc +++ b/src/libstore/bloom-filter.cc @@ -7,6 +7,10 @@ namespace nix { std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate) { + /* Rejects NaN as well, because all comparisons with NaN are false. */ + if (!(falsePositiveRate > 0 && falsePositiveRate < 1)) + throw Error("Bloom filter false positive rate must be between 0 and 1, got %f", falsePositiveRate); + size_t n = paths.size(); uint64_t mBits = 8; @@ -14,7 +18,9 @@ std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRat if (n) { constexpr double ln2 = 0.6931471805599453; double mF = -double(n) * std::log(falsePositiveRate) / (ln2 * ln2); - mBits = ((uint64_t(std::ceil(mF)) + 7) / 8) * 8; + /* `falsePositiveRate` very close to 1 makes `mF` round down to zero; + keep the floor of 8 bits so we never modulo by zero later. */ + mBits = std::max(8, ((uint64_t(std::ceil(mF)) + 7) / 8) * 8); long kL = std::lround((double(mBits) / double(n)) * ln2); k = uint32_t(std::max(1, kL)); } From 1e5fe74275579362cfc220984fb53f1a87029c88 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 12:07:19 +0200 Subject: [PATCH 18/25] bloom filter: support absolute BloomFilter URLs This is already handled correctly in HttpBinaryCacheStore::makeRequest(), so we can just drop the check. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/libstore/binary-cache-store.cc | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index e190225b39be..26187282a90d 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -146,22 +146,13 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept } if (!haveMeta) { - const auto & url = *bloomFilterUrl; - if (hasPrefix(url, "http://") || hasPrefix(url, "https://")) { - warn( - "Bloom filter at absolute URL '%s' is not yet supported; disabling Bloom filter for cache '%s'", - url, - uri); - bloomState.lock()->status = BloomState::Disabled; - return false; - } - std::string path = url; - while (!path.empty() && path[0] == '/') - path.erase(0, 1); - + /* `*bloomFilterUrl` can be a full (absolute) URL or a path + relative to the cache root; either way the resolution is + done by `getFile()` / `makeRequest()`, the same as for NAR + URLs in `.narinfo` files. */ ConditionalGetResult res; try { - res = getFileConditional(path, expectedETag); + res = getFileConditional(*bloomFilterUrl, expectedETag); } catch (Error & e) { warn("failed to fetch Bloom filter from cache '%s': %s; disabling for this process", uri, e.message()); bloomState.lock()->status = BloomState::Disabled; From 3f8dc5714ee52feed1775a2032ae0b678b68dc42 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 14:20:36 +0200 Subject: [PATCH 19/25] bloom filter: fix doc comments Correct the header size (32 bytes, not 24) in the buildBloomFilter docstring. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/libstore/include/nix/store/bloom-filter.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/include/nix/store/bloom-filter.hh b/src/libstore/include/nix/store/bloom-filter.hh index 9f9f26ad33ff..b49af3060902 100644 --- a/src/libstore/include/nix/store/bloom-filter.hh +++ b/src/libstore/include/nix/store/bloom-filter.hh @@ -12,7 +12,7 @@ namespace nix { /** - * Build a bloom-filter blob (24-byte header + raw bit array, see + * Build a bloom-filter blob (32-byte header + raw bit array, see * `doc/manual/source/protocols/binary-cache-bloom-filter.md`) from a * set of store paths. */ From bf3cec822c8c12f8034d00d7f4e57f1ab2e1f5ef Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 16:08:39 +0200 Subject: [PATCH 20/25] bloom filter: drop BloomState; store raw blob; combine lookup+probe The client used to cache the filter's k/mBits in an in-process BloomState and compute bit positions from those cached values while probing the current sqlite blob. After another process refreshed the filter (possibly with different parameters) the cached k/mBits drifted from the probed blob, risking false negatives. Now: - The disk cache stores the raw filter body (header + bit array) in a single `blob` column; the k/mBits/bits columns are gone. - A combined probeBloomFilter(uri, StorePath) -> optional reads the header (for k/mBits) and the probed bits from the same blob in one locked transaction, so they can't drift. nullopt means "no fresh filter; (re)fetch". getBloomFilterETag replaces lookupBloomFilter for the conditional-GET etag. - Freshness compares the row timestamp against a fixed per-process startTime (>=), so a filter we just (re)fetched stays fresh for the rest of the run instead of being re-fetched on every query (which a moving clock caused under --refresh). 
- BloomState is reduced to a maybeDisable()-style per-process cooldown that suppresses Bloom use after a failed fetch; it no longer caches any filter parameters. - Header parsing is factored into parseBloomFilterHeader (+ the bloomFilterHeaderLen constant) in bloom-filter.{hh,cc}, shared by the builder, the fetch-time validator, and the probe. The binary-cache functional test gets a 1s sleep before the --refresh step so the cached entry deterministically predates it (freshness is at 1-second resolution). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/libstore/binary-cache-store.cc | 179 ++++++++---------- src/libstore/bloom-filter.cc | 33 +++- .../include/nix/store/binary-cache-store.hh | 26 ++- .../include/nix/store/bloom-filter.hh | 32 +++- .../include/nix/store/nar-info-disk-cache.hh | 42 ++-- src/libstore/nar-info-disk-cache.cc | 121 ++++++------ tests/functional/binary-cache.sh | 3 + 7 files changed, 240 insertions(+), 196 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 26187282a90d..3fc16b04abf8 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -88,12 +88,55 @@ BinaryCacheStore::getFileConditional(const std::string & path, const std::string return ConditionalGetResult{.data = std::move(data), .etag = "", .notModified = false}; } -static std::vector bloomBitPositions(const StorePath & path, uint32_t k, uint64_t mBits) +void BinaryCacheStore::maybeDisableBloomFilter(std::string_view uri) +{ + auto state(bloomState.lock()); + if (state->enabled) { + int t = 60; + debug("disabling Bloom filter for cache '%s' for %d seconds", uri, t); + state->enabled = false; + state->disabledUntil = std::chrono::steady_clock::now() + std::chrono::seconds(t); + } +} + +bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) { - std::vector out; - out.reserve(k); - forEachBloomBitPosition(path, k, mBits, [&](uint64_t pos) { out.push_back(pos); }); - return out; + auto 
expectedETag = diskCache->getBloomFilterETag(uri).value_or(""); + + /* `*bloomFilterUrl` can be a full (absolute) URL or a path relative to + the cache root; either way the resolution is done by `getFile()` / + `makeRequest()`, the same as for NAR URLs in `.narinfo` files. */ + ConditionalGetResult res; + try { + res = getFileConditional(*bloomFilterUrl, expectedETag); + } catch (Error & e) { + warn("failed to fetch Bloom filter from cache '%s': %s; disabling for now", uri, e.message()); + maybeDisableBloomFilter(uri); + return false; + } + + if (res.notModified) { + debug("Bloom filter for '%s' unchanged (304 Not Modified)", uri); + diskCache->touchBloomFilter(uri, res.etag.empty() ? expectedETag : res.etag); + return true; + } + + if (!res.data) { + warn("Bloom filter at '%s' returned 404; disabling for now", uri); + maybeDisableBloomFilter(uri); + return false; + } + + const auto & body = *res.data; + auto params = parseBloomFilterHeader(body); + if (!params || body.size() != bloomFilterHeaderLen + params->mBits / 8) { + warn("Bloom filter from cache '%s' is malformed; disabling for now", uri); + maybeDisableBloomFilter(uri); + return false; + } + + diskCache->upsertBloomFilter(uri, res.etag, {reinterpret_cast(body.data()), body.size()}); + return true; } bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept @@ -104,114 +147,44 @@ bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept const auto uri = config.getReference().render(/*withParams=*/false); - auto probe = [&](uint32_t k, uint64_t mBits) { - auto positions = bloomBitPositions(storePath, k, mBits); - bool definitelyMissing = !diskCache->probeBloomFilter(uri, positions); - if (definitelyMissing) - debug("Bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); - return definitelyMissing; - }; - - /* Fast path: filter already loaded or known disabled. 
*/ + /* Per-process cooldown after a failed fetch, so an unavailable filter + doesn't cause a fetch on every query. */ { auto state(bloomState.lock()); - if (state->status == BloomState::Disabled) - return false; - if (state->status == BloomState::Ready) - return probe(state->k, state->mBits); - } - - /* Slow path: acquire a cross-process file lock so concurrent first-probers - don't race on the network. */ - auto lockDir = getCacheDir() / "bloom-filter-locks"; - std::filesystem::create_directories(lockDir); - auto lockFile = - lockDir / hashString(HashAlgorithm::SHA256, uri).to_string(HashFormat::Base16, /*includePrefix=*/false); - PathLocks fetchLock( - {lockFile.string()}, fmt("waiting for another Nix process to fetch Bloom filter for '%s'...", uri)); - - /* Check disk cache while holding the lock: another process may have - just refreshed it. */ - NarInfoDiskCache::BloomFilterMeta meta; - bool haveMeta = false; - std::string expectedETag; - if (auto m = diskCache->lookupBloomFilter(uri)) { - auto ttl = (time_t) settings.getNarInfoDiskCacheSettings().ttlNegative.get(); - if (time(nullptr) - m->timestamp < ttl) { - meta = *m; - haveMeta = true; - } else { - expectedETag = m->etag; + if (!state->enabled) { + if (std::chrono::steady_clock::now() < state->disabledUntil) + return false; + state->enabled = true; // cooldown elapsed; try again } } - if (!haveMeta) { - /* `*bloomFilterUrl` can be a full (absolute) URL or a path - relative to the cache root; either way the resolution is - done by `getFile()` / `makeRequest()`, the same as for NAR - URLs in `.narinfo` files. 
*/ - ConditionalGetResult res; - try { - res = getFileConditional(*bloomFilterUrl, expectedETag); - } catch (Error & e) { - warn("failed to fetch Bloom filter from cache '%s': %s; disabling for this process", uri, e.message()); - bloomState.lock()->status = BloomState::Disabled; - return false; - } - - if (res.notModified) { - debug("Bloom filter for '%s' unchanged (304 Not Modified)", uri); - diskCache->touchBloomFilter(uri, res.etag.empty() ? expectedETag : res.etag); - auto m = diskCache->lookupBloomFilter(uri); - if (!m) { - warn("Bloom filter cache row missing after 304 for '%s'; disabling", uri); - bloomState.lock()->status = BloomState::Disabled; - return false; - } - meta = *m; - } else if (!res.data) { - warn("Bloom filter at '%s' returned 404; disabling for this process", uri); - bloomState.lock()->status = BloomState::Disabled; - return false; - } else { - const auto & body = *res.data; - constexpr size_t headerLen = 8 + 8 + 8 + 8; - if (body.size() < headerLen || std::memcmp(body.data(), "NixBloom", 8) != 0) { - warn("Bloom filter from cache '%s' has invalid magic; disabling", uri); - bloomState.lock()->status = BloomState::Disabled; - return false; - } - StringSource source(std::string_view(body).substr(8)); - uint64_t version; - uint32_t k; - uint64_t mBits; - try { - source >> version >> k >> mBits; - } catch (SerialisationError &) { - warn("Bloom filter from cache '%s' has invalid header; disabling", uri); - bloomState.lock()->status = BloomState::Disabled; + auto r = diskCache->probeBloomFilter(uri, storePath); + + if (!r) { + /* No fresh filter cached. Acquire a cross-process file lock so + concurrent first-probers don't all hit the network, then + re-check and fetch. 
*/ + auto lockDir = getCacheDir() / "bloom-filter-locks"; + std::filesystem::create_directories(lockDir); + auto lockFile = + lockDir / hashString(HashAlgorithm::SHA256, uri).to_string(HashFormat::Base16, /*includePrefix=*/false); + PathLocks fetchLock( + {lockFile.string()}, fmt("waiting for another Nix process to fetch Bloom filter for '%s'...", uri)); + + r = diskCache->probeBloomFilter(uri, storePath); + if (!r) { + if (!fetchBloomFilter(uri)) return false; - } - if (version != 1 || mBits == 0 || mBits % 8 != 0 || body.size() != headerLen + mBits / 8) { - warn("Bloom filter from cache '%s' has invalid header; disabling", uri); - bloomState.lock()->status = BloomState::Disabled; - return false; - } - std::span bits( - reinterpret_cast(body.data() + headerLen), (size_t) (mBits / 8)); - diskCache->upsertBloomFilter(uri, res.etag, k, mBits, bits); - meta = {.k = k, .mBits = mBits, .etag = res.etag, .timestamp = time(nullptr)}; + r = diskCache->probeBloomFilter(uri, storePath); } } - { - auto state(bloomState.lock()); - state->status = BloomState::Ready; - state->k = meta.k; - state->mBits = meta.mBits; - } + if (!r) + return false; - return probe(meta.k, meta.mBits); + if (!*r) + debug("Bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); + return !*r; } catch (...) 
{ ignoreExceptionExceptInterrupt(); return false; diff --git a/src/libstore/bloom-filter.cc b/src/libstore/bloom-filter.cc index 95b0f85c38e3..90b4f1e6e279 100644 --- a/src/libstore/bloom-filter.cc +++ b/src/libstore/bloom-filter.cc @@ -5,6 +5,28 @@ namespace nix { +std::optional parseBloomFilterHeader(std::string_view header) +{ + using namespace std::string_view_literals; + if (header.size() < bloomFilterHeaderLen || header.substr(0, 8) != "NixBloom"sv) + return std::nullopt; + + StringSource source(header.substr(8)); + uint64_t version; + uint32_t k; + uint64_t mBits; + try { + source >> version >> k >> mBits; + } catch (SerialisationError &) { + return std::nullopt; + } + + if (version != 1 || mBits == 0 || mBits % 8 != 0) + return std::nullopt; + + return BloomFilterParams{.k = k, .mBits = mBits}; +} + std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate) { /* Rejects NaN as well, because all comparisons with NaN are false. */ @@ -25,18 +47,17 @@ std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRat k = uint32_t(std::max(1, kL)); } - constexpr size_t headerLen = 8 + 8 + 8 + 8; - StringSink sink(headerLen + mBits / 8); + StringSink sink(bloomFilterHeaderLen + mBits / 8); using namespace std::string_view_literals; sink("NixBloom"sv); - sink << 1; // version + sink << 1; // version sink << k; sink << mBits; - assert(sink.s.size() == headerLen); + assert(sink.s.size() == bloomFilterHeaderLen); - sink.s.resize(headerLen + mBits / 8); - char * bits = sink.s.data() + headerLen; + sink.s.resize(bloomFilterHeaderLen + mBits / 8); + char * bits = sink.s.data() + bloomFilterHeaderLen; for (auto & path : paths) forEachBloomBitPosition(path, k, mBits, [&](uint64_t pos) { bits[pos / 8] |= uint8_t(1) << (pos % 8); }); diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 245f32d651a5..38c3ee86d3c9 100644 --- 
a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -9,6 +9,7 @@ #include "nix/util/sync.hh" #include +#include namespace nix { @@ -98,17 +99,32 @@ struct alignas(8) /* Work around ASAN failures on i686-linux. */ private: std::vector> signers; + /** + * Per-process cooldown that suppresses Bloom filter use after a failed + * fetch, so we don't re-hit an unavailable filter on every query. Mirrors + * `HttpBinaryCacheStore::maybeDisable()`. + */ struct BloomState { - enum Status { Pending, Ready, Disabled }; - - Status status = Pending; - uint32_t k = 0; - uint64_t mBits = 0; + bool enabled = true; + std::chrono::steady_clock::time_point disabledUntil; }; Sync bloomState; + /** + * Disable the Bloom filter for this cache for a short cooldown after a + * failed fetch. + */ + void maybeDisableBloomFilter(std::string_view uri); + + /** + * Fetch (with a conditional GET), validate, and store the Bloom filter in + * the disk cache. Returns false if the filter is unavailable/invalid (and + * disables it for a cooldown). Caller must hold the fetch lock. + */ + bool fetchBloomFilter(const std::string & uri); + protected: /** diff --git a/src/libstore/include/nix/store/bloom-filter.hh b/src/libstore/include/nix/store/bloom-filter.hh index b49af3060902..4d3251bedc33 100644 --- a/src/libstore/include/nix/store/bloom-filter.hh +++ b/src/libstore/include/nix/store/bloom-filter.hh @@ -7,14 +7,40 @@ #include #include +#include #include +#include namespace nix { /** - * Build a bloom-filter blob (32-byte header + raw bit array, see - * `doc/manual/source/protocols/binary-cache-bloom-filter.md`) from a - * set of store paths. + * Size of the Bloom filter blob header: magic(8) + version(8) + k(8) + mBits(8). + * See `doc/manual/source/protocols/binary-cache-bloom-filter.md`. + */ +constexpr size_t bloomFilterHeaderLen = 8 + 8 + 8 + 8; + +/** + * The parameters of a Bloom filter, as encoded in its header. 
+ */ +struct BloomFilterParams +{ + uint32_t k; + uint64_t mBits; +}; + +/** + * Parse and validate the `bloomFilterHeaderLen`-byte header at the start + * of a Bloom filter blob: magic `NixBloom`, version 1, `mBits != 0` and a + * multiple of 8. Returns `std::nullopt` if the header is too short or + * invalid. Does *not* check that the total body length matches `mBits`; + * the caller does that when it has the whole body. + */ +std::optional parseBloomFilterHeader(std::string_view header); + +/** + * Build a bloom-filter blob (`bloomFilterHeaderLen`-byte header + raw bit + * array, see `doc/manual/source/protocols/binary-cache-bloom-filter.md`) + * from a set of store paths. */ std::string buildBloomFilter(const StorePathSet & paths, double falsePositiveRate); diff --git a/src/libstore/include/nix/store/nar-info-disk-cache.hh b/src/libstore/include/nix/store/nar-info-disk-cache.hh index 4b9e93b157b1..505ec25792c7 100644 --- a/src/libstore/include/nix/store/nar-info-disk-cache.hh +++ b/src/libstore/include/nix/store/nar-info-disk-cache.hh @@ -54,29 +54,26 @@ struct NarInfoDiskCache virtual std::pair> lookupRealisation(const std::string & uri, const DrvOutput & id) = 0; - struct BloomFilterMeta - { - uint32_t k; - uint64_t mBits; - std::string etag; - time_t timestamp; - }; - /** - * Return the metadata for a cached Bloom filter, or nullopt if none is cached. - * Does not check the TTL; the caller decides whether to refresh. + * Probe `path` against the cached Bloom filter for `uri`. + * + * Returns `std::nullopt` if there is no Bloom filter cached for this + * cache, or the cached one is stale (older than the negative TTL) — the + * caller should (re)fetch and try again. Otherwise returns whether the + * filter says the path is *possibly present* (`true`) or *definitely not + * present* (`false`). + * + * The filter parameters (`k`, `mBits`) and the bits are read from the + * same stored blob in a single transaction, so they cannot drift. 
*/ - virtual std::optional lookupBloomFilter(const std::string & uri) = 0; + virtual std::optional probeBloomFilter(const std::string & uri, const StorePath & path) = 0; /** - * Store a freshly fetched Bloom filter blob (just the bit array, no header). + * Store a freshly fetched Bloom filter blob (the full response body: + * header + bit array). */ - virtual void upsertBloomFilter( - const std::string & uri, - const std::string & etag, - uint32_t k, - uint64_t mBits, - std::span bits) = 0; + virtual void + upsertBloomFilter(const std::string & uri, const std::string & etag, std::span blob) = 0; /** * Refresh the timestamp (and optionally the etag) of an existing Bloom filter @@ -85,12 +82,11 @@ struct NarInfoDiskCache virtual void touchBloomFilter(const std::string & uri, const std::string & etag) = 0; /** - * Probe `bitPositions` against the cached Bloom filter via random-access - * blob reads. Returns true if every position has its bit set (i.e. the - * Bloom filter says "possibly present"), false otherwise (definitely - * not present, OR no filter is cached). + * Return the etag of the currently cached Bloom filter for `uri` + * (regardless of its age), or nullopt if none is cached or it has no + * etag. Used to send `If-None-Match` when refetching. 
*/ - virtual bool probeBloomFilter(const std::string & uri, std::span bitPositions) = 0; + virtual std::optional getBloomFilterETag(const std::string & uri) = 0; /** * Return a singleton cache object that can be used concurrently by diff --git a/src/libstore/nar-info-disk-cache.cc b/src/libstore/nar-info-disk-cache.cc index 152929e6d8b7..5f73f8c55e52 100644 --- a/src/libstore/nar-info-disk-cache.cc +++ b/src/libstore/nar-info-disk-cache.cc @@ -1,4 +1,5 @@ #include "nix/store/nar-info-disk-cache.hh" +#include "nix/store/bloom-filter.hh" #include "nix/util/users.hh" #include "nix/util/sync.hh" #include "nix/util/finally.hh" @@ -29,9 +30,7 @@ create table if not exists BloomFilters ( cache integer primary key not null, timestamp integer not null, etag text, - k integer not null, - mBits integer not null, - bits blob not null, + blob blob not null, -- full filter body (header + bit array) foreign key (cache) references BinaryCaches(id) on delete cascade ); @@ -87,8 +86,8 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache { SQLite db; SQLiteStmt insertCache, queryCache, insertNAR, insertMissingNAR, queryNAR, insertRealisation, - insertMissingRealisation, queryRealisation, purgeCache, queryBloomFilter, insertBloomFilter, - touchBloomFilter, queryBloomFilterRowId; + insertMissingRealisation, queryRealisation, purgeCache, queryBloomFilterETag, insertBloomFilter, + touchBloomFilter, queryFreshBloomFilter; std::map caches; }; @@ -118,13 +117,15 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache state->db, "select id, storeDir, wantMassQuery, priority, bloomFilterUrl from BinaryCaches where url = ? 
and timestamp > ?"); - state->queryBloomFilter.create(state->db, "select timestamp, etag, k, mBits from BloomFilters where cache = ?"); + state->queryBloomFilterETag.create(state->db, "select etag from BloomFilters where cache = ?"); - state->queryBloomFilterRowId.create(state->db, "select rowid from BloomFilters where cache = ?"); + /* `>=` (not `>`) so a filter (re)fetched and stamped at the probe's + reference time still counts as fresh; see `probeBloomFilter`. */ + state->queryFreshBloomFilter.create( + state->db, "select rowid from BloomFilters where cache = ? and timestamp >= ?"); state->insertBloomFilter.create( - state->db, - "insert or replace into BloomFilters(cache, timestamp, etag, k, mBits, bits) values (?, ?, ?, ?, ?, ?)"); + state->db, "insert or replace into BloomFilters(cache, timestamp, etag, blob) values (?, ?, ?, ?)"); state->touchBloomFilter.create(state->db, "update BloomFilters set timestamp = ?, etag = ? where cache = ?"); @@ -426,29 +427,61 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache }); } - std::optional lookupBloomFilter(const std::string & uri) override + std::optional probeBloomFilter(const std::string & uri, const StorePath & path) override { - return retrySQLite>([&]() -> std::optional { + return retrySQLite>([&]() -> std::optional { auto state(_state.lock()); auto & cache(getCache(*state, uri)); - auto q(state->queryBloomFilter.use().apply(cache.info.id)); - if (!q.next()) + + /* Use a fixed reference time (captured at the first probe in + this process) rather than the moving wall clock. Otherwise a + filter we (re)fetched and stamped a moment ago could already + read as "stale" — especially under `--refresh`, which sets + `ttlNegative` to 0 — and we'd re-fetch the shared filter on + every query. With a fixed `startTime`, a filter stamped at or + after `startTime` stays fresh for the rest of the process. 
*/ + static auto startTime = time(nullptr); + + int64_t rowid; + { + auto q(state->queryFreshBloomFilter.use().apply(cache.info.id).apply(startTime - settings.ttlNegative)); + if (!q.next()) + return std::nullopt; // no filter cached, or stale + rowid = q.getInt(0); + } + + sqlite3_blob * blob = nullptr; + if (sqlite3_blob_open(state->db, "main", "BloomFilters", "blob", rowid, /*write=*/0, &blob) != SQLITE_OK) + SQLiteError::throw_(state->db, "opening bloom-filter blob"); + Finally _closeBlob([&] { + if (blob) + sqlite3_blob_close(blob); + }); + + /* Read and parse the header to get the filter parameters. */ + char header[bloomFilterHeaderLen]; + if (sqlite3_blob_bytes(blob) < (int) bloomFilterHeaderLen + || sqlite3_blob_read(blob, header, bloomFilterHeaderLen, 0) != SQLITE_OK) + return std::nullopt; // corrupt; treat as absent so we refetch + auto params = parseBloomFilterHeader({header, bloomFilterHeaderLen}); + if (!params) return std::nullopt; - return BloomFilterMeta{ - .k = (uint32_t) q.getInt(2), - .mBits = (uint64_t) q.getInt(3), - .etag = q.isNull(1) ? 
std::string{} : q.getStr(1), - .timestamp = (time_t) q.getInt(0), - }; + + bool allSet = true; + forEachBloomBitPosition(path, params->k, params->mBits, [&](uint64_t pos) { + if (!allSet) + return; + unsigned char byte = 0; + if (sqlite3_blob_read(blob, &byte, 1, (int) (bloomFilterHeaderLen + pos / 8)) != SQLITE_OK) + SQLiteError::throw_(state->db, "reading bloom-filter blob"); + if (!((byte >> (pos % 8)) & 1)) + allSet = false; + }); + return allSet; }); } - void upsertBloomFilter( - const std::string & uri, - const std::string & etag, - uint32_t k, - uint64_t mBits, - std::span bits) override + void upsertBloomFilter(const std::string & uri, const std::string & etag, std::span blob) override { retrySQLite([&]() { auto state(_state.lock()); @@ -457,9 +490,7 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache .apply(cache.info.id) .apply(time(nullptr)) .apply(etag, !etag.empty()) - .apply((uint64_t) k) - .apply(mBits) - .apply(reinterpret_cast(bits.data()), bits.size()) + .apply(reinterpret_cast(blob.data()), blob.size()) .exec(); }); } @@ -473,37 +504,15 @@ struct NarInfoDiskCacheImpl : NarInfoDiskCache }); } - bool probeBloomFilter(const std::string & uri, std::span bitPositions) override + std::optional getBloomFilterETag(const std::string & uri) override { - return retrySQLite([&]() -> bool { + return retrySQLite>([&]() -> std::optional { auto state(_state.lock()); auto & cache(getCache(*state, uri)); - - int64_t rowid; - { - auto q(state->queryBloomFilterRowId.use().apply(cache.info.id)); - if (!q.next()) - return false; // no cached filter - rowid = q.getInt(0); - } - - sqlite3_blob * blob = nullptr; - if (sqlite3_blob_open(state->db, "main", "BloomFilters", "bits", rowid, /*write=*/0, &blob) != SQLITE_OK) - SQLiteError::throw_(state->db, "opening bloom-filter blob"); - Finally _closeBlob([&] { - if (blob) - sqlite3_blob_close(blob); - }); - - for (auto pos : bitPositions) { - unsigned char byte = 0; - int rc = sqlite3_blob_read(blob, &byte, 1, (int) (pos / 
8)); - if (rc != SQLITE_OK) - SQLiteError::throw_(state->db, "reading bloom-filter blob"); - if (!((byte >> (pos % 8)) & 1)) - return false; - } - return true; + auto q(state->queryBloomFilterETag.use().apply(cache.info.id)); + if (!q.next() || q.isNull(0)) + return std::nullopt; + return q.getStr(0); }); } }; diff --git a/tests/functional/binary-cache.sh b/tests/functional/binary-cache.sh index b8bb38c0088d..e6adaa436b11 100755 --- a/tests/functional/binary-cache.sh +++ b/tests/functional/binary-cache.sh @@ -175,6 +175,9 @@ done # `--refresh` should force the cached filter to be treated as stale; the # client must re-fetch with `If-None-Match` and the server should reply 304 # Not Modified instead of resending the body. +# Sleep so the cached entry is stamped strictly before this --refresh +# process starts (freshness is at 1-second resolution). +sleep 1 prev=$(grep -c "url=/bloom-filter" "$nixServeLog") nix path-info --debug --refresh --store "$httpBinaryCacheUrl" "$fake" 2> "$TEST_ROOT/bloom-log3" || true # One additional /bloom-filter request was made. From 538e10719206b9a3a9ffe70c31fb0c2b1fdb3bbd Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 16:34:16 +0200 Subject: [PATCH 21/25] bloom filter: inline maybeDisableBloomFilter as a local lambda It was only used by fetchBloomFilter; make it a local `disable` lambda there and drop the member function and its declaration. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/libstore/binary-cache-store.cc | 33 +++++++++---------- .../include/nix/store/binary-cache-store.hh | 6 ---- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 3fc16b04abf8..7a72f8e547dd 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -88,19 +88,21 @@ BinaryCacheStore::getFileConditional(const std::string & path, const std::string return ConditionalGetResult{.data = std::move(data), .etag = "", .notModified = false}; } -void BinaryCacheStore::maybeDisableBloomFilter(std::string_view uri) -{ - auto state(bloomState.lock()); - if (state->enabled) { - int t = 60; - debug("disabling Bloom filter for cache '%s' for %d seconds", uri, t); - state->enabled = false; - state->disabledUntil = std::chrono::steady_clock::now() + std::chrono::seconds(t); - } -} - bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) { + /* Disable the Bloom filter for this cache for a short cooldown, so an + unavailable/broken filter doesn't cause a fetch on every query. 
*/ + auto disable = [&] { + auto state(bloomState.lock()); + if (state->enabled) { + int t = 60; + debug("disabling Bloom filter for cache '%s' for %d seconds", uri, t); + state->enabled = false; + state->disabledUntil = std::chrono::steady_clock::now() + std::chrono::seconds(t); + } + return false; + }; + auto expectedETag = diskCache->getBloomFilterETag(uri).value_or(""); /* `*bloomFilterUrl` can be a full (absolute) URL or a path relative to @@ -111,8 +113,7 @@ bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) res = getFileConditional(*bloomFilterUrl, expectedETag); } catch (Error & e) { warn("failed to fetch Bloom filter from cache '%s': %s; disabling for now", uri, e.message()); - maybeDisableBloomFilter(uri); - return false; + return disable(); } if (res.notModified) { @@ -123,16 +124,14 @@ bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) if (!res.data) { warn("Bloom filter at '%s' returned 404; disabling for now", uri); - maybeDisableBloomFilter(uri); - return false; + return disable(); } const auto & body = *res.data; auto params = parseBloomFilterHeader(body); if (!params || body.size() != bloomFilterHeaderLen + params->mBits / 8) { warn("Bloom filter from cache '%s' is malformed; disabling for now", uri); - maybeDisableBloomFilter(uri); - return false; + return disable(); } diskCache->upsertBloomFilter(uri, res.etag, {reinterpret_cast(body.data()), body.size()}); diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 38c3ee86d3c9..9f781e35d63c 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -112,12 +112,6 @@ private: Sync bloomState; - /** - * Disable the Bloom filter for this cache for a short cooldown after a - * failed fetch. 
- */ - void maybeDisableBloomFilter(std::string_view uri); - /** * Fetch (with a conditional GET), validate, and store the Bloom filter in * the disk cache. Returns false if the filter is unavailable/invalid (and From cbe4c3fbd6d92f02a7c7c138501a38e80cc7f398 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 16:36:30 +0200 Subject: [PATCH 22/25] Drop catch all --- src/libstore/binary-cache-store.cc | 71 ++++++++++++++---------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 7a72f8e547dd..f966bbf31f97 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -140,54 +140,49 @@ bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept { - try { - if (!diskCache || !bloomFilterUrl) - return false; + if (!diskCache || !bloomFilterUrl) + return false; - const auto uri = config.getReference().render(/*withParams=*/false); + const auto uri = config.getReference().render(/*withParams=*/false); - /* Per-process cooldown after a failed fetch, so an unavailable filter - doesn't cause a fetch on every query. */ - { - auto state(bloomState.lock()); - if (!state->enabled) { - if (std::chrono::steady_clock::now() < state->disabledUntil) - return false; - state->enabled = true; // cooldown elapsed; try again - } + /* Per-process cooldown after a failed fetch, so an unavailable filter + doesn't cause a fetch on every query. */ + { + auto state(bloomState.lock()); + if (!state->enabled) { + if (std::chrono::steady_clock::now() < state->disabledUntil) + return false; + state->enabled = true; // cooldown elapsed; try again } + } - auto r = diskCache->probeBloomFilter(uri, storePath); + auto r = diskCache->probeBloomFilter(uri, storePath); - if (!r) { - /* No fresh filter cached. 
Acquire a cross-process file lock so - concurrent first-probers don't all hit the network, then - re-check and fetch. */ - auto lockDir = getCacheDir() / "bloom-filter-locks"; - std::filesystem::create_directories(lockDir); - auto lockFile = - lockDir / hashString(HashAlgorithm::SHA256, uri).to_string(HashFormat::Base16, /*includePrefix=*/false); - PathLocks fetchLock( - {lockFile.string()}, fmt("waiting for another Nix process to fetch Bloom filter for '%s'...", uri)); + if (!r) { + /* No fresh filter cached. Acquire a cross-process file lock so + concurrent first-probers don't all hit the network, then + re-check and fetch. */ + auto lockDir = getCacheDir() / "bloom-filter-locks"; + std::filesystem::create_directories(lockDir); + auto lockFile = + lockDir / hashString(HashAlgorithm::SHA256, uri).to_string(HashFormat::Base16, /*includePrefix=*/false); + PathLocks fetchLock( + {lockFile.string()}, fmt("waiting for another Nix process to fetch Bloom filter for '%s'...", uri)); + r = diskCache->probeBloomFilter(uri, storePath); + if (!r) { + if (!fetchBloomFilter(uri)) + return false; r = diskCache->probeBloomFilter(uri, storePath); - if (!r) { - if (!fetchBloomFilter(uri)) - return false; - r = diskCache->probeBloomFilter(uri, storePath); - } } + } - if (!r) - return false; - - if (!*r) - debug("Bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); - return !*r; - } catch (...) 
{ - ignoreExceptionExceptInterrupt(); + if (!r) return false; - } + + if (!*r) + debug("Bloom filter for '%s' ruled out '%s'", uri, printStorePath(storePath)); + return !*r; } std::optional BinaryCacheStore::getNixCacheInfo() From c400002a53bdd751c004285507fec8c7fb899a19 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 16:40:16 +0200 Subject: [PATCH 23/25] Drop testing code --- src/nix/generate-bloom-filter.cc | 33 -------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/nix/generate-bloom-filter.cc b/src/nix/generate-bloom-filter.cc index 17bd27581718..813f3c59d898 100644 --- a/src/nix/generate-bloom-filter.cc +++ b/src/nix/generate-bloom-filter.cc @@ -70,39 +70,6 @@ struct CmdGenerateBloomFilter : StoreCommand blob.size(), paths.size(), falsePositiveRate); - -#if 0 - /* Self-check the empirical false-positive rate by probing the - just-built filter with 10 000 random store paths. */ - auto readU64 = [&](size_t off) { - uint64_t v = 0; - for (int i = 0; i < 8; ++i) - v |= uint64_t((unsigned char) blob[off + i]) << (8 * i); - return v; - }; - uint32_t k = uint32_t(readU64(16)); - uint64_t mBits = readU64(24); - const char * bits = blob.data() + 32; - - constexpr size_t numSamples = 1000000; - size_t falsePositives = 0; - for (size_t i = 0; i < numSamples; ++i) { - auto p = StorePath::random("nix-bloom-fpr-probe"); - bool allSet = true; - forEachBloomBitPosition(p, k, mBits, [&](uint64_t pos) { - if (!((uint8_t(bits[pos / 8]) >> (pos % 8)) & 1)) - allSet = false; - }); - if (allSet) - ++falsePositives; - } - notice( - "Empirical false-positive rate over %d random probes: %d (%f, target %f).", - numSamples, - falsePositives, - double(falsePositives) / double(numSamples), - falsePositiveRate); -#endif } }; From 063501bb9841d59de03eb8bbd577ac4481efb5fd Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 8 Jun 2026 17:02:15 +0200 Subject: [PATCH 24/25] binary cache: add use-bloom-filter setting (default true) Lets a client 
opt out of consulting a cache's advertised Bloom filter. isDefinitelyMissing bails out early (returning false) when the setting is false. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/libstore/binary-cache-store.cc | 2 +- src/libstore/include/nix/store/binary-cache-store.hh | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index f966bbf31f97..c9aea0cba86b 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -140,7 +140,7 @@ bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept { - if (!diskCache || !bloomFilterUrl) + if (!diskCache || !bloomFilterUrl || !config.useBloomFilter) return false; const auto uri = config.getReference().render(/*withParams=*/false); diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 9f781e35d63c..16547fbdb70a 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -68,6 +68,17 @@ struct BinaryCacheStoreConfig : virtual StoreConfig The meaning and accepted values depend on the compression method selected. `-1` specifies that the default compression level should be used. )"}; + + Setting useBloomFilter{ + this, + true, + "use-bloom-filter", + R"( + Whether to use the Bloom filter advertised by this binary cache (if + any) to avoid querying `.narinfo` files for store paths that are + definitely not in the cache. Set to `false` to disable this + optimization.
+ )"}; }; /** From cfce9450e01dda60749f2bac7e18f368ca9e1f57 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 10 Jun 2026 15:59:16 +0200 Subject: [PATCH 25/25] Drop noexcept --- src/libstore/binary-cache-store.cc | 2 +- src/libstore/include/nix/store/binary-cache-store.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index c9aea0cba86b..7917a8604a79 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -138,7 +138,7 @@ bool BinaryCacheStore::fetchBloomFilter(const std::string & uri) return true; } -bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) noexcept +bool BinaryCacheStore::isDefinitelyMissing(const StorePath & storePath) { if (!diskCache || !bloomFilterUrl || !config.useBloomFilter) return false; diff --git a/src/libstore/include/nix/store/binary-cache-store.hh b/src/libstore/include/nix/store/binary-cache-store.hh index 16547fbdb70a..da22d58f549f 100644 --- a/src/libstore/include/nix/store/binary-cache-store.hh +++ b/src/libstore/include/nix/store/binary-cache-store.hh @@ -231,7 +231,7 @@ public: * case (no filter advertised, filter disabled after a failure, * filter says "possibly present"). Never throws. */ - bool isDefinitelyMissing(const StorePath & storePath) noexcept; + bool isDefinitelyMissing(const StorePath & storePath); private: