diff --git a/diskann-bftree/src/neighbors.rs b/diskann-bftree/src/neighbors.rs index 117e9c31b..c0dc9ef2c 100644 --- a/diskann-bftree/src/neighbors.rs +++ b/diskann-bftree/src/neighbors.rs @@ -83,6 +83,11 @@ impl NeighborProvider { /// Retrieve the neighbor list of a vector. /// /// Does not acquire any lock. Callers must ensure appropriate synchronization. + /// + /// `neighbors` is cleared first upon each invocation. A vector with no + /// stored neighbor list yet (bf-tree `NotFound`) yields an empty list rather + /// than an error, so neighbor lists are created lazily on first write. + /// One data copy is involved which copies the data from bf-tree to `neighbors` pub fn get_neighbors(&self, vector_id: I, neighbors: &mut AdjacencyList) -> ANNResult<()> { #[cfg(test)] self.num_get_calls.increment(); @@ -149,9 +154,15 @@ impl NeighborProvider { )); } bf_tree::LeafReadResult::NotFound => { - return Err(ANNError::log_index_error( - "The bf-tree entry for the vector key is marked as not found", - )); + // The vertex has no stored neighbor list yet. bf-tree natively + // distinguishes "absent" from "present but empty", so treat + // absence as an empty adjacency list rather than an error. This + // lets neighbor lists be created lazily on first write, avoiding + // an O(max_points) eager initialization at construction. + // + // `guard` is dropped at the end of this match without `finish`, + // which clears `neighbors` to the empty list (identical to the + // `Found(0)` case above). } }; @@ -456,6 +467,70 @@ mod tests { assert!(neighbor_provider.get_neighbors(1, &mut result).is_err()); } + /// Lazy init: reading ids that were never written must yield an empty list + /// rather than erroring or panicking. This hammers a wide sweep of unwritten + /// ids (including some interleaved with written ones) to confirm the + /// `NotFound` -> empty mapping holds and never crashes. + #[test] + fn test_get_unwritten_ids_returns_empty() { + let locks = Arc::new(StripedLocks::new()); + let neighbor_provider = new_provider(64); + let mut scratch = neighbor_provider.scratch(&locks); + + // Write neighbor lists for a sparse set of ids, leaving large gaps. + let written: [u32; 3] = [10, 1_000, 100_000]; + for &id in &written { + scratch.write_neighbors(id, &[1, 2, 3]).unwrap(); + } + + let mut result = AdjacencyList::with_capacity(64); + + // Sweep a wide id range, including the written ids and the gaps between + // and beyond them. The buffer is reused across calls to confirm it is + // cleared on every read. + for id in 0u32..200_000 { + neighbor_provider.get_neighbors(id, &mut result).unwrap(); + if written.contains(&id) { + assert_eq!(&[1, 2, 3], &*result, "written id {id} lost its list"); + } else { + assert!(result.is_empty(), "unwritten id {id} should be empty"); + } + } + + // Ids far beyond anything ever written are still just empty, not errors. + for id in [u32::MAX - 1, u32::MAX] { + neighbor_provider.get_neighbors(id, &mut result).unwrap(); + assert!(result.is_empty()); + } + } + + /// Lazy init under concurrency: many threads simultaneously hammering reads + /// of never-written ids must all observe empty lists with no panics, lost + /// updates, or errors. + #[tokio::test(flavor = "multi_thread")] + async fn test_concurrent_reads_of_unwritten_ids() { + let neighbor_provider = new_shared_provider(64); + let num_threads = stress_thread_count(); + + let mut set = JoinSet::new(); + for t in 0..num_threads { + let neighbor_provider = Arc::clone(&neighbor_provider); + set.spawn(async move { + let mut result = AdjacencyList::with_capacity(64); + // Overlapping id ranges across threads to maximize contention on + // the shared bf-tree read path for absent keys. + for id in (t * 1_000)..(t * 1_000 + 10_000) { + neighbor_provider.get_neighbors(id, &mut result).unwrap(); + assert!(result.is_empty()); + } + }); + } + + while let Some(res) = set.join_next().await { + res.unwrap(); + } + } + /// Test the interleaved and parallel traversal of the Bf-Tree /// by invoking the async accessors of the neighbor list provider #[tokio::test(flavor = "multi_thread", worker_threads = 5)] diff --git a/diskann-bftree/src/provider.rs b/diskann-bftree/src/provider.rs index 839115f09..1a9f773ca 100644 --- a/diskann-bftree/src/provider.rs +++ b/diskann-bftree/src/provider.rs @@ -352,18 +352,11 @@ where let provider = Self::new_empty(params.clone(), quant_precursor)?; provider.set_start_points(Hidden(()), start_points)?; - { - // Initialize all neighborhoods to be empty lists. - // This is a temporary solution to the problem of trying to access - // an uninitialized neighbor list in functions `consolidate_deletes` and - // `consolidate_simple` and getting an error. This is a stop-gap solution - // until BF-tree API is improved to handle `exists` queries. - let mut scratch = provider.neighbor_provider.scratch(&provider.locks); - for i in 0..params.max_points { - let vector_id = i as u32; - scratch.write_neighbors(vector_id, &[])?; - } - } + // Neighbor lists are created lazily on first write. `get_neighbors` + // treats a bf-tree `NotFound` as an empty adjacency list, so there is no + // need to eagerly initialize an empty list for every id up front. This + // keeps construction O(num_start_points) instead of O(max_points), + // which matters for billion-scale, larger-than-memory datasets. Ok(provider) } @@ -2504,11 +2497,12 @@ mod tests { // let mut out = AdjacencyList::from_iter_untrusted([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]); // len = 10 - // Attempt to access non-existant vector's neighbor list should fail as NotFound - assert!(provider + // Accessing a vector with no stored neighbor list yet succeeds and + // yields an empty list (lazy initialization via bf-tree `NotFound`). + provider .neighbor_provider .get_neighbors(200, &mut out) - .is_err()); + .unwrap(); assert!(out.is_empty()); }