From 9592e064486ed632b5ad339425e77fcc7f4d4266 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 12 May 2026 19:53:45 +0000 Subject: [PATCH] fix(ruvector-router-core): rebuild index from storage on open + fix HNSW result heap - VectorDB::new() now loads all persisted vectors into the HNSW index on startup, fixing search() always returning 0 results after restart (closes #430) - Fix result heap in search_knn_internal: was min-heap, must be max-heap so peek() returns the furthest result for correct pruning at scale - Remove ef_construction.min(m*2) beam clamp in insert() so the full candidate pool is used during graph construction --- crates/ruvector-router-core/src/index.rs | 17 ++++++++--------- crates/ruvector-router-core/src/vector_db.rs | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/crates/ruvector-router-core/src/index.rs b/crates/ruvector-router-core/src/index.rs index 559ddf566..83f2904ad 100644 --- a/crates/ruvector-router-core/src/index.rs +++ b/crates/ruvector-router-core/src/index.rs @@ -116,8 +116,7 @@ impl HnswIndex { } // Find nearest neighbors (safe now - no locks held) - let neighbors = - self.search_knn_internal(&vector, self.config.ef_construction.min(self.config.m * 2)); + let neighbors = self.search_knn_internal(&vector, self.config.ef_construction); // Re-acquire graph lock for modifications let mut graph = self.graph.write(); @@ -187,7 +186,7 @@ impl HnswIndex { let entry_id = entry_point.as_ref().unwrap(); let mut visited = HashSet::new(); let mut candidates = BinaryHeap::new(); - let mut result = BinaryHeap::new(); + let mut result: BinaryHeap> = BinaryHeap::new(); // Calculate distance to entry point if let Some(entry_vec) = vectors.get(entry_id) { @@ -199,14 +198,14 @@ impl HnswIndex { }; candidates.push(neighbor.clone()); - result.push(neighbor); + result.push(std::cmp::Reverse(neighbor)); visited.insert(entry_id.clone()); } // Search phase while let Some(current) = candidates.pop() { // Check if we should continue - if let Some(furthest) = result.peek() { + if let Some(std::cmp::Reverse(furthest)) = result.peek() { if current.distance > furthest.distance && result.len() >= ef { break; } @@ -235,11 +234,11 @@ impl HnswIndex { // Add to results if better than current worst if result.len() < ef { - result.push(neighbor); - } else if let Some(worst) = result.peek() { + result.push(std::cmp::Reverse(neighbor)); + } else if let Some(std::cmp::Reverse(worst)) = result.peek() { if dist < worst.distance { result.pop(); - result.push(neighbor); + result.push(std::cmp::Reverse(neighbor)); } } } @@ -248,7 +247,7 @@ impl HnswIndex { } // Convert to sorted vector - let mut sorted_results: Vec = result.into_iter().collect(); + let mut sorted_results: Vec = result.into_iter().map(|r| r.0).collect(); sorted_results.sort_by(|a, b| { a.distance .partial_cmp(&b.distance) diff --git a/crates/ruvector-router-core/src/vector_db.rs b/crates/ruvector-router-core/src/vector_db.rs index eacb8035b..88a58d7b2 100644 --- a/crates/ruvector-router-core/src/vector_db.rs +++ b/crates/ruvector-router-core/src/vector_db.rs @@ -32,8 +32,20 @@ impl VectorDB { let index = Arc::new(HnswIndex::new(hnsw_config)); - let stats = Arc::new(RwLock::new(VectorDbStats { - total_vectors: 0, + // Rebuild index from persisted vectors (fixes search returning 0 results after restart) + let stored_ids = storage.get_all_ids()?; + if !stored_ids.is_empty() { + let mut entries = Vec::with_capacity(stored_ids.len()); + for id in &stored_ids { + if let Some(vector) = storage.get(id)? { + entries.push((id.clone(), vector)); + } + } + index.insert_batch(entries)?; + } + + let stats = Arc::new(RwLock::new(VectorDbStats { + total_vectors: stored_ids.len(), index_size_bytes: 0, storage_size_bytes: 0, avg_query_latency_us: 0.0,