diff --git a/fuzz/fuzz_targets/search_rankquant.rs b/fuzz/fuzz_targets/search_rankquant.rs index 6c7b386..c9e5ab5 100644 --- a/fuzz/fuzz_targets/search_rankquant.rs +++ b/fuzz/fuzz_targets/search_rankquant.rs @@ -110,7 +110,12 @@ fn assert_results(label: &str, res: &SearchResults, nq: usize, k_eff: usize, n: let cur = (scores[slot], ids[slot]); assert!( cur.0 <= prev.0, - "{label}: row {qi} not sorted at slots {} and {slot}", + "{label}: row {qi} violates score-desc order at slots {} and {slot}: prev={prev:?} cur={cur:?}", + slot - 1, + ); + assert!( + cur.0 != prev.0 || cur.1 > prev.1, + "{label}: row {qi} violates id-asc tie order at slots {} and {slot}: prev={prev:?} cur={cur:?}", slot - 1, ); } diff --git a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs index 3a45c4b..81bd1a1 100644 --- a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs +++ b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs @@ -31,6 +31,24 @@ struct TwoStageInput { payload: Vec, } +fn assert_rankquant_order(label: &str, scores: &[f32], ids: &[i64]) { + assert_eq!(scores.len(), ids.len(), "{label}: score/id length mismatch"); + for slot in 1..scores.len() { + let prev = (scores[slot - 1], ids[slot - 1]); + let cur = (scores[slot], ids[slot]); + assert!( + cur.0 <= prev.0, + "{label}: violates score-desc order at slots {} and {slot}: prev={prev:?} cur={cur:?}", + slot - 1, + ); + assert!( + cur.0 != prev.0 || cur.1 >= prev.1, + "{label}: violates id-asc tie order at slots {} and {slot}: prev={prev:?} cur={cur:?}", + slot - 1, + ); + } +} + impl<'a> Arbitrary<'a> for TwoStageInput { fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> { let dim = *u.choose(&[64usize, 128, 256, 512])?; @@ -108,7 +126,7 @@ fuzz_target!(|input: TwoStageInput| { assert_eq!(scores.len(), k_eff); assert_eq!(ids.len(), k_eff); assert!(scores.iter().all(|score| score.is_finite())); - assert!(scores.windows(2).all(|pair| pair[0] >= pair[1])); + 
assert_rankquant_order("subset rerank", &scores, &ids); for &id in &ids { assert!(id >= 0); assert!(subset_candidates.contains(&(id as u32))); diff --git a/ordvec-ffi/include/ordvec.h b/ordvec-ffi/include/ordvec.h index cc4f8c7..9627727 100644 --- a/ordvec-ffi/include/ordvec.h +++ b/ordvec-ffi/include/ordvec.h @@ -52,6 +52,10 @@ typedef struct { const float *query; uint64_t dim; uint64_t k; + /** + * Optional subset rows. Rows are global row IDs, may be unsorted, and may + * contain duplicates; duplicate entries are scored independently. + */ const uint32_t *candidate_rows; uint64_t candidate_count; uint64_t flags; diff --git a/ordvec-ffi/src/lib.rs b/ordvec-ffi/src/lib.rs index 03f660d..1e832f8 100644 --- a/ordvec-ffi/src/lib.rs +++ b/ordvec-ffi/src/lib.rs @@ -73,6 +73,8 @@ pub struct ordvec_search_params_t { pub query: *const f32, pub dim: u64, pub k: u64, + /// Optional subset rows. Rows are global row IDs, may be unsorted, and may + /// contain duplicates; duplicate entries are scored independently. 
pub candidate_rows: *const u32, pub candidate_count: u64, pub flags: u64, @@ -1012,6 +1014,36 @@ mod tests { ); assert_eq!(returned, 2); assert_eq!([hits[0].row_id, hits[1].row_id], [1, 2]); + + let duplicate_candidates = [3u32, 1, 1, 2]; + params.k = 3; + params.candidate_rows = duplicate_candidates.as_ptr(); + params.candidate_count = duplicate_candidates.len() as u64; + let mut hits = vec![ + ordvec_hit_t { + row_id: 0, + id: 0, + score: 0.0, + reserved: 0 + }; + 3 + ]; + let mut stats = default_stats(); + assert_eq!( + ordvec_index_search( + handle, + &params, + hits.as_mut_ptr(), + 3, + &mut returned, + &mut stats + ), + ORDVEC_STATUS_OK + ); + assert_eq!(returned, 3); + assert_eq!([hits[0].row_id, hits[1].row_id, hits[2].row_id], [1, 1, 2]); + assert_eq!(stats.candidate_count, 4); + assert_eq!(stats.vectors_scored, 4); ordvec_index_free(handle); } std::fs::remove_file(path).ok(); diff --git a/ordvec-go/README.md b/ordvec-go/README.md index 8fde2ef..3ea87e4 100644 --- a/ordvec-go/README.md +++ b/ordvec-go/README.md @@ -19,3 +19,8 @@ Search with `nil` options or `nil` `SearchOptions.Candidates` performs a full search. An empty, non-nil `Candidates` slice is treated as an explicit empty subset and returns a typed `StatusBadArgument`, matching the C ABI v1 pointer/count contract. + +Subset candidates are global row IDs. They may be unsorted and may contain +duplicates; duplicate entries are scored independently and can produce duplicate +hits. Deduplicate `SearchOptions.Candidates` before calling `Search` when unique +hits are required. diff --git a/ordvec-go/ordvec.go b/ordvec-go/ordvec.go index 6ca2593..4c0665f 100644 --- a/ordvec-go/ordvec.go +++ b/ordvec-go/ordvec.go @@ -126,6 +126,9 @@ type Stats struct { } type SearchOptions struct { + // Candidates is an optional subset of global row IDs. Entries may be + // unsorted and may contain duplicates; duplicate entries are scored + // independently and can produce duplicate hits. 
Candidates []uint32 UserTag uint64 } diff --git a/ordvec-go/ordvec_test.go b/ordvec-go/ordvec_test.go index 4cdf2ee..ca2ef9b 100644 --- a/ordvec-go/ordvec_test.go +++ b/ordvec-go/ordvec_test.go @@ -121,6 +121,28 @@ func TestRankQuantSubsetSearchOrdersByRowID(t *testing.T) { } } +func TestRankQuantSubsetSearchAllowsDuplicateHits(t *testing.T) { + idx, err := Load(writeRankQuantFixture(t)) + if err != nil { + t.Fatal(err) + } + defer idx.Close() + + hits, stats, err := idx.Search(query16(), 3, &SearchOptions{ + Candidates: []uint32{3, 1, 1, 2}, + }) + if err != nil { + t.Fatal(err) + } + got := []uint64{hits[0].RowID, hits[1].RowID, hits[2].RowID} + if got[0] != 1 || got[1] != 1 || got[2] != 2 { + t.Fatalf("unexpected row order: %v", got) + } + if stats.Kind != KindRankQuant || stats.CandidateCount != 4 || stats.VectorsScored != 4 { + t.Fatalf("unexpected stats: %+v", stats) + } +} + func TestBitmapSubsetSearchAllowsDuplicateHits(t *testing.T) { idx, err := Load(writeBitmapFixture(t)) if err != nil { diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index 53721ea..d86cb38 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -735,6 +735,11 @@ impl RankQuant { /// filled are returned as ``-1``. Uses the same AVX-512 → AVX2 → scalar /// dispatch as ``search_asymmetric``. /// + /// ``candidates`` may be unsorted and may contain duplicate global doc IDs. + /// Each candidate entry is scored independently, so duplicate IDs may + /// produce duplicate returned global IDs. Deduplicate the array before + /// calling this method when unique hits are required. + /// /// If the shortlist came from [`Bitmap`], this is the exact RankQuant /// rerank stage over that survivor set; it does not itself apply or /// calibrate a bitmap overlap threshold. 
diff --git a/ordvec-python/tests/test_rank_quant.py b/ordvec-python/tests/test_rank_quant.py index 21bf830..7eb562e 100644 --- a/ordvec-python/tests/test_rank_quant.py +++ b/ordvec-python/tests/test_rank_quant.py @@ -347,6 +347,20 @@ def test_search_asymmetric_subset_ties_use_global_row_ids(): np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32)) +def test_search_asymmetric_subset_duplicate_candidates_remain_duplicates(): + vectors = np.ones((12, 64), dtype=np.float32) + idx = RankQuant(dim=64, bits=2) + idx.add(vectors) + + candidates = np.array([7, 8, 7], dtype=np.uint32) + scores, ids = idx.search_asymmetric_subset( + np.zeros(64, dtype=np.float32), candidates, k=2 + ) + + np.testing.assert_array_equal(ids, np.array([7, 7], dtype=np.int64)) + np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32)) + + def test_search_asymmetric_subset_k_caps_at_candidate_count(): # k > len(candidates) should silently cap — no panic, no sentinel # padding beyond the candidate-set size. diff --git a/src/quant.rs b/src/quant.rs index f770043..e626f47 100644 --- a/src/quant.rs +++ b/src/quant.rs @@ -338,10 +338,11 @@ impl RankQuant { #[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))] let simd_tier = select_simd_tier(dim, bits); - // For the AVX2 path we drop the per-lane centre subtract from - // the hot loop and add it back as a per-query constant offset - // to the top-k scores at finalize time. Ranking is invariant - // to this constant; absolute scores stay exact. + // The SIMD paths drop the per-lane centre subtract from the hot + // loop. The query-constant offset is applied inside TopK before + // eviction, so boundary ties use the same exposed score tuple that + // callers receive. 
+ #[cfg(target_arch = "x86_64")] let centre = ((1u32 << bits) as f32 - 1.0) / 2.0; queries @@ -351,27 +352,30 @@ impl RankQuant { .for_each(|((q, out_scores), out_indices)| { let q_unit = l2_normalise(q); let mut top = TopK::new(k_eff); - #[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))] - let mut centre_drop_used = false; + #[cfg(target_arch = "x86_64")] + let centre_offset = { + let q_sum: f32 = q_unit.iter().sum(); + -centre * q_sum * inv_norm + }; #[cfg(target_arch = "x86_64")] unsafe { match (simd_tier, bits) { (SimdTier::Avx512, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx512, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } _ => scan_via_lut_scalar( &self.packed, @@ -399,25 +403,6 @@ impl RankQuant { top.finalize_into(out_scores, out_indices); - if centre_drop_used { - // The asym kernels drop the per-lane `- centre` term from - // the hot loop; it is a query-constant shift, re-applied - // here. Guarded by `is_finite` so it lands only on filled - // slots: when fewer than `k` docs were scored the trailing - // top-k positions stay at the `f32::NEG_INFINITY` sentinel, - // and `NEG_INFINITY + offset` would wrongly turn a sentinel - // into a finite score. (Real scores are always finite — the - // finite-input policy guarantees it — so the guard only ever - // skips sentinels, never a genuine result.) 
- let q_sum: f32 = q_unit.iter().sum(); - let offset = -centre * q_sum * inv_norm; - for s in out_scores.iter_mut() { - if s.is_finite() { - *s += offset; - } - } - } - let _ = bytes_per_vec; // shape clarity }); @@ -540,6 +525,11 @@ impl RankQuant { /// descending, then global row ID ascending, matching the full-index /// search tie policy even when `candidates` is unsorted. /// + /// `candidates` may contain duplicate global row IDs. Each candidate entry + /// is scored independently, so duplicate IDs may produce duplicate returned + /// global IDs. Callers that require unique hits should deduplicate the + /// candidate list before calling this method. + /// /// Uses the same AVX-512 → AVX2 → scalar dispatch as /// [`Self::search_asymmetric`] and the same centre-drop math, just /// iterates over the provided candidate list instead of all `n` @@ -585,12 +575,16 @@ impl RankQuant { let norm = rankquant_norm(dim, bits); let inv_norm = 1.0_f32 / norm; + #[cfg(target_arch = "x86_64")] let centre = ((1u32 << bits) as f32 - 1.0) / 2.0; - // L2-normalise the query and gather centre-correction. + // L2-normalise the query. 
let q_unit = l2_normalise(query); - let q_sum: f32 = q_unit.iter().sum(); - let centre_offset = -centre * q_sum * inv_norm; + #[cfg(target_arch = "x86_64")] + let centre_offset = { + let q_sum: f32 = q_unit.iter().sum(); + -centre * q_sum * inv_norm + }; // Pack the candidate docs' bytes into a contiguous buffer so // the SIMD kernels can scan them as if they were a small dense @@ -609,26 +603,24 @@ impl RankQuant { #[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))] let simd_tier = select_simd_tier(dim, bits); let mut top = TopK::new_with_tie_keys(k_eff, candidates); - #[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))] - let mut centre_drop_used = false; #[cfg(target_arch = "x86_64")] unsafe { match (simd_tier, bits) { (SimdTier::Avx512, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx512, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 2) => { + top.set_score_offset(centre_offset); scan_b2_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } (SimdTier::Avx2, 4) => { + top.set_score_offset(centre_offset); scan_b4_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top); - centre_drop_used = true; } _ => scan_via_lut_scalar( &sub_packed, @@ -657,17 +649,6 @@ impl RankQuant { let mut scores = vec![f32::NEG_INFINITY; k_eff]; let mut local_indices = vec![-1i64; k_eff]; top.finalize_into(&mut scores, &mut local_indices); - if centre_drop_used { - // Re-apply the per-query centre shift dropped from the kernel hot - // loop; the `is_finite` guard skips unfilled top-k slots (still at - // the `f32::NEG_INFINITY` sentinel) so a sentinel never becomes a - // finite score. See the matching note in `search_asymmetric`. 
- for s in scores.iter_mut() { - if s.is_finite() { - *s += centre_offset; - } - } - } // Map local → global doc IDs. let global_indices: Vec<i64> = local_indices .iter() diff --git a/src/util.rs b/src/util.rs index 0229f72..83759c7 100644 --- a/src/util.rs +++ b/src/util.rs @@ -368,6 +368,7 @@ pub(crate) struct TopK { indices: Vec<i64>, tie_keys: Vec<i64>, tie_key_by_index: Option>, + score_offset: f32, filled: usize, /// Slot holding the worst kept entry under `(score asc, tie_key /// desc)` — the next to be evicted. @@ -387,6 +388,7 @@ impl TopK { indices: vec![-1; k], tie_keys: vec![i64::MAX; k], tie_key_by_index: None, + score_offset: 0.0, filled: 0, worst_pos: 0, worst_val: f32::INFINITY, @@ -406,8 +408,20 @@ impl TopK { top } + /// Apply a query-constant score offset before every insertion. + /// + /// SIMD RankQuant asymmetric kernels drop the bucket-center term in the hot + /// loop. Applying the offset here makes eviction and final ordering use the + /// same exposed score tuple returned to callers. + #[inline] + #[cfg_attr(not(target_arch = "x86_64"), allow(dead_code))] + pub(crate) fn set_score_offset(&mut self, score_offset: f32) { + self.score_offset = score_offset; + } + #[inline] pub(crate) fn maybe_insert(&mut self, score: f32, idx: usize) { + let score = score + self.score_offset; // Convert the doc_id to its i64 storage form once, up front. 
doc_ids // are `< n_vectors ≤ MAX_VECTORS` (2^26) by the `add` cap, so this // never fails in practice; the checked conversion makes the "a doc_id @@ -578,6 +592,22 @@ mod tests { assert_eq!(indices, [0, 1]); } + #[test] + fn topk_score_offset_is_part_of_eviction_key() { + let mut top = TopK::new(1); + top.set_score_offset(16_777_216.0); + + top.maybe_insert(1.0, 10); + top.maybe_insert(0.0, 3); + + let mut scores = [f32::NEG_INFINITY; 1]; + let mut indices = [-1; 1]; + top.finalize_into(&mut scores, &mut indices); + + assert_eq!(scores, [16_777_216.0]); + assert_eq!(indices, [3]); + } + #[test] fn checked_new_len_accepts_up_to_max() { use crate::rank_io::MAX_VECTORS;