diff --git a/fuzz/fuzz_targets/search_rankquant.rs b/fuzz/fuzz_targets/search_rankquant.rs
index 6c7b386..c9e5ab5 100644
--- a/fuzz/fuzz_targets/search_rankquant.rs
+++ b/fuzz/fuzz_targets/search_rankquant.rs
@@ -110,7 +110,12 @@ fn assert_results(label: &str, res: &SearchResults, nq: usize, k_eff: usize, n:
             let cur = (scores[slot], ids[slot]);
             assert!(
                 cur.0 <= prev.0,
-                "{label}: row {qi} not sorted at slots {} and {slot}",
+                "{label}: row {qi} violates score-desc order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
+                slot - 1,
+            );
+            assert!(
+                cur.0 != prev.0 || cur.1 > prev.1,
+                "{label}: row {qi} violates id-asc tie order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
                 slot - 1,
             );
         }
diff --git a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
index 3a45c4b..81bd1a1 100644
--- a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
+++ b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
@@ -31,6 +31,24 @@ struct TwoStageInput {
     payload: Vec<u8>,
 }
 
+fn assert_rankquant_order(label: &str, scores: &[f32], ids: &[i64]) { // panics on the first violation
+    assert_eq!(scores.len(), ids.len(), "{label}: score/id length mismatch");
+    for slot in 1..scores.len() {
+        let prev = (scores[slot - 1], ids[slot - 1]);
+        let cur = (scores[slot], ids[slot]);
+        assert!(
+            cur.0 <= prev.0, // primary key: score must not increase slot to slot
+            "{label}: violates score-desc order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
+            slot - 1,
+        );
+        assert!(
+            cur.0 != prev.0 || cur.1 >= prev.1, // tie: id must not decrease; equal ids pass
+            "{label}: violates id-asc tie order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
+            slot - 1,
+        );
+    }
+}
+
 impl<'a> Arbitrary<'a> for TwoStageInput {
     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
         let dim = *u.choose(&[64usize, 128, 256, 512])?;
@@ -108,7 +126,7 @@ fuzz_target!(|input: TwoStageInput| {
     assert_eq!(scores.len(), k_eff);
     assert_eq!(ids.len(), k_eff);
     assert!(scores.iter().all(|score| score.is_finite()));
-    assert!(scores.windows(2).all(|pair| pair[0] >= pair[1]));
+    assert_rankquant_order("subset rerank", &scores, &ids);
     for &id in &ids {
         assert!(id >= 0);
         assert!(subset_candidates.contains(&(id as u32)));
diff --git a/ordvec-ffi/include/ordvec.h b/ordvec-ffi/include/ordvec.h
index cc4f8c7..9627727 100644
--- a/ordvec-ffi/include/ordvec.h
+++ b/ordvec-ffi/include/ordvec.h
@@ -52,6 +52,10 @@ typedef struct {
   const float *query;
   uint64_t dim;
   uint64_t k;
+  /**
+   * Optional subset rows. Rows are global row IDs, may be unsorted, and may
+   * contain duplicates; duplicate entries are scored independently.
+   */
   const uint32_t *candidate_rows;
   uint64_t candidate_count;
   uint64_t flags;
diff --git a/ordvec-ffi/src/lib.rs b/ordvec-ffi/src/lib.rs
index 03f660d..1e832f8 100644
--- a/ordvec-ffi/src/lib.rs
+++ b/ordvec-ffi/src/lib.rs
@@ -73,6 +73,8 @@ pub struct ordvec_search_params_t {
     pub query: *const f32,
     pub dim: u64,
     pub k: u64,
+    /// Optional subset rows. Rows are global row IDs, may be unsorted, and may
+    /// contain duplicates; duplicate entries are scored independently.
     pub candidate_rows: *const u32,
     pub candidate_count: u64,
     pub flags: u64,
@@ -1012,6 +1014,36 @@ mod tests {
             );
             assert_eq!(returned, 2);
             assert_eq!([hits[0].row_id, hits[1].row_id], [1, 2]);
+
+            let duplicate_candidates = [3u32, 1, 1, 2];
+            params.k = 3;
+            params.candidate_rows = duplicate_candidates.as_ptr();
+            params.candidate_count = duplicate_candidates.len() as u64;
+            let mut hits = vec![
+                ordvec_hit_t {
+                    row_id: 0,
+                    id: 0,
+                    score: 0.0,
+                    reserved: 0
+                };
+                3
+            ];
+            let mut stats = default_stats();
+            assert_eq!(
+                ordvec_index_search(
+                    handle,
+                    &params,
+                    hits.as_mut_ptr(),
+                    3,
+                    &mut returned,
+                    &mut stats
+                ),
+                ORDVEC_STATUS_OK
+            );
+            assert_eq!(returned, 3);
+            assert_eq!([hits[0].row_id, hits[1].row_id, hits[2].row_id], [1, 1, 2]);
+            assert_eq!(stats.candidate_count, 4);
+            assert_eq!(stats.vectors_scored, 4);
             ordvec_index_free(handle);
         }
         std::fs::remove_file(path).ok();
diff --git a/ordvec-go/README.md b/ordvec-go/README.md
index 8fde2ef..3ea87e4 100644
--- a/ordvec-go/README.md
+++ b/ordvec-go/README.md
@@ -19,3 +19,8 @@ Search with `nil` options or `nil` `SearchOptions.Candidates` performs a full
 search. An empty, non-nil `Candidates` slice is treated as an explicit empty
 subset and returns a typed `StatusBadArgument`, matching the C ABI v1
 pointer/count contract.
+
+Subset candidates are global row IDs. They may be unsorted and may contain
+duplicates; duplicate entries are scored independently and can produce duplicate
+hits. Deduplicate `SearchOptions.Candidates` before calling `Search` when unique
+hits are required.
diff --git a/ordvec-go/ordvec.go b/ordvec-go/ordvec.go
index 6ca2593..4c0665f 100644
--- a/ordvec-go/ordvec.go
+++ b/ordvec-go/ordvec.go
@@ -126,6 +126,9 @@ type Stats struct {
 }
 
 type SearchOptions struct {
+	// Candidates is an optional subset of global row IDs. Entries may be
+	// unsorted and may contain duplicates; duplicate entries are scored
+	// independently and can produce duplicate hits.
 	Candidates []uint32
 	UserTag    uint64
 }
diff --git a/ordvec-go/ordvec_test.go b/ordvec-go/ordvec_test.go
index 4cdf2ee..ca2ef9b 100644
--- a/ordvec-go/ordvec_test.go
+++ b/ordvec-go/ordvec_test.go
@@ -121,6 +121,28 @@ func TestRankQuantSubsetSearchOrdersByRowID(t *testing.T) {
 	}
 }
 
+func TestRankQuantSubsetSearchAllowsDuplicateHits(t *testing.T) {
+	idx, err := Load(writeRankQuantFixture(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer idx.Close()
+
+	// Row 1 is listed twice on purpose: each entry must be scored and returned.
+	hits, stats, err := idx.Search(query16(), 3, &SearchOptions{
+		Candidates: []uint32{3, 1, 1, 2},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(hits) != 3 || hits[0].RowID != 1 || hits[1].RowID != 1 || hits[2].RowID != 2 {
+		t.Fatalf("unexpected hits (want rows [1 1 2]): %+v", hits)
+	}
+	if stats.Kind != KindRankQuant || stats.CandidateCount != 4 || stats.VectorsScored != 4 {
+		t.Fatalf("unexpected stats: %+v", stats)
+	}
+}
+
 func TestBitmapSubsetSearchAllowsDuplicateHits(t *testing.T) {
 	idx, err := Load(writeBitmapFixture(t))
 	if err != nil {
diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs
index 53721ea..d86cb38 100644
--- a/ordvec-python/src/lib.rs
+++ b/ordvec-python/src/lib.rs
@@ -735,6 +735,11 @@ impl RankQuant {
     /// filled are returned as ``-1``. Uses the same AVX-512 → AVX2 → scalar
     /// dispatch as ``search_asymmetric``.
     ///
+    /// ``candidates`` may be unsorted and may contain duplicate global doc IDs.
+    /// Each candidate entry is scored independently, so duplicate IDs may
+    /// produce duplicate returned global IDs. Deduplicate the array before
+    /// calling this method when unique hits are required.
+    ///
     /// If the shortlist came from [`Bitmap`], this is the exact RankQuant
     /// rerank stage over that survivor set; it does not itself apply or
     /// calibrate a bitmap overlap threshold.
diff --git a/ordvec-python/tests/test_rank_quant.py b/ordvec-python/tests/test_rank_quant.py
index 21bf830..7eb562e 100644
--- a/ordvec-python/tests/test_rank_quant.py
+++ b/ordvec-python/tests/test_rank_quant.py
@@ -347,6 +347,20 @@ def test_search_asymmetric_subset_ties_use_global_row_ids():
     np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32))
 
 
+def test_search_asymmetric_subset_duplicate_candidates_remain_duplicates():
+    vectors = np.ones((12, 64), dtype=np.float32)  # identical rows, so scores tie
+    idx = RankQuant(dim=64, bits=2)
+    idx.add(vectors)
+
+    candidates = np.array([7, 8, 7], dtype=np.uint32)  # row 7 listed twice
+    scores, ids = idx.search_asymmetric_subset(
+        np.zeros(64, dtype=np.float32), candidates, k=2
+    )
+
+    np.testing.assert_array_equal(ids, np.array([7, 7], dtype=np.int64))  # not [7, 8]
+    np.testing.assert_array_equal(scores, np.array([0.0, 0.0], dtype=np.float32))
+
+
 def test_search_asymmetric_subset_k_caps_at_candidate_count():
     # k > len(candidates) should silently cap — no panic, no sentinel
     # padding beyond the candidate-set size.
diff --git a/src/quant.rs b/src/quant.rs
index f770043..e626f47 100644
--- a/src/quant.rs
+++ b/src/quant.rs
@@ -338,10 +338,11 @@ impl RankQuant {
         #[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))]
         let simd_tier = select_simd_tier(dim, bits);
 
-        // For the AVX2 path we drop the per-lane centre subtract from
-        // the hot loop and add it back as a per-query constant offset
-        // to the top-k scores at finalize time. Ranking is invariant
-        // to this constant; absolute scores stay exact.
+        // The SIMD paths drop the per-lane centre subtract from the hot
+        // loop. The query-constant offset is applied inside TopK before
+        // eviction, so boundary ties use the same exposed score tuple that
+        // callers receive.
+        #[cfg(target_arch = "x86_64")]
         let centre = ((1u32 << bits) as f32 - 1.0) / 2.0;
 
         queries
@@ -351,27 +352,30 @@ impl RankQuant {
             .for_each(|((q, out_scores), out_indices)| {
                 let q_unit = l2_normalise(q);
                 let mut top = TopK::new(k_eff);
-                #[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))]
-                let mut centre_drop_used = false;
+                #[cfg(target_arch = "x86_64")]
+                let centre_offset = {
+                    let q_sum: f32 = q_unit.iter().sum();
+                    -centre * q_sum * inv_norm
+                };
 
                 #[cfg(target_arch = "x86_64")]
                 unsafe {
                     match (simd_tier, bits) {
                         (SimdTier::Avx512, 2) => {
+                            top.set_score_offset(centre_offset);
                             scan_b2_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
-                            centre_drop_used = true;
                         }
                         (SimdTier::Avx512, 4) => {
+                            top.set_score_offset(centre_offset);
                             scan_b4_asym_avx512(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
-                            centre_drop_used = true;
                         }
                         (SimdTier::Avx2, 2) => {
+                            top.set_score_offset(centre_offset);
                             scan_b2_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
-                            centre_drop_used = true;
                         }
                         (SimdTier::Avx2, 4) => {
+                            top.set_score_offset(centre_offset);
                             scan_b4_asym_avx2(&self.packed, n, dim, &q_unit, inv_norm, &mut top);
-                            centre_drop_used = true;
                         }
                         _ => scan_via_lut_scalar(
                             &self.packed,
@@ -399,25 +403,6 @@ impl RankQuant {
 
                 top.finalize_into(out_scores, out_indices);
 
-                if centre_drop_used {
-                    // The asym kernels drop the per-lane `- centre` term from
-                    // the hot loop; it is a query-constant shift, re-applied
-                    // here. Guarded by `is_finite` so it lands only on filled
-                    // slots: when fewer than `k` docs were scored the trailing
-                    // top-k positions stay at the `f32::NEG_INFINITY` sentinel,
-                    // and `NEG_INFINITY + offset` would wrongly turn a sentinel
-                    // into a finite score. (Real scores are always finite — the
-                    // finite-input policy guarantees it — so the guard only ever
-                    // skips sentinels, never a genuine result.)
-                    let q_sum: f32 = q_unit.iter().sum();
-                    let offset = -centre * q_sum * inv_norm;
-                    for s in out_scores.iter_mut() {
-                        if s.is_finite() {
-                            *s += offset;
-                        }
-                    }
-                }
-
                 let _ = bytes_per_vec; // shape clarity
             });
 
@@ -540,6 +525,11 @@ impl RankQuant {
     /// descending, then global row ID ascending, matching the full-index
     /// search tie policy even when `candidates` is unsorted.
     ///
+    /// `candidates` may contain duplicate global row IDs. Each candidate entry
+    /// is scored independently, so duplicate IDs may produce duplicate returned
+    /// global IDs. Callers that require unique hits should deduplicate the
+    /// candidate list before calling this method.
+    ///
     /// Uses the same AVX-512 → AVX2 → scalar dispatch as
     /// [`Self::search_asymmetric`] and the same centre-drop math, just
     /// iterates over the provided candidate list instead of all `n`
@@ -585,12 +575,16 @@ impl RankQuant {
 
         let norm = rankquant_norm(dim, bits);
         let inv_norm = 1.0_f32 / norm;
+        #[cfg(target_arch = "x86_64")]
         let centre = ((1u32 << bits) as f32 - 1.0) / 2.0;
 
-        // L2-normalise the query and gather centre-correction.
+        // L2-normalise the query.
         let q_unit = l2_normalise(query);
-        let q_sum: f32 = q_unit.iter().sum();
-        let centre_offset = -centre * q_sum * inv_norm;
+        #[cfg(target_arch = "x86_64")]
+        let centre_offset = {
+            let q_sum: f32 = q_unit.iter().sum();
+            -centre * q_sum * inv_norm
+        };
 
         // Pack the candidate docs' bytes into a contiguous buffer so
         // the SIMD kernels can scan them as if they were a small dense
@@ -609,26 +603,24 @@ impl RankQuant {
         #[cfg_attr(not(target_arch = "x86_64"), allow(unused_variables))]
         let simd_tier = select_simd_tier(dim, bits);
         let mut top = TopK::new_with_tie_keys(k_eff, candidates);
-        #[cfg_attr(not(target_arch = "x86_64"), allow(unused_mut))]
-        let mut centre_drop_used = false;
         #[cfg(target_arch = "x86_64")]
         unsafe {
             match (simd_tier, bits) {
                 (SimdTier::Avx512, 2) => {
+                    top.set_score_offset(centre_offset);
                     scan_b2_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
-                    centre_drop_used = true;
                 }
                 (SimdTier::Avx512, 4) => {
+                    top.set_score_offset(centre_offset);
                     scan_b4_asym_avx512(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
-                    centre_drop_used = true;
                 }
                 (SimdTier::Avx2, 2) => {
+                    top.set_score_offset(centre_offset);
                     scan_b2_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
-                    centre_drop_used = true;
                 }
                 (SimdTier::Avx2, 4) => {
+                    top.set_score_offset(centre_offset);
                     scan_b4_asym_avx2(&sub_packed, m, dim, &q_unit, inv_norm, &mut top);
-                    centre_drop_used = true;
                 }
                 _ => scan_via_lut_scalar(
                     &sub_packed,
@@ -657,17 +649,6 @@ impl RankQuant {
         let mut scores = vec![f32::NEG_INFINITY; k_eff];
         let mut local_indices = vec![-1i64; k_eff];
         top.finalize_into(&mut scores, &mut local_indices);
-        if centre_drop_used {
-            // Re-apply the per-query centre shift dropped from the kernel hot
-            // loop; the `is_finite` guard skips unfilled top-k slots (still at
-            // the `f32::NEG_INFINITY` sentinel) so a sentinel never becomes a
-            // finite score. See the matching note in `search_asymmetric`.
-            for s in scores.iter_mut() {
-                if s.is_finite() {
-                    *s += centre_offset;
-                }
-            }
-        }
         // Map local → global doc IDs.
         let global_indices: Vec<i64> = local_indices
             .iter()
diff --git a/src/util.rs b/src/util.rs
index 0229f72..83759c7 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -368,6 +368,7 @@ pub(crate) struct TopK {
     indices: Vec<i64>,
     tie_keys: Vec<i64>,
     tie_key_by_index: Option<Vec<i64>>,
+    score_offset: f32,
     filled: usize,
     /// Slot holding the worst kept entry under `(score asc, tie_key
     /// desc)` — the next to be evicted.
@@ -387,6 +388,7 @@ impl TopK {
             indices: vec![-1; k],
             tie_keys: vec![i64::MAX; k],
             tie_key_by_index: None,
+            score_offset: 0.0,
             filled: 0,
             worst_pos: 0,
             worst_val: f32::INFINITY,
@@ -406,8 +408,20 @@ impl TopK {
         top
     }
 
+    /// Set the query-constant offset that `maybe_insert` adds to each score.
+    ///
+    /// SIMD RankQuant asymmetric kernels drop the bucket-center term in the hot
+    /// loop; adding it at insertion makes eviction and final ordering use the
+    /// exposed score. Set it before the first insert: stored scores are not revised.
+    #[inline]
+    #[cfg_attr(not(target_arch = "x86_64"), allow(dead_code))]
+    pub(crate) fn set_score_offset(&mut self, score_offset: f32) {
+        self.score_offset = score_offset;
+    }
+
     #[inline]
     pub(crate) fn maybe_insert(&mut self, score: f32, idx: usize) {
+        let score = score + self.score_offset;
         // Convert the doc_id to its i64 storage form once, up front. doc_ids
         // are `< n_vectors ≤ MAX_VECTORS` (2^26) by the `add` cap, so this
         // never fails in practice; the checked conversion makes the "a doc_id
@@ -578,6 +592,22 @@ mod tests {
         assert_eq!(indices, [0, 1]);
     }
 
+    #[test]
+    fn topk_score_offset_is_part_of_eviction_key() {
+        let mut top = TopK::new(1);
+        top.set_score_offset(16_777_216.0); // 2^24; 2^24 + 1.0 rounds to 2^24 in f32
+
+        top.maybe_insert(1.0, 10); // offset score rounds to 2^24
+        top.maybe_insert(0.0, 3); // offset score is exactly 2^24: a tie with index 10
+
+        let mut scores = [f32::NEG_INFINITY; 1];
+        let mut indices = [-1; 1];
+        top.finalize_into(&mut scores, &mut indices);
+
+        assert_eq!(scores, [16_777_216.0]);
+        assert_eq!(indices, [3]); // tie on the offset score keeps the lower index
+    }
+
     #[test]
     fn checked_new_len_accepts_up_to_max() {
         use crate::rank_io::MAX_VECTORS;