diff --git a/docs/RANK_MODES.md b/docs/RANK_MODES.md
index f36fed1..f95b4ac 100644
--- a/docs/RANK_MODES.md
+++ b/docs/RANK_MODES.md
@@ -427,6 +427,9 @@ serialisers living in [`src/rank_io.rs`](../src/rank_io.rs) and
 [`src/sign_bitmap.rs`](../src/sign_bitmap.rs). `RankQuant`
 additionally exposes `search_asymmetric_subset` for scoring a
 precomputed candidate set — the rerank half of the two-stage pattern.
+Candidate IDs are global row ordinals; duplicate candidates are scored as
+separate entries and can produce duplicate hits, so callers that need
+unique output rows should deduplicate candidate lists before reranking.
 
 `RankQuantFastscan` (re-exported `#[doc(hidden)]`) is an optional
 single-pass b=2 fast path; it supports `add`/`search` but not
diff --git a/fuzz/fuzz_targets/search_rankquant.rs b/fuzz/fuzz_targets/search_rankquant.rs
index c9e5ab5..3a2b678 100644
--- a/fuzz/fuzz_targets/search_rankquant.rs
+++ b/fuzz/fuzz_targets/search_rankquant.rs
@@ -9,8 +9,8 @@
 //! huge value. Invalid dimensions, non-finite floats, and ragged vector lengths
 //! are caller contract violations, so this target avoids them and treats any
 //! panic as a compute-path bug. Assertions stay structural: shape, finite
-//! scores, valid doc IDs, score-descending rows, and repeat determinism in one
-//! process.
+//! scores, valid doc IDs, score-descending/doc-ID-ascending rows, and repeat
+//! determinism in one process.
 #![no_main]
 
 use libfuzzer_sys::{
@@ -105,20 +105,20 @@ fn assert_results(label: &str, res: &SearchResults, nq: usize, k_eff: usize, n:
                 "{label}: doc id {id} out of range for n={n} at query {qi} slot {slot}",
             );
         }
-        for slot in 1..k_eff {
-            let prev = (scores[slot - 1], ids[slot - 1]);
-            let cur = (scores[slot], ids[slot]);
-            assert!(
-                cur.0 <= prev.0,
-                "{label}: row {qi} violates score-desc order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
-                slot - 1,
-            );
-            assert!(
-                cur.0 != prev.0 || cur.1 > prev.1,
-                "{label}: row {qi} violates id-asc tie order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
-                slot - 1,
-            );
-        }
+        assert_score_then_id_order(label, qi, scores, ids);
+    }
+}
+
+fn assert_score_then_id_order(label: &str, qi: usize, scores: &[f32], ids: &[i64]) {
+    for slot in 1..scores.len() {
+        let prev = (scores[slot - 1], ids[slot - 1]);
+        let cur = (scores[slot], ids[slot]);
+        let score_order = cur.0.total_cmp(&prev.0); // IEEE total order, so -0.0 < 0.0
+        assert!(
+            score_order.is_lt() || (score_order.is_eq() && cur.1 > prev.1),
+            "{label}: row {qi} violates score-desc/doc-id-asc order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
+            slot - 1,
+        );
     }
 }
 
diff --git a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
index 81bd1a1..1e35582 100644
--- a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
+++ b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
@@ -11,7 +11,9 @@
 //! reranking agrees with a full RankQuant search.
 //!
 //! Contract: no panic, abort, or out-of-bounds access on any in-range candidate
-//! input, and full-corpus candidate reranking must match full RankQuant search.
+//! input, subset reranking must preserve score-descending/doc-ID-ascending
+//! ordering, and full-corpus candidate reranking must match full RankQuant
+//! search.
 #![no_main]
 
 use libfuzzer_sys::{
@@ -36,8 +38,9 @@ fn assert_rankquant_order(label: &str, scores: &[f32], ids: &[i64]) {
     for slot in 1..scores.len() {
         let prev = (scores[slot - 1], ids[slot - 1]);
         let cur = (scores[slot], ids[slot]);
+        let score_order = cur.0.total_cmp(&prev.0);
         assert!(
-            cur.0 <= prev.0,
+            score_order.is_lt() || score_order.is_eq(),
             "{label}: violates score-desc order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
             slot - 1,
         );
diff --git a/ordvec-ffi/include/ordvec.h b/ordvec-ffi/include/ordvec.h
index 87ab16a..6907655 100644
--- a/ordvec-ffi/include/ordvec.h
+++ b/ordvec-ffi/include/ordvec.h
@@ -228,6 +228,11 @@ void ordvec_index_free(ordvec_index_t *index);
 /**
  * Run a synchronous single-query search.
  *
+ * When `params.candidate_rows` is supplied, those IDs are global row ordinals
+ * and may be unsorted or duplicated. Duplicate candidates are scored as
+ * separate entries and can produce duplicate hits; callers that need unique
+ * output rows must deduplicate before calling.
+ *
  * # Safety
  *
  * `index` must be a live handle returned by `ordvec_index_load`. All non-null
diff --git a/ordvec-ffi/src/lib.rs b/ordvec-ffi/src/lib.rs
index 98eab78..773e871 100644
--- a/ordvec-ffi/src/lib.rs
+++ b/ordvec-ffi/src/lib.rs
@@ -873,6 +873,11 @@ pub unsafe extern "C" fn ordvec_index_free(index: *mut ordvec_index_t) {
 #[no_mangle]
 /// Run a synchronous single-query search.
 ///
+/// When `params.candidate_rows` is supplied, those IDs are global row ordinals
+/// and may be unsorted or duplicated. Duplicate candidates are scored as
+/// separate entries and can produce duplicate hits; callers that need unique
+/// output rows must deduplicate before calling.
+///
 /// # Safety
 ///
 /// `index` must be a live handle returned by `ordvec_index_load`. All non-null
diff --git a/ordvec-go/doc.go b/ordvec-go/doc.go
index fb79eff..eaf3458 100644
--- a/ordvec-go/doc.go
+++ b/ordvec-go/doc.go
@@ -6,4 +6,8 @@
 // Search pins and passes caller-owned query and candidate slices to the C ABI
 // without copying them. Callers must not mutate those slices until Search
 // returns.
+//
+// Candidate slices are entry lists, not sets. Duplicate candidate IDs are scored
+// independently and can produce duplicate hits; callers that require unique row
+// IDs should deduplicate before Search.
 package ordvec
diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs
index 82a129e..984d8b1 100644
--- a/ordvec-python/src/lib.rs
+++ b/ordvec-python/src/lib.rs
@@ -731,9 +731,9 @@ impl RankQuant {
     /// Asymmetric scoring restricted to a candidate subset (e.g. the top-M
     /// shortlist from a [`Bitmap`] or [`SignBitmap`] probe). Returns
     /// ``(scores, global_ids)`` where ``global_ids`` are the original doc
-    /// indices (mapped from the local candidate slot); slots that could not be
-    /// filled are returned as ``-1``. Uses the same AVX-512 → AVX2 → scalar
-    /// dispatch as ``search_asymmetric``.
+    /// indices (mapped from the local candidate slot). ``k`` is capped to the
+    /// candidate-list length; the subset path does not add sentinel padding.
+    /// Uses the same AVX-512 → AVX2 → scalar dispatch as ``search_asymmetric``.
     ///
     /// ``candidates`` may be unsorted and may contain duplicate global doc IDs.
     /// Each candidate entry is scored independently, so duplicate IDs may
diff --git a/ordvec-python/tests/test_rank_quant.py b/ordvec-python/tests/test_rank_quant.py
index 7eb562e..79ef676 100644
--- a/ordvec-python/tests/test_rank_quant.py
+++ b/ordvec-python/tests/test_rank_quant.py
@@ -310,8 +310,9 @@ def test_search_asymmetric_subset_returns_global_ids():
     assert ids.dtype == np.int64
     # Self-query against a candidate set containing self → top-1 is self.
     assert int(ids[0]) == 0
-    # All returned ids are from the candidate set (or sentinel -1).
-    candidate_set = set(candidates.tolist()) | {-1}
+    # All returned ids are from the candidate set; k is capped instead of
+    # sentinel-padding unfilled slots.
+    candidate_set = set(candidates.tolist())
     for i in ids:
         assert int(i) in candidate_set
 
diff --git a/src/quant.rs b/src/quant.rs
index e626f47..cc40553 100644
--- a/src/quant.rs
+++ b/src/quant.rs
@@ -524,7 +524,6 @@ impl RankQuant {
     /// to global IDs before returning). Results are ordered by score
     /// descending, then global row ID ascending, matching the full-index
     /// search tie policy even when `candidates` is unsorted.
-    ///
     /// `candidates` may contain duplicate global row IDs. Each candidate entry
     /// is scored independently, so duplicate IDs may produce duplicate returned
     /// global IDs. Callers that require unique hits should deduplicate the
diff --git a/src/util.rs b/src/util.rs
index 83759c7..f6f7a5b 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -368,6 +368,11 @@ pub(crate) struct TopK {
     indices: Vec<i64>,
     tie_keys: Vec<i64>,
     tie_key_by_index: Option<Vec<i64>>,
+    /// Query-constant score offset applied before insertion/eviction.
+    ///
+    /// RankQuant SIMD asymmetric kernels can drop a per-query centre term from
+    /// the hot loop. Applying it here keeps TopK's retention key identical to
+    /// the public visible score key, including f32 rounding-collapse ties.
     score_offset: f32,
     filled: usize,
     /// Slot holding the worst kept entry under `(score asc, tie_key
@@ -451,8 +456,11 @@ impl TopK {
             // order: a higher score, or an equal score with a lower row key.
             // Full-index scans use `doc_id` as the tie key. Subset scans use
             // global row IDs while still emitting local scratch-buffer indices.
-            let better =
-                score > self.worst_val || (score == self.worst_val && tie_key < self.worst_tie_key);
+            let better = match score.total_cmp(&self.worst_val) {
+                std::cmp::Ordering::Greater => true,
+                std::cmp::Ordering::Equal => tie_key < self.worst_tie_key,
+                std::cmp::Ordering::Less => false,
+            };
             if better {
                 self.scores[self.worst_pos] = score;
                 self.indices[self.worst_pos] = id;
@@ -472,7 +480,12 @@ impl TopK {
         for i in 0..self.filled {
             let s = self.scores[i];
             let tie_key = self.tie_keys[i];
-            if s < wv || (s == wv && tie_key > wt) {
+            let worse = match s.total_cmp(&wv) {
+                std::cmp::Ordering::Less => true,
+                std::cmp::Ordering::Equal => tie_key > wt,
+                std::cmp::Ordering::Greater => false,
+            };
+            if worse {
                 wv = s;
                 wt = tie_key;
                 wp = i;
@@ -594,18 +607,17 @@ mod tests {
 
     #[test]
     fn topk_score_offset_is_part_of_eviction_key() {
-        let mut top = TopK::new(1);
+        let mut top = TopK::new_with_tie_keys(1, &[10, 3]);
         top.set_score_offset(16_777_216.0);
-
-        top.maybe_insert(1.0, 10);
-        top.maybe_insert(0.0, 3);
+        top.maybe_insert(1.0, 0);
+        top.maybe_insert(0.0, 1);
 
         let mut scores = [f32::NEG_INFINITY; 1];
         let mut indices = [-1; 1];
         top.finalize_into(&mut scores, &mut indices);
 
         assert_eq!(scores, [16_777_216.0]);
-        assert_eq!(indices, [3]);
+        assert_eq!(indices, [1]);
     }
 
     #[test]
diff --git a/tests/index/quant.rs b/tests/index/quant.rs
index 35b3e26..d829972 100644
--- a/tests/index/quant.rs
+++ b/tests/index/quant.rs
@@ -64,7 +64,7 @@ fn assert_rankquant_result_shape_and_order(
             let prev = (scores[slot - 1], ids[slot - 1]);
             let cur = (scores[slot], ids[slot]);
             assert!(
-                cur.0 <= prev.0,
+                cur.0.total_cmp(&prev.0).is_le(),
                 "{label}: row {qi} not sorted at slots {} and {slot}",
                 slot - 1,
             );
diff --git a/tests/index/two_stage.rs b/tests/index/two_stage.rs
index d434234..b63f477 100644
--- a/tests/index/two_stage.rs
+++ b/tests/index/two_stage.rs
@@ -19,6 +19,19 @@ fn assert_two_stage_invariants(sign: &SignBitmap, rankquant: &RankQuant) {
     assert_eq!(sign.len(), N);
 }
 
+fn assert_score_then_id_order(scores: &[f32], ids: &[i64]) {
+    for slot in 1..scores.len() {
+        let prev = (scores[slot - 1], ids[slot - 1]);
+        let cur = (scores[slot], ids[slot]); // equal IDs on a tie are accepted (`>=` below)
+        let score_order = cur.0.total_cmp(&prev.0); // IEEE total order, so -0.0 < 0.0
+        assert!(
+            score_order.is_lt() || (score_order.is_eq() && cur.1 >= prev.1),
+            "results violate score-desc/doc-id-asc order at slots {} and {slot}: prev={prev:?} cur={cur:?}",
+            slot - 1,
+        );
+    }
+}
+
 #[test]
 fn sign_rankquant_pipeline_handles_edge_candidate_and_k_shapes() {
     let (sign, rankquant, _corpus) = build_two_stage(2);
@@ -51,6 +64,7 @@ fn sign_rankquant_pipeline_handles_edge_candidate_and_k_shapes() {
     assert_eq!(scores.len(), shortlist.len());
     assert_eq!(ids.len(), shortlist.len());
     assert!(ids.iter().all(|&id| shortlist.contains(&(id as u32))));
+    assert_score_then_id_order(&scores, &ids);
 }
 
 #[test]
@@ -64,18 +78,42 @@ fn sign_rankquant_full_candidate_set_matches_full_rankquant_search() {
     let full = rankquant.search_asymmetric(query, 16);
     let (subset_scores, subset_ids) = rankquant.search_asymmetric_subset(query, &candidates, 16);
 
-    assert!(subset_ids
-        .iter()
-        .all(|&id| candidates.contains(&(id as u32))));
+    assert_eq!(subset_ids, full.indices_for_query(0));
     assert_eq!(subset_scores.len(), full.scores_for_query(0).len());
-    let mut subset_scores_sorted = subset_scores;
-    let mut full_scores_sorted = full.scores_for_query(0).to_vec();
-    subset_scores_sorted.sort_by(|left, right| left.total_cmp(right));
-    full_scores_sorted.sort_by(|left, right| left.total_cmp(right));
-    for (subset, full) in subset_scores_sorted.iter().zip(&full_scores_sorted) {
+    assert_score_then_id_order(&subset_scores, &subset_ids);
+    for (subset, full) in subset_scores.iter().zip(full.scores_for_query(0)) {
         assert!(
             (subset - full).abs() <= 1e-6,
             "subset score {subset} diverged from full score {full}"
         );
     }
 }
+
+#[test]
+fn sign_rankquant_subset_orders_visible_ties_after_centre_offset() {
+    let dim = 128usize;
+    let n_vectors = 5usize; // corpus rows; one extra row of floats becomes the query
+    let bits = 4u8;
+    let payload = [
+        158u8, 158, 158, 158, 158, 158, 158, 158, 158, 158, 137, 10, 10,
+    ];
+    let floats: Vec<f32> = (0..((n_vectors + 1) * dim))
+        .map(|i| payload[i % payload.len()] as f32 - 128.0)
+        .collect();
+    let (corpus, query) = floats.split_at(n_vectors * dim); // query = trailing `dim` floats
+
+    let mut sign = SignBitmap::new(dim);
+    let mut rankquant = RankQuant::new(dim, bits);
+    sign.add(corpus);
+    rankquant.add(corpus);
+
+    let candidates = sign.top_m_candidates(query, n_vectors);
+    assert_eq!(candidates.len(), n_vectors); // shortlist must hold n_vectors entries
+    // Ask for one more hit than there are candidates.
+    let (scores, ids) = rankquant.search_asymmetric_subset(query, &candidates, n_vectors + 1);
+
+    assert_eq!(scores.len(), n_vectors); // result length is the candidate count, not k
+    assert_eq!(ids.len(), n_vectors);
+    assert!(scores.iter().all(|score| score.is_finite()));
+    assert_score_then_id_order(&scores, &ids);
+}