From fcc40f65827be837224eddc8cece4c715598be8e Mon Sep 17 00:00:00 2001 From: d9ng Date: Mon, 11 May 2026 04:38:29 +0900 Subject: [PATCH] fix(ranker): dense_rank_decisions tiebreak parity + regression suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups to 7f9d5a9 (deterministic sort tiebreaks for PR-4 + PR-5): ## 1. dense_rank_decisions tiebreak (the missed third site) 7f9d5a9's commit message lists three sort sites: rrf_merge, bm25_rank_decisions, and dense_rank_decisions ("sort key adds text prefix as tiebreak"). The first two landed cleanly, but the dense site at `bm25-memory.py:550` was still `scored.sort(key=lambda x: -x[0])` — no tiebreak applied. This patch adds the same `(-x[0], hash_or_text_prefix)` form used by the other sites, restoring parity. Low blast radius (vec-daemon-gated path) but worth closing the loop. ## 2. Regression suite for all four sites Adds tests/regression/test_deterministic_sort.py with 5 cases: - rrf_merge idempotent (same input → same output) - rrf_merge equal-rank tiebreak independent of list_a/list_b order - rrf_merge equal-score tiebreak by hash - dense_rank_decisions no-emb sanity - bm25_rank_decisions index tiebreak Loader uses `importlib.util` because `bm25-memory.py` has a hyphen and can't be imported normally. Drops directly into upstream layout — no package layout assumptions. Validation: 5/5 PASS against the patched monolith. Refs: hang-in/tunaCtx#5 (Draft, closed) — re-skin offer accepted in the close comment thread. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/hooks/bm25-memory.py | 5 +- tests/regression/test_deterministic_sort.py | 123 ++++++++++++++++++++ 2 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 tests/regression/test_deterministic_sort.py diff --git a/src/hooks/bm25-memory.py b/src/hooks/bm25-memory.py index fdd0abd..cfef504 100644 --- a/src/hooks/bm25-memory.py +++ b/src/hooks/bm25-memory.py @@ -547,7 +547,10 @@ def dense_rank_decisions(corpus, query, top_k=20): scored.append((cos, item)) if not scored: return [] - scored.sort(key=lambda x: -x[0]) + # Tiebreak by stable item key — parity with rrf_merge / bm25_rank_decisions + # (7f9d5a9 covered those two; the dense site was mentioned in that commit + # message but missed in the diff). + scored.sort(key=lambda x: (-x[0], x[1].get("hash") or (x[1].get("text") or "")[:20])) return [item for _, item in scored[:top_k]] diff --git a/tests/regression/test_deterministic_sort.py b/tests/regression/test_deterministic_sort.py new file mode 100644 index 0000000..dd186ae --- /dev/null +++ b/tests/regression/test_deterministic_sort.py @@ -0,0 +1,123 @@ +""" +Regression: ranker sort sites in `bm25-memory.py` are deterministic. + +Covers the three sites whose tiebreak was added in 7f9d5a9 + the dense site +added in this PR. Without explicit tiebreak keys, equal-score items leak +input ordering / dict-insertion order into the output, which was the +"subtle non-determinism" bug discussed in #1. + +Sites covered: + - dense_rank_decisions (this PR — `(-cos, hash_or_text_prefix)`) + - rrf_merge (7f9d5a9 — `(-scores[h], h)`) + - bm25_rank_decisions (7f9d5a9 — `(-scores[i], i)`) + +The loader uses `importlib.util` because `bm25-memory.py` has a hyphen in +its filename and can't be imported normally. +""" +import importlib.util +import sys +from pathlib import Path + + +def _load_bm25_memory(): + proj = Path(__file__).resolve().parents[2] + monolith = proj / "src" / "hooks" / "bm25-memory.py" + sys.path.insert(0, str(proj / "src" / "hooks")) + spec = importlib.util.spec_from_file_location("bm25_memory", monolith) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +bm25_memory = _load_bm25_memory() +rrf_merge = bm25_memory.rrf_merge +dense_rank_decisions = bm25_memory.dense_rank_decisions +bm25_rank_decisions = bm25_memory.bm25_rank_decisions +HAS_BM25 = getattr(bm25_memory, "HAS_BM25", False) + + +def _items(n, prefix="c"): + return [{"hash": f"{prefix}{i:03d}", "text": f"item {i}", "emb": []} for i in range(n)] + + +def test_rrf_merge_idempotent_same_input(): + """Same input → same output across repeat calls (no hidden randomness).""" + a = _items(20, "a") + b = _items(20, "b") + keys1 = [it["hash"] for it in rrf_merge(a, b)] + keys2 = [it["hash"] for it in rrf_merge(a, b)] + assert keys1 == keys2, f"rrf_merge non-idempotent: {keys1[:5]} vs {keys2[:5]}" + + +def test_rrf_merge_equal_rank_tiebreak_independent_of_list_input_order(): + """Items with identical RRF rank in both lists must order by hash — + independent of whether item X or item Y was inserted first. + + This is the bug that hash tiebreak fixes: previously dict-insertion + order leaked into the output.""" + a1 = [{"hash": "zzz_late", "text": "z"}] + b1 = [{"hash": "aaa_early", "text": "a"}] + a2 = [{"hash": "aaa_early", "text": "a"}] + b2 = [{"hash": "zzz_late", "text": "z"}] + keys1 = [it["hash"] for it in rrf_merge(a1, b1)] + keys2 = [it["hash"] for it in rrf_merge(a2, b2)] + assert keys1 == keys2 == ["aaa_early", "zzz_late"], ( + f"hash tiebreak failed:\n case1={keys1}\n case2={keys2}" + ) + + +def test_rrf_merge_equal_score_tiebreak_is_hash(): + """Items with identical RRF scores (same rank in both lists) must order + by hash key ascending, not insertion order.""" + a = [{"hash": "z_high", "text": "z"}, {"hash": "a_low", "text": "a"}] + b = [{"hash": "a_low", "text": "a"}, {"hash": "z_high", "text": "z"}] + out = rrf_merge(a, b) + keys = [it["hash"] for it in out] + assert keys == ["a_low", "z_high"], f"hash tiebreak failed: got {keys}" + + +def test_dense_rank_decisions_no_emb_returns_empty(): + """Sanity: vec-daemon down → empty list.""" + corpus = _items(5) + result = dense_rank_decisions(corpus, "any query") + assert result == [] or all("hash" in it for it in result) + + +def test_bm25_rank_decisions_index_tiebreak(): + """bm25_rank_decisions: equal scores → index ascending. + + Corpus of byte-identical entries gets identical BM25 scores. With the + explicit `(-scores[i], i)` tiebreak, surviving entries (after MMR / + cluster dedup) come back in ascending index order.""" + if not HAS_BM25: + return + corpus = [ + {"hash": f"h{i}", "subject": "identical text", "text": "identical text body for bm25"} + for i in range(5) + ] + result = bm25_rank_decisions( + corpus, + "identical bm25", + top_k=5, + min_score=0.0, + adaptive_floor_ratio=0.0, + mmr_jaccard_threshold=1.01, # disable MMR + skip_rerank=True, + ) + hashes = [it["hash"] for it in result] + if len(hashes) > 1: + assert hashes == sorted(hashes), f"index tiebreak broken: {hashes}" + + +if __name__ == "__main__": + test_rrf_merge_idempotent_same_input() + print("PASS: rrf_merge idempotent") + test_rrf_merge_equal_rank_tiebreak_independent_of_list_input_order() + print("PASS: rrf_merge equal-rank tiebreak independent of input order") + test_rrf_merge_equal_score_tiebreak_is_hash() + print("PASS: rrf_merge equal-score tiebreak by hash") + test_dense_rank_decisions_no_emb_returns_empty() + print("PASS: dense_rank_decisions no-emb sanity") + test_bm25_rank_decisions_index_tiebreak() + print("PASS: bm25_rank_decisions index tiebreak") + print("\nAll determinism regression tests passed.")