algorithmicgovernance · smodee · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 28, 2026
diff --git a/bioscancast/insight/config.py b/bioscancast/insight/config.py
@@ -16,6 +16,8 @@
     "max_chunks_per_document": 12,
     "extraction_max_output_tokens": 4096,
     "chunk_workers": 6,
+    "low_survival_doc_threshold": 5,
+    "low_survival_top_k": 20,
 }
 
 
@@ -43,6 +45,18 @@ class InsightConfig:
     Set to 1 for sequential execution (useful for debugging or rate-
     limit-sensitive setups)."""
 
+    low_survival_doc_threshold: int = 5
+    """When the filter passes this many usable documents or fewer to
+    insight, switch to ``low_survival_top_k`` for both retrieval and the
+    per-document chunk cap. q7 reached insight with only 2 surviving
+    documents; in that regime per-doc retrieval depth becomes the
+    bottleneck on coverage."""
+
+    low_survival_top_k: int = 20
+    """Retrieval / per-doc cap used when usable documents are at or below
+    ``low_survival_doc_threshold``. Must be an int; set it no higher than
+    ``retrieval_top_k`` and ``max_chunks_per_document`` to disable the lift."""
+
     @classmethod
     def from_dict(cls, d: dict) -> InsightConfig:
         """Create an InsightConfig from a dict, ignoring unknown keys."""

diff --git a/bioscancast/insight/extraction/chunk_extractor.py b/bioscancast/insight/extraction/chunk_extractor.py
@@ -221,6 +221,24 @@ def _quote_matches(quote: str, chunk_text: str) -> Optional[str]:
     if unwrap_quote in unwrap_chunk:
         return unwrap_quote
 
+    # Layer 4: case-insensitive substring. Catches the model lowercasing
+    # the leading letter of a sentence it quotes from mid-paragraph - an
+    # otherwise-verbatim drift that is very common (q12 live runs:
+    # "there are now 750 suspected cases..." vs the source's "There are
+    # now 750..."). Returns the chunk's own casing so the stored quote
+    # reflects the source. Crucially this does NOT recover content-
+    # insertion hallucinations: a fabricated continuation still fails the
+    # substring test regardless of case (verified against the q12
+    # "...have been reported in Ituri, North Kivu" fabrication, whose real
+    # source text continues "...and 906 suspected cases").
+    ci_chunk = norm_chunk.lower()
+    for candidate in (norm_quote, stripped):
+        if not candidate:
+            continue
+        idx = ci_chunk.find(candidate.lower())
+        if idx >= 0:
+            return norm_chunk[idx: idx + len(candidate)]
+
     return None
 
 

diff --git a/bioscancast/insight/extraction/prompts.py b/bioscancast/insight/extraction/prompts.py
@@ -27,7 +27,12 @@
 by the chunk text.  Do NOT infer, speculate, or use outside knowledge.
 2. For each fact, provide a verbatim quote from the chunk (max 200 \
 characters) that supports the claim.  The quote must be an exact \
-substring of the chunk text.
+substring of the chunk text.  The quote MUST be the sentence (or \
+sentence fragment) that carries the figure itself — it must contain \
+the metric_value either as digits (e.g. "82"), as a number-word \
+(e.g. "eighty-two", "a dozen"), or as a clear relative reference \
+(e.g. "a quarter of the population"). A contextual or supporting \
+sentence that mentions the topic but not the figure is NOT acceptable.
 3. If the chunk contains no relevant facts, return an empty facts list. \
 This is expected and common — most chunks are irrelevant.
 4. Do NOT answer the forecast question.  Your job is fact extraction, \
@@ -40,12 +45,13 @@
 6. For metric_name, use one of these canonical snake_case values when \
 applicable (this lets downstream dedup merge facts about the same \
 metric across sources):
-   - confirmed_cases       (suspected, probable, possible all get \
-their own variants below)
-   - suspected_cases
-   - probable_cases
-   - confirmed_or_probable_cases
-   - deaths
+   - confirmed_cases       (the "confirmed" tier — lab-confirmed)
+   - suspected_cases       (the "not-yet-confirmed" tier — covers \
+"suspected", "probable", and "possible" reporting categories)
+   - confirmed_or_probable_cases   (WHO/CDC's combined reporting bucket)
+   - deaths                (lab-confirmed deaths)
+   - suspected_deaths      (the "not-yet-confirmed" tier for deaths — \
+covers "suspected", "probable", "under investigation" reporting)
    - hospitalizations
    - recoveries
    - vaccinations_administered
@@ -58,7 +64,10 @@
    If none of these fit, invent a short snake_case label. Do NOT put \
 qualifiers (sex, age, sub-region, time-period like "weekly") in \
 metric_name — capture those in `summary` or `location` instead. \
-"cases", "reported cases", "total cases" all map to confirmed_cases.
+"cases", "reported cases", "total cases" all map to confirmed_cases. \
+"suspected cases", "probable cases", "possible cases" all map to \
+suspected_cases. "deaths" alone maps to deaths; "suspected deaths", \
+"probable deaths", "deaths under investigation" map to suspected_deaths.
 7. Be aware of cognitive biases that affect information processing:
    - Anchoring: do not over-weight the first number you encounter.
    - Availability: rare dramatic events are not necessarily more likely.

diff --git a/bioscancast/insight/pipeline.py b/bioscancast/insight/pipeline.py
@@ -103,6 +103,30 @@ def run(
         result = InsightRunResult()
         embedding_cache: dict[str, list[float]] = {}
 
+        # Adaptive top-k: when the filter passes through only a handful
+        # of usable documents, lift retrieval depth so the per-doc chunk
+        # budget isn't the bottleneck on coverage. See InsightConfig
+        # docstrings for the rationale.
+        usable_doc_count = sum(
+            1 for d in documents if d.status != "failed" and d.chunks
+        )
+        if usable_doc_count <= config.low_survival_doc_threshold:
+            effective_top_k = max(config.retrieval_top_k, config.low_survival_top_k)
+            effective_max_chunks = max(
+                config.max_chunks_per_document, config.low_survival_top_k
+            )
+            if effective_top_k != config.retrieval_top_k:
+                result.notes.append(
+                    f"Low-survival adaptive top_k engaged: "
+                    f"{usable_doc_count} usable docs (≤ threshold "
+                    f"{config.low_survival_doc_threshold}); "
+                    f"retrieval_top_k={effective_top_k} (default "
+                    f"{config.retrieval_top_k})."
+                )
+        else:
+            effective_top_k = config.retrieval_top_k
+            effective_max_chunks = config.max_chunks_per_document
+
         for doc in documents:
             # --- Skip check ---
             if doc.status == "failed" or not doc.chunks:
@@ -126,15 +150,15 @@ def run(
                 question,
                 doc,
                 self._llm,
-                top_k=config.retrieval_top_k,
+                top_k=effective_top_k,
                 bm25_weight=config.bm25_weight,
                 embedding_weight=config.embedding_weight,
                 embedding_model=config.embedding_model,
                 embedding_cache=embedding_cache,
             )
 
             # Cap chunks per document
-            scored_chunks = scored_chunks[: config.max_chunks_per_document]
+            scored_chunks = scored_chunks[:effective_max_chunks]
 
             # --- Per-chunk extraction (parallel within a doc) ---
             # Live tests on real biosecurity documents show the per-doc

diff --git a/bioscancast/tests/test_insight_chunk_extractor.py b/bioscancast/tests/test_insight_chunk_extractor.py
@@ -367,13 +367,43 @@ def test_response_returned_for_budget_tracking():
 ]
 
 
+_LAYER4_CASE_INSENSITIVE_CASES = [
+    (
+        # Real q12 finding: model lowercased the leading "T" of a sentence
+        # it quoted from mid-paragraph; otherwise verbatim.
+        "leading letter lowercased by model",
+        "There are now 750 suspected cases and 177 suspected deaths, though more are expected.",
+        "there are now 750 suspected cases and 177 suspected deaths",
+        True,
+    ),
+    (
+        # Real q12 finding: same drift on a longer attribution clause.
+        "leading 'The' lowercased mid-paragraph quote",
+        "The Congolese Ministry of Communication, in a post to X on Sunday, said that there were 904 suspected cases and 119 suspected deaths.",
+        "the Congolese Ministry of Communication, in a post to X on Sunday, said that there were 904 suspected cases and 119 suspected deaths",
+        True,
+    ),
+]
+
+
 _HALLUCINATION_CASES = [
     (
         "fabricated word inserted into list",
         "Ghana and Liberia have reported human mpox due to clade IIa MPXV.",
         "Ghana, Atlantis, and Liberia have reported human mpox due to clade IIa MPXV.",
         False,
     ),
+    (
+        # Real q12 finding: model bolted a real prefix ("a total of 105
+        # confirmed cases (including 10 deaths)") onto a fabricated
+        # continuation. The source actually continues "...and 906
+        # suspected cases". Must stay rejected even with the new
+        # case-insensitive layer 4.
+        "real prefix bolted onto fabricated continuation (q12)",
+        "According to the Ministry of Health of DRC on 25 May, a total of 105 confirmed cases (including 10 deaths) and 906 suspected cases.",
+        "a total of 105 confirmed cases (including 10 deaths) have been reported in Ituri, North Kivu, and South Kivu",
+        False,
+    ),
     (
         "wholesale fabrication",
         "Some real chunk content about measles cases in Utah.",
@@ -410,7 +440,10 @@ def test_response_returned_for_budget_tracking():
 
 @pytest.mark.parametrize(
     "label,chunk_text,quote,should_match",
-    _LAYER1_NFKC_CASES + _LAYER2_TERMINAL_PUNCTUATION_CASES + _LAYER3_WRAPPING_PUNCTUATION_CASES,
+    _LAYER1_NFKC_CASES
+    + _LAYER2_TERMINAL_PUNCTUATION_CASES
+    + _LAYER3_WRAPPING_PUNCTUATION_CASES
+    + _LAYER4_CASE_INSENSITIVE_CASES,
 )
 def test_quote_matches_accepts_real_quotes_with_normalisation_drift(
     label, chunk_text, quote, should_match

diff --git a/bioscancast/tests/test_insight_pipeline.py b/bioscancast/tests/test_insight_pipeline.py
@@ -59,7 +59,9 @@ def test_pipeline_single_document():
         RISK_ASSESSMENT_RESPONSE, # chunk p4 (no facts)
     ])
 
-    config = InsightConfig(retrieval_top_k=5, max_chunks_per_document=5)
+    config = InsightConfig(
+        retrieval_top_k=5, max_chunks_per_document=5, low_survival_top_k=5,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN])
@@ -91,7 +93,9 @@ def test_pipeline_skips_failed_documents():
         EMPTY_RESPONSE,  # For the one chunk that gets extracted
     ])
 
-    config = InsightConfig(retrieval_top_k=1, max_chunks_per_document=1)
+    config = InsightConfig(
+        retrieval_top_k=1, max_chunks_per_document=1, low_survival_top_k=1,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     # Include a failed document alongside a successful one
@@ -114,7 +118,9 @@ def test_pipeline_budget_tracking():
         SUDAN_TABLE_RESPONSE,
     ])
 
-    config = InsightConfig(retrieval_top_k=2, max_chunks_per_document=2)
+    config = InsightConfig(
+        retrieval_top_k=2, max_chunks_per_document=2, low_survival_top_k=2,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN])
@@ -137,6 +143,7 @@ def test_pipeline_stops_on_budget_exceeded():
     config = InsightConfig(
         retrieval_top_k=2,
         max_chunks_per_document=2,
+        low_survival_top_k=2,
         max_input_tokens_per_run=1,  # Absurdly low -> triggers immediately
     )
     pipeline = InsightPipeline(llm_client=client, config=config)
@@ -170,7 +177,9 @@ def test_pipeline_deduplication():
         DUPLICATE_SUDAN_CASE_COUNT,  # doc 2 -> 1 fact (duplicate case)
     ])
 
-    config = InsightConfig(retrieval_top_k=1, max_chunks_per_document=1)
+    config = InsightConfig(
+        retrieval_top_k=1, max_chunks_per_document=1, low_survival_top_k=1,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN, doc2])
@@ -580,6 +589,7 @@ def test_pipeline_parallel_chunk_extraction_produces_all_records():
     config = InsightConfig(
         retrieval_top_k=4,
         max_chunks_per_document=4,
+        low_survival_top_k=4,
         chunk_workers=4,
     )
     pipeline = InsightPipeline(llm_client=fake, config=config)
@@ -603,10 +613,12 @@ def test_pipeline_sequential_and_parallel_produce_same_record_count():
     of records when the fake LLM is content-keyed (so result depends on
     chunk content, not worker order)."""
     config_seq = InsightConfig(
-        retrieval_top_k=4, max_chunks_per_document=4, chunk_workers=1,
+        retrieval_top_k=4, max_chunks_per_document=4, low_survival_top_k=4,
+        chunk_workers=1,
     )
     config_par = InsightConfig(
-        retrieval_top_k=4, max_chunks_per_document=4, chunk_workers=4,
+        retrieval_top_k=4, max_chunks_per_document=4, low_survival_top_k=4,
+        chunk_workers=4,
     )
 
     seq_pipeline = InsightPipeline(
@@ -653,7 +665,8 @@ def embed(self, texts, *, model):
 
     fake = _IntermittentFake()
     config = InsightConfig(
-        retrieval_top_k=4, max_chunks_per_document=4, chunk_workers=4,
+        retrieval_top_k=4, max_chunks_per_document=4, low_survival_top_k=4,
+        chunk_workers=4,
     )
     pipeline = InsightPipeline(llm_client=fake, config=config)
     # Must not raise — failed chunk is logged and skipped
@@ -684,7 +697,9 @@ def test_pipeline_multi_document():
         H5N1_TABLE_RESPONSE,
     ])
 
-    config = InsightConfig(retrieval_top_k=2, max_chunks_per_document=2)
+    config = InsightConfig(
+        retrieval_top_k=2, max_chunks_per_document=2, low_survival_top_k=2,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_H5N1, [DOC_WHO_SUDAN, DOC_CDC_H5N1])
@@ -709,7 +724,9 @@ def test_pipeline_output_records_valid():
         SUDAN_TABLE_RESPONSE,
     ])
 
-    config = InsightConfig(retrieval_top_k=2, max_chunks_per_document=2)
+    config = InsightConfig(
+        retrieval_top_k=2, max_chunks_per_document=2, low_survival_top_k=2,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN])