diff --git a/bioscancast/insight/config.py b/bioscancast/insight/config.py
index fad6418..6eaf37b 100644
--- a/bioscancast/insight/config.py
+++ b/bioscancast/insight/config.py
@@ -16,6 +16,8 @@
     "max_chunks_per_document": 12,
     "extraction_max_output_tokens": 4096,
     "chunk_workers": 6,
+    "low_survival_doc_threshold": 5,
+    "low_survival_top_k": 20,
 }
 
 
@@ -43,6 +45,18 @@ class InsightConfig:
     Set to 1 for sequential execution (useful for debugging or rate-
     limit-sensitive setups)."""
 
+    low_survival_doc_threshold: int = 5
+    """When the filter passes this many or fewer usable documents to
+    insight, raise both retrieval depth and the per-document chunk cap
+    to at least ``low_survival_top_k``. q7 reached insight with only 2
+    surviving documents; in that regime per-doc retrieval depth becomes
+    the bottleneck on coverage."""
+
+    low_survival_top_k: int = 20
+    """Floor for retrieval depth / per-doc cap when usable documents are at
+    or below ``low_survival_doc_threshold``. Must be an int (``None`` breaks
+    the ``max()`` in the pipeline); set it <= the base values to disable."""
+
     @classmethod
     def from_dict(cls, d: dict) -> InsightConfig:
         """Create an InsightConfig from a dict, ignoring unknown keys."""
diff --git a/bioscancast/insight/extraction/chunk_extractor.py b/bioscancast/insight/extraction/chunk_extractor.py
index dd24d71..d8a773d 100644
--- a/bioscancast/insight/extraction/chunk_extractor.py
+++ b/bioscancast/insight/extraction/chunk_extractor.py
@@ -221,6 +221,24 @@ def _quote_matches(quote: str, chunk_text: str) -> Optional[str]:
     if unwrap_quote in unwrap_chunk:
         return unwrap_quote
 
+    # Layer 4: case-insensitive substring. Catches the model lowercasing
+    # the leading letter of a sentence it quotes from mid-paragraph while
+    # keeping the rest verbatim - a very common drift (q12 live runs:
+    # "there are now 750 suspected cases..." vs the source's "There are
+    # now 750..."). Returns the chunk's own casing so the stored quote
+    # reflects the source. This does NOT recover content-insertion
+    # hallucinations: a fabricated continuation still fails the substring
+    # test regardless of case (q12: "...have been reported in Ituri, North
+    # Kivu" vs. real "...and 906 suspected cases"). NOTE(review): the slice
+    # assumes .lower() preserves length; e.g. "İ".lower() does not.
+    ci_chunk = norm_chunk.lower()
+    for candidate in (norm_quote, stripped):
+        if not candidate:
+            continue
+        idx = ci_chunk.find(candidate.lower())
+        if idx >= 0:
+            return norm_chunk[idx: idx + len(candidate)]
+
     return None
 
 
diff --git a/bioscancast/insight/extraction/prompts.py b/bioscancast/insight/extraction/prompts.py
index 0f74b4c..a750014 100644
--- a/bioscancast/insight/extraction/prompts.py
+++ b/bioscancast/insight/extraction/prompts.py
@@ -27,7 +27,12 @@
 by the chunk text.  Do NOT infer, speculate, or use outside knowledge.
 2. For each fact, provide a verbatim quote from the chunk (max 200 \
 characters) that supports the claim.  The quote must be an exact \
-substring of the chunk text.
+substring of the chunk text.  The quote MUST be the sentence (or \
+sentence fragment) that carries the figure itself — it must contain \
+the metric_value either as digits (e.g. "82"), as a number-word \
+(e.g. "eighty-two", "a dozen"), or as a clear relative reference \
+(e.g. "a quarter of the population"). A contextual or supporting \
+sentence that mentions the topic but not the figure is NOT acceptable.
 3. If the chunk contains no relevant facts, return an empty facts list. \
 This is expected and common — most chunks are irrelevant.
 4. Do NOT answer the forecast question.  Your job is fact extraction, \
@@ -40,12 +45,13 @@
 6. For metric_name, use one of these canonical snake_case values when \
 applicable (this lets downstream dedup merge facts about the same \
 metric across sources):
-   - confirmed_cases       (suspected, probable, possible all get \
-their own variants below)
-   - suspected_cases
-   - probable_cases
-   - confirmed_or_probable_cases
-   - deaths
+   - confirmed_cases       (the "confirmed" tier — lab-confirmed)
+   - suspected_cases       (the "not-yet-confirmed" tier — covers \
+"suspected", "probable", and "possible" reporting categories)
+   - confirmed_or_probable_cases   (WHO/CDC's combined reporting bucket)
+   - deaths                (lab-confirmed deaths)
+   - suspected_deaths      (the "not-yet-confirmed" tier for deaths — \
+covers "suspected", "probable", "under investigation" reporting)
    - hospitalizations
    - recoveries
    - vaccinations_administered
@@ -58,7 +64,10 @@
    If none of these fit, invent a short snake_case label. Do NOT put \
 qualifiers (sex, age, sub-region, time-period like "weekly") in \
 metric_name — capture those in `summary` or `location` instead. \
-"cases", "reported cases", "total cases" all map to confirmed_cases.
+"cases", "reported cases", "total cases" all map to confirmed_cases. \
+"suspected cases", "probable cases", "possible cases" all map to \
+suspected_cases. "deaths" alone maps to deaths; "suspected deaths", \
+"probable deaths", "deaths under investigation" map to suspected_deaths.
 7. Be aware of cognitive biases that affect information processing:
    - Anchoring: do not over-weight the first number you encounter.
    - Availability: rare dramatic events are not necessarily more likely.
diff --git a/bioscancast/insight/pipeline.py b/bioscancast/insight/pipeline.py
index ea294fd..7d5c5dd 100644
--- a/bioscancast/insight/pipeline.py
+++ b/bioscancast/insight/pipeline.py
@@ -103,6 +103,30 @@ def run(
         result = InsightRunResult()
         embedding_cache: dict[str, list[float]] = {}
 
+        # Adaptive top-k: when the filter passes through only a handful
+        # of usable documents, lift retrieval depth so the per-doc chunk
+        # budget isn't the bottleneck on coverage. See InsightConfig
+        # docstrings for the rationale.
+        usable_doc_count = sum(
+            1 for d in documents if d.status != "failed" and d.chunks
+        )
+        if usable_doc_count <= config.low_survival_doc_threshold:
+            effective_top_k = max(config.retrieval_top_k, config.low_survival_top_k)
+            effective_max_chunks = max(
+                config.max_chunks_per_document, config.low_survival_top_k
+            )
+            if effective_top_k != config.retrieval_top_k:
+                result.notes.append(
+                    f"Low-survival adaptive top_k engaged: "
+                    f"{usable_doc_count} usable docs (≤ threshold "
+                    f"{config.low_survival_doc_threshold}); "
+                    f"retrieval_top_k={effective_top_k} (default "
+                    f"{config.retrieval_top_k})."
+                )
+        else:
+            effective_top_k = config.retrieval_top_k
+            effective_max_chunks = config.max_chunks_per_document
+
         for doc in documents:
             # --- Skip check ---
             if doc.status == "failed" or not doc.chunks:
@@ -126,7 +150,7 @@ def run(
                 question,
                 doc,
                 self._llm,
-                top_k=config.retrieval_top_k,
+                top_k=effective_top_k,
                 bm25_weight=config.bm25_weight,
                 embedding_weight=config.embedding_weight,
                 embedding_model=config.embedding_model,
@@ -134,7 +158,7 @@ def run(
             )
 
             # Cap chunks per document
-            scored_chunks = scored_chunks[: config.max_chunks_per_document]
+            scored_chunks = scored_chunks[:effective_max_chunks]
 
             # --- Per-chunk extraction (parallel within a doc) ---
             # Live tests on real biosecurity documents show the per-doc
diff --git a/bioscancast/tests/test_insight_chunk_extractor.py b/bioscancast/tests/test_insight_chunk_extractor.py
index 2ba0421..4a112b9 100644
--- a/bioscancast/tests/test_insight_chunk_extractor.py
+++ b/bioscancast/tests/test_insight_chunk_extractor.py
@@ -367,6 +367,25 @@ def test_response_returned_for_budget_tracking():
 ]
 
 
+_LAYER4_CASE_INSENSITIVE_CASES = [
+    (
+        # Real q12 finding: model lowercased the leading "T" of a sentence
+        # it quoted from mid-paragraph; otherwise verbatim.
+        "leading letter lowercased by model",
+        "There are now 750 suspected cases and 177 suspected deaths, though more are expected.",
+        "there are now 750 suspected cases and 177 suspected deaths",
+        True,
+    ),
+    (
+        # Real q12 finding: same drift on a longer attribution clause.
+        "leading 'The' lowercased mid-paragraph quote",
+        "The Congolese Ministry of Communication, in a post to X on Sunday, said that there were 904 suspected cases and 119 suspected deaths.",
+        "the Congolese Ministry of Communication, in a post to X on Sunday, said that there were 904 suspected cases and 119 suspected deaths",
+        True,
+    ),
+]
+
+
 _HALLUCINATION_CASES = [
     (
         "fabricated word inserted into list",
@@ -374,6 +393,17 @@ def test_response_returned_for_budget_tracking():
         "Ghana, Atlantis, and Liberia have reported human mpox due to clade IIa MPXV.",
         False,
     ),
+    (
+        # Real q12 finding: model bolted a real prefix ("a total of 105
+        # confirmed cases (including 10 deaths)") onto a fabricated
+        # continuation. The source actually continues "...and 906
+        # suspected cases". Must stay rejected even with the new
+        # case-insensitive layer 4.
+        "real prefix bolted onto fabricated continuation (q12)",
+        "According to the Ministry of Health of DRC on 25 May, a total of 105 confirmed cases (including 10 deaths) and 906 suspected cases.",
+        "a total of 105 confirmed cases (including 10 deaths) have been reported in Ituri, North Kivu, and South Kivu",
+        False,
+    ),
     (
         "wholesale fabrication",
         "Some real chunk content about measles cases in Utah.",
@@ -410,7 +440,10 @@ def test_response_returned_for_budget_tracking():
 
 @pytest.mark.parametrize(
     "label,chunk_text,quote,should_match",
-    _LAYER1_NFKC_CASES + _LAYER2_TERMINAL_PUNCTUATION_CASES + _LAYER3_WRAPPING_PUNCTUATION_CASES,
+    _LAYER1_NFKC_CASES
+    + _LAYER2_TERMINAL_PUNCTUATION_CASES
+    + _LAYER3_WRAPPING_PUNCTUATION_CASES
+    + _LAYER4_CASE_INSENSITIVE_CASES,
 )
 def test_quote_matches_accepts_real_quotes_with_normalisation_drift(
     label, chunk_text, quote, should_match
diff --git a/bioscancast/tests/test_insight_pipeline.py b/bioscancast/tests/test_insight_pipeline.py
index 1d6b9ba..679924a 100644
--- a/bioscancast/tests/test_insight_pipeline.py
+++ b/bioscancast/tests/test_insight_pipeline.py
@@ -59,7 +59,9 @@ def test_pipeline_single_document():
         RISK_ASSESSMENT_RESPONSE, # chunk p4 (no facts)
     ])
 
-    config = InsightConfig(retrieval_top_k=5, max_chunks_per_document=5)
+    config = InsightConfig(
+        retrieval_top_k=5, max_chunks_per_document=5, low_survival_top_k=5,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN])
@@ -91,7 +93,9 @@ def test_pipeline_skips_failed_documents():
         EMPTY_RESPONSE,  # For the one chunk that gets extracted
     ])
 
-    config = InsightConfig(retrieval_top_k=1, max_chunks_per_document=1)
+    config = InsightConfig(
+        retrieval_top_k=1, max_chunks_per_document=1, low_survival_top_k=1,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     # Include a failed document alongside a successful one
@@ -114,7 +118,9 @@ def test_pipeline_budget_tracking():
         SUDAN_TABLE_RESPONSE,
     ])
 
-    config = InsightConfig(retrieval_top_k=2, max_chunks_per_document=2)
+    config = InsightConfig(
+        retrieval_top_k=2, max_chunks_per_document=2, low_survival_top_k=2,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN])
@@ -137,6 +143,7 @@ def test_pipeline_stops_on_budget_exceeded():
     config = InsightConfig(
         retrieval_top_k=2,
         max_chunks_per_document=2,
+        low_survival_top_k=2,
         max_input_tokens_per_run=1,  # Absurdly low -> triggers immediately
     )
     pipeline = InsightPipeline(llm_client=client, config=config)
@@ -170,7 +177,9 @@ def test_pipeline_deduplication():
         DUPLICATE_SUDAN_CASE_COUNT,  # doc 2 -> 1 fact (duplicate case)
     ])
 
-    config = InsightConfig(retrieval_top_k=1, max_chunks_per_document=1)
+    config = InsightConfig(
+        retrieval_top_k=1, max_chunks_per_document=1, low_survival_top_k=1,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN, doc2])
@@ -580,6 +589,7 @@ def test_pipeline_parallel_chunk_extraction_produces_all_records():
     config = InsightConfig(
         retrieval_top_k=4,
         max_chunks_per_document=4,
+        low_survival_top_k=4,
         chunk_workers=4,
     )
     pipeline = InsightPipeline(llm_client=fake, config=config)
@@ -603,10 +613,12 @@ def test_pipeline_sequential_and_parallel_produce_same_record_count():
     of records when the fake LLM is content-keyed (so result depends on
     chunk content, not worker order)."""
     config_seq = InsightConfig(
-        retrieval_top_k=4, max_chunks_per_document=4, chunk_workers=1,
+        retrieval_top_k=4, max_chunks_per_document=4, low_survival_top_k=4,
+        chunk_workers=1,
     )
     config_par = InsightConfig(
-        retrieval_top_k=4, max_chunks_per_document=4, chunk_workers=4,
+        retrieval_top_k=4, max_chunks_per_document=4, low_survival_top_k=4,
+        chunk_workers=4,
     )
 
     seq_pipeline = InsightPipeline(
@@ -653,7 +665,8 @@ def embed(self, texts, *, model):
 
     fake = _IntermittentFake()
     config = InsightConfig(
-        retrieval_top_k=4, max_chunks_per_document=4, chunk_workers=4,
+        retrieval_top_k=4, max_chunks_per_document=4, low_survival_top_k=4,
+        chunk_workers=4,
     )
     pipeline = InsightPipeline(llm_client=fake, config=config)
     # Must not raise — failed chunk is logged and skipped
@@ -684,7 +697,9 @@ def test_pipeline_multi_document():
         H5N1_TABLE_RESPONSE,
     ])
 
-    config = InsightConfig(retrieval_top_k=2, max_chunks_per_document=2)
+    config = InsightConfig(
+        retrieval_top_k=2, max_chunks_per_document=2, low_survival_top_k=2,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_H5N1, [DOC_WHO_SUDAN, DOC_CDC_H5N1])
@@ -709,7 +724,9 @@ def test_pipeline_output_records_valid():
         SUDAN_TABLE_RESPONSE,
     ])
 
-    config = InsightConfig(retrieval_top_k=2, max_chunks_per_document=2)
+    config = InsightConfig(
+        retrieval_top_k=2, max_chunks_per_document=2, low_survival_top_k=2,
+    )
     pipeline = InsightPipeline(llm_client=client, config=config)
 
     result = pipeline.run(QUESTION_SUDAN, [DOC_WHO_SUDAN])