From 14d7f785c0205087ce0b89ee54b872a2596da9aa Mon Sep 17 00:00:00 2001
From: RuneLind <rulind@gmail.com>
Date: Tue, 12 May 2026 20:49:04 +0200
Subject: [PATCH 1/4] Add CRAG-lite corrective retrieval around the knowledge
 search tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 of the corrective-retrieval work (plan: mimir/plans/huginn-muninn-corrective-rag.md).
After a copilot-sdk bot calls Huginn's `search_knowledge`, optionally grade the
results with a dedicated Haiku call and do a bounded corrective re-query before
the model sees them — consuming the Phase-0 contract (`bestScore`,
`confidenceBand`, `retryHints`, `noConfidentResults`, `min_relevance`).

- `knowledge-grader.ts` — awaited (blocking) Haiku evaluator → correct/ambiguous/insufficient
  + rewritten query / suggested collection. Fail-soft to "correct".
- `corrective-retrieval.ts` — grade → re-query `/api/search` (rerank=true) →
  merge + dedupe by collection/doc_id (parsed from the rendered result text) →
  consolidated text + `corrective` metadata. Hard cap 1 retry (configurable 2),
  never recursive.
- `knowledge-search-client.ts` — HTTP client for `/api/search` + a renderer
  mirroring Huginn's MCP-adapter result format.
- copilot-sdk connector: registers a `hooks.onPostToolUse` handler that runs the
  corrective pass and returns a `modifiedResult`; re-appends any trailing Huginn
  trace marker so downstream trace extraction is unaffected. Claude-CLI bots
  can't be intercepted this way — left to Phase 3 (prompt-level guidance).
- Trace spans: `knowledge_grade` + `knowledge_requery` synthesized under the
  tool span (`corrective-trace-spans.ts`); a corrective chip on the parent tool
  span in the dashboard waterfall.
- Config: per-bot `correctiveRetrieval` block, `CORRECTIVE_RETRIEVAL_ENABLED`
  global default, `CORRECTIVE_RETRIEVAL_DISABLED` kill-switch. Off by default —
  when off the hook isn't registered and behaviour is byte-identical to before.
- Tests: grader (verdict parsing, fail-soft), orchestrator (retry/merge/dedupe,
  budget exhaustion, budget clamp, re-query errors), search client (rendering,
  doc-id extraction, fetch), trace-span planner, connector hook helpers.
---
 .env.example                                  |   6 +
 CLAUDE.md                                     |  18 ++
 package.json                                  |   4 +-
 src/ai/CLAUDE.md                              |   6 +-
 src/ai/connectors/copilot-sdk.ts              | 158 +++++++++-
 src/ai/connectors/corrective-hook.test.ts     | 139 +++++++++
 src/ai/corrective-config.test.ts              |  48 ++++
 src/ai/corrective-config.ts                   |  46 +++
 src/ai/corrective-retrieval.test.ts           | 225 +++++++++++++++
 src/ai/corrective-retrieval.ts                | 240 ++++++++++++++++
 src/ai/huginn-trace-pointer.ts                |  29 ++
 src/ai/knowledge-grader.test.ts               |  78 +++++
 src/ai/knowledge-grader.ts                    | 150 ++++++++++
 src/ai/knowledge-search-client.test.ts        | 138 +++++++++
 src/ai/knowledge-search-client.ts             | 271 ++++++++++++++++++
 src/ai/tool-status.ts                         |  10 +
 src/bots/config.ts                            |  13 +-
 src/core/corrective-trace-spans.test.ts       |  63 ++++
 src/core/corrective-trace-spans.ts            |  94 ++++++
 src/core/tool-spans.ts                        |  40 ++-
 src/dashboard/views/components/span-label.ts  |  53 +++-
 .../views/components/traces-waterfall.ts      |  18 ++
 src/types.ts                                  |  26 ++
 23 files changed, 1851 insertions(+), 22 deletions(-)
 create mode 100644 src/ai/connectors/corrective-hook.test.ts
 create mode 100644 src/ai/corrective-config.test.ts
 create mode 100644 src/ai/corrective-config.ts
 create mode 100644 src/ai/corrective-retrieval.test.ts
 create mode 100644 src/ai/corrective-retrieval.ts
 create mode 100644 src/ai/knowledge-grader.test.ts
 create mode 100644 src/ai/knowledge-grader.ts
 create mode 100644 src/ai/knowledge-search-client.test.ts
 create mode 100644 src/ai/knowledge-search-client.ts
 create mode 100644 src/core/corrective-trace-spans.test.ts
 create mode 100644 src/core/corrective-trace-spans.ts

diff --git a/.env.example b/.env.example
index 714031e..cc5d2c4 100644
--- a/.env.example
+++ b/.env.example
@@ -34,3 +34,9 @@ WHISPER_MODEL_PATH=./models/ggml-base.en.bin
 # TRACING_ENABLED=true
 # TRACING_RETENTION_DAYS=7
 # PROMPT_SNAPSHOTS_RETENTION_DAYS=3
+
+# Corrective retrieval (CRAG-lite) around the knowledge search tool — off by
+# default. Opt in per-bot via config.json `correctiveRetrieval`, or globally here.
+# CORRECTIVE_RETRIEVAL_ENABLED=true
+# CORRECTIVE_RETRIEVAL_BUDGET=1          # max corrective re-queries per search (1–2)
+# CORRECTIVE_RETRIEVAL_DISABLED=1        # hard kill-switch — overrides per-bot config
diff --git a/CLAUDE.md b/CLAUDE.md
index 7142b4e..ca9e53e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -217,6 +217,7 @@ All fields are optional — falls back to global `.env` values:
 | `showWaterfall` | boolean | `true` | Show request progress waterfall overlay in web chat |
 | `contextWindow` | number | — | Context window size in tokens (e.g. `32768`). Shown as usage in web chat and percentage in Telegram footer |
 | `prompts` | object | — | Configurable prompts: `jiraAnalysis` (Jira research instruction, content appended automatically), `investigateCode` (follow-up code investigation prompt) |
+| `correctiveRetrieval` | object | off | CRAG-lite corrective loop around the knowledge search tool — `{ enabled?: boolean, retryBudget?: 1\|2 }`. Only the `copilot-sdk` connector honours it; off by default. See "Corrective retrieval" below. |
 
 ### Database
 
@@ -253,6 +254,9 @@ PostgreSQL + pgvector via Docker (single container).
 | `SLACK_APP_TOKEN_<NAME>` | No | — | Slack app-level token (per bot) |
 | `SLACK_ALLOWED_USER_IDS_<NAME>` | No | — | Comma-separated Slack user IDs |
 | `LOG_DIR` | No | `./logs` | Log file directory (set `none` to disable file logging) |
+| `CORRECTIVE_RETRIEVAL_ENABLED` | No | `false` | Global default for the CRAG-lite corrective loop (per-bot `correctiveRetrieval.enabled` overrides) |
+| `CORRECTIVE_RETRIEVAL_BUDGET` | No | `1` | Default max corrective re-queries per knowledge search (clamped to 1–2) |
+| `CORRECTIVE_RETRIEVAL_DISABLED` | No | — | Set to `1` to hard-disable corrective retrieval everywhere, regardless of per-bot config |
 | `GOAL_CHECK_INTERVAL_MS` | No | — | Legacy alias for `SCHEDULER_INTERVAL_MS` |
 | `GOAL_CHECK_ENABLED` | No | — | Legacy alias for `SCHEDULER_ENABLED` |
 
@@ -352,6 +356,20 @@ uvx --from "git+https://github.com/oraios/serena" serena project index /path/to/
 | `src/dashboard/views/serena-page.ts` | Dashboard UI for managing instances |
 | `src/dashboard/mcp-client.ts` | MCP Debug client — supports both stdio and HTTP servers |
 
+## Corrective Retrieval (CRAG-lite)
+
+A CRAG-style "grade the search results, re-query if they're weak" loop wrapped around Huginn's `search_knowledge` MCP tool. **Off by default**; enable per-bot in `config.json` (`"correctiveRetrieval": { "enabled": true, "retryBudget": 1 }`), globally via `CORRECTIVE_RETRIEVAL_ENABLED=true`, or hard-disable everywhere with `CORRECTIVE_RETRIEVAL_DISABLED=1`.
+
+How it works (copilot-sdk connector only):
+1. The connector registers a Copilot SDK `onPostToolUse` hook. When a bot calls `search_knowledge`, the hook intercepts the result before the model sees it.
+2. `src/ai/knowledge-grader.ts` — a dedicated, **awaited** Haiku call (the hook blocks on it) grades the results (`correct` / `ambiguous` / `insufficient`) and, if weak, proposes a rewritten query and/or a better collection. Fail-soft: any Haiku error → `correct` (no change).
+3. `src/ai/corrective-retrieval.ts` — if not `correct` and the retry budget (1, configurable to 2) isn't spent, re-queries Huginn's `/api/search` (`src/ai/knowledge-search-client.ts`) with `rerank=true`, merges the fresh hits into the original result text (deduped by `collection/doc_id` parsed from the rendered output), and appends an inline note. Never recursive.
+4. Traces: `knowledge_grade` + `knowledge_requery` spans synthesized under the tool span (`src/core/corrective-trace-spans.ts`), rendered in the dashboard waterfall with a corrective chip on the parent tool span.
+
+**Connector asymmetry:** Claude-CLI bots run the MCP tool inside their own process, so the result can't be intercepted — they get nothing here (Phase 3 will add prompt-level corrective guidance instead). When the toggle is off, the hook isn't registered and behaviour is byte-identical to before.
+
+Key files: `src/ai/knowledge-grader.ts`, `src/ai/corrective-retrieval.ts`, `src/ai/knowledge-search-client.ts`, `src/ai/corrective-config.ts`, `src/core/corrective-trace-spans.ts`, hook wiring in `src/ai/connectors/copilot-sdk.ts`.
+
 ## Slack Bot
 When implementing Slack bot features, be aware of the different message contexts (DMs, threads, channels, Assistant API) — each has different API constraints and capabilities. Check Slack app configuration settings (like 'Agent or Assistant' toggle) as a potential root cause before writing code fixes.
 
diff --git a/package.json b/package.json
index 93559d4..0573af9 100644
--- a/package.json
+++ b/package.json
@@ -12,8 +12,8 @@
     "cleanup": "bun run scripts/cleanup-stale-mcp.ts",
     "cleanup:kill": "bun run scripts/cleanup-stale-mcp.ts --kill",
     "typecheck": "tsc --noEmit",
-    "test": "bun test src/utils/ src/bot/telegram-format.test.ts src/bot/topic-commands.test.ts src/slack/slack-format.test.ts src/ai/result-parser.test.ts src/ai/stream-parser.test.ts src/ai/tool-restrictions.test.ts src/ai/knowledge-search.test.ts src/ai/mcp-status.test.ts src/ai/huginn-trace.test.ts src/db/ src/core/topic-commands.test.ts src/core/mcp-env-snapshot.test.ts src/core/tool-spans.test.ts src/core/process-error.test.ts src/core/search-trace-spans.test.ts src/chat/state.test.ts src/chat/chat-config.test.ts src/chat/views/components/ src/dashboard/routes/route-utils.test.ts src/startup/adapter-audit.test.ts src/voice/tts.test.ts && bun test src/scheduler/executor.test.ts && bun test src/core/message-processor.test.ts && bun test src/ai/prompt-builder.test.ts src/ai/executor.test.ts src/bot/handler.test.ts src/bot/middleware.test.ts src/slack/handler.test.ts src/memory/ src/scheduler/detector.test.ts src/scheduler/briefing-prompt.test.ts src/watchers/ src/goals/detector.test.ts src/dashboard/agent-status.test.ts src/dashboard/activity-log.test.ts src/dashboard/views/components/ && bun test src/tracing/tracer.test.ts",
-    "test:unit": "bun test src/utils/ src/ai/result-parser.test.ts src/ai/stream-parser.test.ts src/ai/tool-restrictions.test.ts src/ai/knowledge-search.test.ts src/ai/mcp-status.test.ts src/ai/huginn-trace.test.ts src/slack/slack-format.test.ts src/bot/telegram-format.test.ts src/bot/topic-commands.test.ts src/bots/config.test.ts src/chat/views/components/ src/dashboard/routes/route-utils.test.ts src/dashboard/agent-status.test.ts src/dashboard/activity-log.test.ts src/dashboard/views/components/ src/watchers/runner.test.ts src/goals/detector.test.ts src/startup/adapter-audit.test.ts src/core/mcp-env-snapshot.test.ts src/core/tool-spans.test.ts src/core/process-error.test.ts src/core/search-trace-spans.test.ts && bun test src/tracing/tracer.test.ts",
+    "test": "bun test src/utils/ src/bot/telegram-format.test.ts src/bot/topic-commands.test.ts src/slack/slack-format.test.ts src/ai/result-parser.test.ts src/ai/stream-parser.test.ts src/ai/tool-restrictions.test.ts src/ai/knowledge-search.test.ts src/ai/knowledge-search-client.test.ts src/ai/knowledge-grader.test.ts src/ai/corrective-retrieval.test.ts src/ai/corrective-config.test.ts src/ai/connectors/corrective-hook.test.ts src/ai/mcp-status.test.ts src/ai/huginn-trace.test.ts src/db/ src/core/topic-commands.test.ts src/core/mcp-env-snapshot.test.ts src/core/tool-spans.test.ts src/core/process-error.test.ts src/core/search-trace-spans.test.ts src/core/corrective-trace-spans.test.ts src/chat/state.test.ts src/chat/chat-config.test.ts src/chat/views/components/ src/dashboard/routes/route-utils.test.ts src/startup/adapter-audit.test.ts src/voice/tts.test.ts && bun test src/scheduler/executor.test.ts && bun test src/core/message-processor.test.ts && bun test src/ai/prompt-builder.test.ts src/ai/executor.test.ts src/bot/handler.test.ts src/bot/middleware.test.ts src/slack/handler.test.ts src/memory/ src/scheduler/detector.test.ts src/scheduler/briefing-prompt.test.ts src/watchers/ src/goals/detector.test.ts src/dashboard/agent-status.test.ts src/dashboard/activity-log.test.ts src/dashboard/views/components/ && bun test src/tracing/tracer.test.ts",
+    "test:unit": "bun test src/utils/ src/ai/result-parser.test.ts src/ai/stream-parser.test.ts src/ai/tool-restrictions.test.ts src/ai/knowledge-search.test.ts src/ai/knowledge-search-client.test.ts src/ai/knowledge-grader.test.ts src/ai/corrective-retrieval.test.ts src/ai/corrective-config.test.ts src/ai/connectors/corrective-hook.test.ts src/ai/mcp-status.test.ts src/ai/huginn-trace.test.ts src/slack/slack-format.test.ts src/bot/telegram-format.test.ts src/bot/topic-commands.test.ts src/bots/config.test.ts src/chat/views/components/ src/dashboard/routes/route-utils.test.ts src/dashboard/agent-status.test.ts src/dashboard/activity-log.test.ts src/dashboard/views/components/ src/watchers/runner.test.ts src/goals/detector.test.ts src/startup/adapter-audit.test.ts src/core/mcp-env-snapshot.test.ts src/core/tool-spans.test.ts src/core/process-error.test.ts src/core/search-trace-spans.test.ts src/core/corrective-trace-spans.test.ts && bun test src/tracing/tracer.test.ts",
     "test:db": "bun test src/db/",
     "test:handlers": "bun test src/core/message-processor.test.ts src/ai/prompt-builder.test.ts src/ai/executor.test.ts src/bot/handler.test.ts src/bot/middleware.test.ts src/slack/handler.test.ts src/memory/ src/scheduler/detector.test.ts src/scheduler/briefing-prompt.test.ts src/watchers/ src/goals/detector.test.ts src/chat/state.test.ts src/voice/tts.test.ts",
     "test:integration": "bun test src/chat/integration.test.ts",
diff --git a/src/ai/CLAUDE.md b/src/ai/CLAUDE.md
index a614f54..04b2d42 100644
--- a/src/ai/CLAUDE.md
+++ b/src/ai/CLAUDE.md
@@ -15,7 +15,11 @@
 | `json-extract.ts` | Extract JSON objects from mixed text output |
 | `haiku-extraction.ts` | Shared Haiku executor for async extraction tasks (memories, goals, tasks) |
 | `huginn-trace.ts` | Inline-fence Huginn trace handling (legacy mode) — `parseHuginnTrace`, `extractMcpResultText`, oversized-CLI-divert recovery |
-| `huginn-trace-pointer.ts` | Phase 2 out-of-band trace channel — parses `huginn-trace-url:` line and fetches the trace from Huginn's `/api/trace/<id>` endpoint. Preferred when `HUGINN_TRACE_POINTER=1` is set on Huginn. Also exports `processMcpToolResult()` — the unwrap → peel → fetch pipeline connectors run on every tool result |
+| `huginn-trace-pointer.ts` | Phase 2 out-of-band trace channel — parses `huginn-trace-url:` line and fetches the trace from Huginn's `/api/trace/<id>` endpoint. Preferred when `HUGINN_TRACE_POINTER=1` is set on Huginn. Also exports `processMcpToolResult()` — the unwrap → peel → fetch pipeline connectors run on every tool result — and `peelTraceMarkerForRewrite()` for connectors that rewrite a tool result and need to re-append the trace marker |
+| `knowledge-grader.ts` | CRAG-lite retrieval evaluator — an awaited Haiku call that grades knowledge-search results (`correct`/`ambiguous`/`insufficient`) and proposes a rewritten query/collection. Fail-soft to `correct`. |
+| `corrective-retrieval.ts` | Corrective grade-and-requery orchestrator — `runCorrectiveRetrieval()`: grade → bounded re-query Huginn → merge+dedupe → consolidated text + `corrective` metadata. ≤1 retry (configurable to 2), non-recursive. |
+| `knowledge-search-client.ts` | HTTP client for Huginn's `/api/search` + a renderer mirroring the MCP adapter's result format, used by the corrective re-query path. |
+| `corrective-config.ts` | Resolves the per-bot corrective-retrieval toggle + retry budget (kill-switch > per-bot config.json > global env defaults). |
 | `connectors/` | Three connector implementations (see below) |
 
 ## Connector Abstraction
diff --git a/src/ai/connectors/copilot-sdk.ts b/src/ai/connectors/copilot-sdk.ts
index 1b05ed5..72fe97d 100644
--- a/src/ai/connectors/copilot-sdk.ts
+++ b/src/ai/connectors/copilot-sdk.ts
@@ -1,17 +1,20 @@
-import { CopilotClient, approveAll, type SessionEvent, type CustomAgentConfig } from "@github/copilot-sdk";
+import { CopilotClient, approveAll, type SessionEvent, type SessionConfig, type CustomAgentConfig, type ToolResultObject } from "@github/copilot-sdk";
 import type { Config } from "../../config.ts";
 import type { BotConfig } from "../../bots/config.ts";
 import type { ClaudeExecResult } from "../executor.ts";
 import type { StreamProgressCallback } from "../stream-parser.ts";
 import { formatToolDisplayName, isReportIntentTool, extractIntentText } from "../stream-parser.ts";
 import { truncateOutput } from "../truncate-output.ts";
-import { processMcpToolResult } from "../huginn-trace-pointer.ts";
-import type { ToolCall } from "../../types.ts";
+import { processMcpToolResult, peelTraceMarkerForRewrite } from "../huginn-trace-pointer.ts";
+import type { CorrectiveToolMeta, ToolCall } from "../../types.ts";
 import { parseMcpConfig } from "./copilot-mcp.ts";
 import { preflightMcpForRequest } from "../mcp-status.ts";
 import { getLog } from "../../logging.ts";
 import { resolve } from "node:path";
 import { discoverSerenaConfigs } from "../../serena/config.ts";
+import { isKnowledgeSearchTool } from "../tool-status.ts";
+import { resolveCorrectiveConfig } from "../corrective-config.ts";
+import { runCorrectiveRetrieval, type CorrectiveMetadata, type CorrectiveRetrievalContext } from "../corrective-retrieval.ts";
 
 const log = getLog("ai", "copilot-sdk");
 
@@ -80,6 +83,43 @@ export async function executePrompt(
   // Build custom subagents (e.g. verify-code for grep/diff verification)
   const customAgents = buildCustomAgents(botConfig);
 
+  // Corrective retrieval (CRAG-lite): when enabled for this bot, an onPostToolUse
+  // hook grades each knowledge-search result with Haiku and, if it's weak, does a
+  // bounded re-query — splicing the fresh hits into the result before the model
+  // sees it. Off by default (see src/ai/corrective-config.ts); when off, the hook
+  // isn't registered at all and behaviour is byte-identical to before.
+  const correctiveCfg = resolveCorrectiveConfig(botConfig);
+  const correctiveOutcomes: CorrectiveMetadata[] = [];
+  const correctiveEnabled = correctiveCfg.enabled && hasMcp;
+  const userQuestion = correctiveEnabled ? extractUserQuestion(prompt) : "";
+  const correctiveHooks: SessionConfig["hooks"] | undefined = correctiveEnabled
+    ? {
+        onPostToolUse: async (input) => {
+          if (!isKnowledgeSearchTool(input.toolName)) return;
+          try {
+            const result = await applyCorrectiveRetrieval({
+              toolName: input.toolName,
+              toolArgs: input.toolArgs,
+              toolResult: input.toolResult,
+              botConfig,
+              budget: correctiveCfg.retryBudget,
+              userQuestion,
+            });
+            if (result) {
+              correctiveOutcomes.push(result.metadata);
+              if (result.modifiedResult) return { modifiedResult: result.modifiedResult };
+            }
+          } catch (e) {
+            log.warn("Corrective retrieval hook failed: {error}", {
+              botName: botConfig.name,
+              error: e instanceof Error ? e.message : String(e),
+            });
+          }
+          return;
+        },
+      }
+    : undefined;
+
   // Create session per request (system prompt is dynamic — memories, goals, history change per message)
   const session = await cl.createSession({
     model,
@@ -92,6 +132,7 @@ export async function executePrompt(
     ...(hasMcp ? { mcpServers } : {}),
     ...(customAgents.length > 0 ? { customAgents } : {}),
     ...(botConfig.excludedTools?.length ? { excludedTools: botConfig.excludedTools } : {}),
+    ...(correctiveHooks ? { hooks: correctiveHooks } : {}),
   });
 
   // Track tool calls for waterfall
@@ -259,6 +300,11 @@ export async function executePrompt(
     const wallClockMs = performance.now() - wallStart;
     const content = response?.data?.content ?? "";
 
+    // Attach corrective-retrieval metadata to the matching knowledge-search tool
+    // calls so the traces waterfall can synthesize knowledge_grade / knowledge_requery
+    // spans (onPostToolUse gives no toolCallId, so this matches by tool order).
+    if (correctiveOutcomes.length > 0) attachCorrectiveOutcomes(toolCalls, correctiveOutcomes);
+
     return {
       result: content,
       costUsd: 0, // Copilot SDK doesn't report cost (subscription model)
@@ -332,3 +378,109 @@ function abbreviateInput(args: unknown): string | undefined {
   const json = JSON.stringify(args);
   return json.length > 500 ? json.slice(0, 500) + "…" : json;
 }
+
+// ── Corrective retrieval (CRAG-lite) helpers ───────────────────────────────
+
+export interface ApplyCorrectiveArgs {
+  toolName: string; // not read by applyCorrectiveRetrieval itself
+  toolArgs: unknown; // `query` / `collection` are picked out when this is an object
+  toolResult: ToolResultObject; // `textResultForLlm` + `resultType` are read; the rest is spread into the rewrite
+  botConfig: Pick<BotConfig, "name" | "dir">; // forwarded as `botName` / `cwd`
+  budget: number; // forwarded unchanged as the corrective run's `budget`
+  userQuestion: string; // graded question; an empty string falls back to the tool's `query`
+  /** Injectable for tests — forwarded to {@link runCorrectiveRetrieval}. */
+  searchFn?: CorrectiveRetrievalContext["searchFn"];
+  gradeFn?: CorrectiveRetrievalContext["gradeFn"]; // also forwarded to runCorrectiveRetrieval
+}
+
+/**
+ * Run the corrective grade-and-requery pass on a knowledge-search tool result.
+ * Returns `null` when `textResultForLlm` is missing/empty or `resultType` is set
+ * to anything but "success"; otherwise returns `metadata` and — only when the
+ * pass reports `changed` — a `modifiedResult` (a copy of `toolResult` with new
+ * text). Whatever `peelTraceMarkerForRewrite` splits off as `remainder` is
+ * re-appended after the rewritten text. `toolName` is currently not read here.
+ */
+export async function applyCorrectiveRetrieval(
+  args: ApplyCorrectiveArgs,
+): Promise<{ modifiedResult?: ToolResultObject; metadata: CorrectiveMetadata } | null> {
+  const { toolResult, toolArgs, botConfig, budget, userQuestion } = args;
+  const originalText = toolResult?.textResultForLlm;
+  if (typeof originalText !== "string" || originalText.length === 0) return null;
+  // A set `resultType` other than "success" marks a tool error (e.g. server
+  // down) — don't grade those; the model handles the error itself.
+  if (toolResult.resultType && toolResult.resultType !== "success") return null;
+
+  const { body, remainder } = peelTraceMarkerForRewrite(originalText);
+
+  const argObj = toolArgs && typeof toolArgs === "object" ? (toolArgs as Record<string, unknown>) : {};
+  const originalQuery = typeof argObj.query === "string" ? argObj.query.trim() : "";
+  const originalCollections = normalizeCollections(argObj.collection);
+
+  const outcome = await runCorrectiveRetrieval({
+    question: userQuestion || originalQuery, // empty user question → grade against the raw tool query
+    originalQuery,
+    originalCollections,
+    originalResultText: body, // text without the peeled-off remainder
+    budget,
+    botName: botConfig.name,
+    cwd: botConfig.dir,
+    log,
+    graderTimeoutMs: 30_000,
+    searchFn: args.searchFn,
+    gradeFn: args.gradeFn,
+  });
+
+  if (!outcome.changed) return { metadata: outcome.metadata }; // unchanged → metadata only, tool result left as-is
+
+  return {
+    metadata: outcome.metadata,
+    modifiedResult: { ...toolResult, textResultForLlm: outcome.text + remainder }, // remainder goes back on the end
+  };
+}
+
+function normalizeCollections(v: unknown): string[] | undefined { // string | string[] → string[]; anything else → undefined
+  if (typeof v === "string" && v.trim()) return [v.trim()]; // single non-blank name, trimmed
+  if (Array.isArray(v)) {
+    const arr = v.filter((x): x is string => typeof x === "string" && x.trim().length > 0); // NOTE(review): blank/non-string entries dropped, but kept entries are not trimmed (unlike the string branch)
+    return arr.length > 0 ? arr : undefined; // an array with no usable names counts as "no collection"
+  }
+  return undefined;
+}
+
+/** Pull the current user turn out of the assembled prompt for grading. The
+ *  prompt-builder puts history in a `<conversation_history>` block followed by
+ *  the current message, so everything after the LAST close tag is the turn (the
+ *  whole prompt when there is no tag). Over 1500 chars, only the tail is kept. */
+export function extractUserQuestion(prompt: string): string {
+  const closeTag = "</conversation_history>";
+  const idx = prompt.lastIndexOf(closeTag);
+  const tail = idx !== -1 ? prompt.slice(idx + closeTag.length) : prompt; // no close tag → use the full prompt
+  const trimmed = tail.trim();
+  return trimmed.length > 1500 ? trimmed.slice(-1500).trim() : trimmed; // keep the last 1500 chars, re-trimmed after the cut
+}
+
+/** Attach corrective outcomes to knowledge-search tool calls by position: with no
+ *  toolCallId from onPostToolUse, the i-th outcome goes to the i-th such call.
+ *  NOTE(review): a call that yielded no outcome (error/empty/hook failure) would shift the mapping — confirm. */
+export function attachCorrectiveOutcomes(toolCalls: ToolCall[], outcomes: CorrectiveMetadata[]): void {
+  let i = 0; // index of the next outcome to hand out
+  for (const tc of toolCalls) {
+    if (i >= outcomes.length) break; // all outcomes assigned
+    if (!isKnowledgeSearchTool(tc.name)) continue; // other tools never get corrective metadata
+    tc.corrective = correctiveMetaToToolMeta(outcomes[i++]!); // mutates the tool call in place
+  }
+}
+
+function correctiveMetaToToolMeta(m: CorrectiveMetadata): CorrectiveToolMeta { // field-by-field copy into the ToolCall-side shape
+  return {
+    retries: m.retries,
+    verdicts: m.verdicts,
+    reasons: m.reasons,
+    queriesTried: m.queriesTried,
+    collectionsTried: m.collectionsTried.map((c) => c ?? null), // undefined/null entries become null
+    finalVerdict: m.finalVerdict,
+    graderMs: m.graderMs,
+    requeryMs: m.requeryMs,
+  };
+}
diff --git a/src/ai/connectors/corrective-hook.test.ts b/src/ai/connectors/corrective-hook.test.ts
new file mode 100644
index 0000000..56b66c1
--- /dev/null
+++ b/src/ai/connectors/corrective-hook.test.ts
@@ -0,0 +1,139 @@
+import { test, expect, describe } from "bun:test";
+import { applyCorrectiveRetrieval, extractUserQuestion, attachCorrectiveOutcomes } from "./copilot-sdk.ts";
+import type { CorrectiveMetadata } from "../corrective-retrieval.ts";
+import type { ToolCall } from "../../types.ts";
+import type { KnowledgeGrade } from "../knowledge-grader.ts";
+
+describe("extractUserQuestion", () => { // covers: history block present, absent, and the 1500-char cap
+  test("returns the current turn after the conversation_history block", () => {
+    const prompt = "<conversation_history>\nuser: hi\nassistant: hello\n</conversation_history>\n\nWhat SEDs belong to LA_BUC_02?";
+    expect(extractUserQuestion(prompt)).toBe("What SEDs belong to LA_BUC_02?"); // surrounding whitespace is trimmed off
+  });
+
+  test("returns the whole prompt when there's no history block", () => {
+    expect(extractUserQuestion("just a question")).toBe("just a question");
+  });
+
+  test("caps very long tails to the last 1500 chars", () => {
+    const long = "x".repeat(5000); // no history block, so the whole prompt is the tail
+    const out = extractUserQuestion(long);
+    expect(out.length).toBe(1500);
+  });
+});
+
+describe("attachCorrectiveOutcomes", () => {
+  function tc(name: string): ToolCall { // minimal ToolCall fixture; the tool name doubles as id and displayName
+    return { id: name, name, displayName: name, durationMs: 1, startOffsetMs: 0 };
+  }
+  function meta(finalVerdict: string): CorrectiveMetadata { // one-retry outcome fixture ending in the given verdict
+    return { retries: 1, verdicts: ["insufficient", finalVerdict] as KnowledgeGrade["verdict"][], reasons: ["x", "y"], queriesTried: ["q"], collectionsTried: [undefined], finalVerdict: finalVerdict as KnowledgeGrade["verdict"], graderMs: 100, requeryMs: [50] };
+  }
+
+  test("maps the i-th outcome to the i-th knowledge-search tool call, skipping others", () => {
+    const calls = [tc("knowledge-search_knowledge"), tc("yggdrasil-symbol_context"), tc("knowledge-search_knowledge")];
+    attachCorrectiveOutcomes(calls, [meta("correct"), meta("ambiguous")]);
+    expect(calls[0]!.corrective?.finalVerdict).toBe("correct");
+    expect(calls[1]!.corrective).toBeUndefined(); // the non-knowledge-search call is skipped
+    expect(calls[2]!.corrective?.finalVerdict).toBe("ambiguous");
+    expect(calls[2]!.corrective?.collectionsTried).toEqual([null]); // undefined in the fixture comes out as null
+  });
+
+  test("no-op when there are no outcomes", () => {
+    const calls = [tc("knowledge-search_knowledge")];
+    attachCorrectiveOutcomes(calls, []);
+    expect(calls[0]!.corrective).toBeUndefined();
+  });
+});
+
+describe("applyCorrectiveRetrieval", () => {
+  const botConfig = { name: "test", dir: "/tmp/test-bot" };
+  const okGrade: KnowledgeGrade = { verdict: "correct", reason: "covered" };
+
+  function grader(...grades: KnowledgeGrade[]) { // stub gradeFn
+    let i = 0;
+    return async () => grades[Math.min(i++, grades.length - 1)]!; // hands out grades in order, then repeats the last one
+  }
+
+  test("returns null for a tool error result", async () => {
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "x" },
+      toolResult: { textResultForLlm: "Knowledge API server is not running", resultType: "failure" },
+      botConfig,
+      budget: 1,
+      userQuestion: "q",
+      gradeFn: grader(okGrade),
+      searchFn: async () => ({ results: [] }),
+    });
+    expect(out).toBeNull();
+  });
+
+  test("returns null for an empty result", async () => {
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "x" },
+      toolResult: { textResultForLlm: "", resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "q",
+      gradeFn: grader(okGrade),
+      searchFn: async () => ({ results: [] }),
+    });
+    expect(out).toBeNull();
+  });
+
+  test("verdict 'correct' → metadata only, no modifiedResult", async () => {
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "x", collection: "wiki" },
+      toolResult: { textResultForLlm: "## Doc (80% relevant · high)\ncollection: `wiki` doc_id: `1`\n\nbody", resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "q",
+      gradeFn: grader(okGrade),
+      searchFn: async () => ({ results: [] }),
+    });
+    expect(out).not.toBeNull();
+    expect(out!.modifiedResult).toBeUndefined();
+    expect(out!.metadata.retries).toBe(0);
+  });
+
+  test("low-confidence result → exactly one re-query, merged, trace fence preserved at the end", async () => {
+    const original =
+      "## Old doc (15% relevant · low)\ncollection: `wiki` doc_id: `1`\n\nweak body\n\n```huginn-trace\n{\"schemaVersion\":1,\"totalMs\":42}\n```";
+    let searchCalls = 0;
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "LA_BUC_02", collection: "wiki" },
+      toolResult: { textResultForLlm: original, resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "what SEDs belong to LA_BUC_02?",
+      gradeFn: grader(
+        { verdict: "insufficient", rewrittenQuery: "LA_BUC_02 structured electronic documents", reason: "off-topic" },
+        { verdict: "correct", reason: "now covered" },
+      ),
+      searchFn: async (query: string) => {
+        searchCalls++;
+        expect(query).toBe("LA_BUC_02 structured electronic documents");
+        return {
+          results: [
+            { collection: "wiki", id: "1", title: "Old doc", relevance: 0.7, confidenceBand: "high", matchedChunks: [{ content: "x" }] }, // dupe of wiki/1 already in the original text
+            { collection: "wiki", id: "2", title: "Right doc", relevance: 0.8, confidenceBand: "high", matchedChunks: [{ content: "the answer" }] },
+          ],
+        };
+      },
+    });
+    expect(searchCalls).toBe(1);
+    expect(out).not.toBeNull();
+    expect(out!.modifiedResult).toBeDefined();
+    const text = out!.modifiedResult!.textResultForLlm;
+    expect(text).toContain("Old doc");
+    expect(text).toContain("Right doc");
+    expect(text).toContain("[corrective retrieval — re-query #1");
+    expect(text.match(/doc_id: `1`/g)?.length).toBe(1); // dupe dropped
+    expect(text.trimEnd().endsWith("```")).toBe(true); // trace fence re-appended at the very end
+    expect(text).toContain("\"schemaVersion\":1");
+    expect(out!.metadata.queriesTried).toEqual(["LA_BUC_02 structured electronic documents"]);
+  });
+});
diff --git a/src/ai/corrective-config.test.ts b/src/ai/corrective-config.test.ts
new file mode 100644
index 0000000..b3996fa
--- /dev/null
+++ b/src/ai/corrective-config.test.ts
@@ -0,0 +1,48 @@
+import { test, expect, describe } from "bun:test";
+import { resolveCorrectiveConfig, clampBudget } from "./corrective-config.ts";
+
+describe("clampBudget", () => {
+  // [input, expected] pairs: values are floored, then pinned into 1–2; NaN maps to 1.
+  const cases: [number, number][] = [[0, 1], [1, 1], [2, 2], [5, 2], [1.9, 1], [NaN, 1]];
+
+  test("clamps to the 1–2 range and floors", () => {
+    for (const [input, expected] of cases) {
+      expect(clampBudget(input)).toBe(expected);
+    }
+  });
+});
+
+describe("resolveCorrectiveConfig", () => { // env is always passed explicitly, so these tests never read process.env
+  test("off by default when nothing is configured", () => {
+    expect(resolveCorrectiveConfig({}, {})).toEqual({ enabled: false, retryBudget: 1 });
+  });
+
+  test("per-bot config enables it and clamps the budget", () => {
+    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: true, retryBudget: 9 } }, {})).toEqual({
+      enabled: true,
+      retryBudget: 2, // 9 is clamped down to the maximum
+    });
+  });
+
+  test("global env default enables it when the bot doesn't say otherwise", () => {
+    const env = { CORRECTIVE_RETRIEVAL_ENABLED: "true", CORRECTIVE_RETRIEVAL_BUDGET: "2" };
+    expect(resolveCorrectiveConfig({}, env)).toEqual({ enabled: true, retryBudget: 2 });
+  });
+
+  test("per-bot config overrides the global default (disable wins too)", () => {
+    const env = { CORRECTIVE_RETRIEVAL_ENABLED: "true" };
+    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: false } }, env).enabled).toBe(false);
+  });
+
+  test("kill-switch overrides everything", () => {
+    const env = { CORRECTIVE_RETRIEVAL_DISABLED: "1", CORRECTIVE_RETRIEVAL_ENABLED: "true" };
+    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: true, retryBudget: 2 } }, env)).toEqual({
+      enabled: false,
+      retryBudget: 1, // the kill-switch path reports the minimum budget, not the bot's 2
+    });
+  });
+
+  test("a bare global enable defaults the budget to 1", () => {
+    expect(resolveCorrectiveConfig({}, { CORRECTIVE_RETRIEVAL_ENABLED: "true" })).toEqual({ enabled: true, retryBudget: 1 });
+  });
+});
diff --git a/src/ai/corrective-config.ts b/src/ai/corrective-config.ts
new file mode 100644
index 0000000..765cf1b
--- /dev/null
+++ b/src/ai/corrective-config.ts
@@ -0,0 +1,46 @@
+import type { BotConfig } from "../bots/config.ts";
+
+/**
+ * Resolved per-bot corrective-retrieval settings. Precedence:
+ *   1. `CORRECTIVE_RETRIEVAL_DISABLED=1` (hard kill-switch) → always off.
+ *   2. The bot's `config.json` `correctiveRetrieval` block.
+ *   3. The global env defaults (`CORRECTIVE_RETRIEVAL_ENABLED` /
+ *      `CORRECTIVE_RETRIEVAL_BUDGET`).
+ * `retryBudget` is clamped to 1–2 regardless of source.
+ *
+ * Reads `process.env` directly (rather than going through `loadConfig()`) so it
+ * has no hard `DATABASE_URL` dependency and behaves the same in tests.
+ */
+export interface ResolvedCorrectiveConfig {
+  enabled: boolean; // false whenever the kill-switch is set, whatever the bot or global default says
+  /** Max corrective re-queries per knowledge search (1 or 2). */
+  retryBudget: number;
+}
+
+/** Resolve one bot's corrective-retrieval settings: kill-switch, then per-bot config, then global env defaults. */
+export function resolveCorrectiveConfig(
+  botConfig: Pick<BotConfig, "correctiveRetrieval">,
+  env: NodeJS.ProcessEnv = process.env,
+): ResolvedCorrectiveConfig {
+  // Kill-switch beats everything. Accept "true" as well as "1": the sibling ENABLED flag is
+  // spelled "true", and the same spelling here must not leave the feature silently on.
+  const kill = env.CORRECTIVE_RETRIEVAL_DISABLED;
+  if (kill === "1" || kill === "true") return { enabled: false, retryBudget: 1 };
+
+  // Per-bot values win over the global env defaults; `??` keeps an explicit per-bot `false`.
+  const bot = botConfig.correctiveRetrieval;
+  const enabled = bot?.enabled ?? (env.CORRECTIVE_RETRIEVAL_ENABLED === "true");
+  const rawBudget = bot?.retryBudget ?? parseBudgetEnv(env.CORRECTIVE_RETRIEVAL_BUDGET) ?? 1;
+  return { enabled, retryBudget: clampBudget(rawBudget) };
+}
+
+export function clampBudget(n: number): number {
+  const floored = Number.isFinite(n) ? Math.floor(n) : 1; // NaN / ±Infinity → the minimum budget
+  return floored < 1 ? 1 : floored > 2 ? 2 : floored;
+}
+
+function parseBudgetEnv(raw: string | undefined): number | undefined {
+  // Unset or empty → no global budget; so is anything that does not start with an integer.
+  const parsed = raw ? Number.parseInt(raw, 10) : NaN;
+  return Number.isNaN(parsed) ? undefined : parsed;
+}
diff --git a/src/ai/corrective-retrieval.test.ts b/src/ai/corrective-retrieval.test.ts
new file mode 100644
index 0000000..3501f84
--- /dev/null
+++ b/src/ai/corrective-retrieval.test.ts
@@ -0,0 +1,225 @@
+import { test, expect, describe } from "bun:test";
+import { runCorrectiveRetrieval } from "./corrective-retrieval.ts";
+import type { KnowledgeGrade } from "./knowledge-grader.ts";
+import type { KnowledgeSearchResponse, KnowledgeSearchResult } from "./knowledge-search-client.ts";
+import { renderSearchResults } from "./knowledge-search-client.ts";
+import { getLog } from "../logging.ts";
+
+const log = getLog("test", "corrective-retrieval"); // handed to the code under test through baseCtx
+
+/**
+ * Build a search-result fixture. Defaults describe a confident hit titled
+ * after the id; any field supplied in `over` takes precedence.
+ */
+function result(over: Partial<KnowledgeSearchResult> & { id: string; collection: string }): KnowledgeSearchResult {
+  const { id } = over;
+  const matchedChunks = [{ content: `body of ${id}` }];
+  return { title: `Doc ${id}`, relevance: 0.7, confidenceBand: "high", matchedChunks, ...over };
+}
+
+function searchResponse(results: KnowledgeSearchResult[], over: Partial<KnowledgeSearchResponse> = {}): KnowledgeSearchResponse {
+  return Object.assign({ results, bestScore: results[0]?.relevance }, over); // `over` is applied last, so it can replace either field
+}
+
+/** Grader stub: yields `grades` one per call, then keeps yielding the final one. */
+function gradeSequence(...grades: KnowledgeGrade[]) {
+  let calls = 0;
+  return async () => { const at = Math.min(calls, grades.length - 1); calls += 1; return grades[at]!; };
+}
+
+/** Search stub: serves `responses` one per call (the final one repeats) and
+ *  logs every query / collection scope it receives on its `calls` property. */
+function searchSequence(...responses: KnowledgeSearchResponse[]) {
+  const calls: { query: string; collections?: string[] }[] = [];
+  let served = 0;
+  async function stub(query: string, opts?: { collections?: string[] }) {
+    calls.push({ query, collections: opts?.collections });
+    return responses[Math.min(served++, responses.length - 1)]!;
+  }
+  return Object.assign(stub, { calls });
+}
+
+const baseCtx = { // inputs shared by every test below; each test adds the result text, budget and stubs
+  question: "what SEDs belong to LA_BUC_02?",
+  originalQuery: "LA_BUC_02",
+  botName: "test",
+  log,
+};
+
+describe("runCorrectiveRetrieval", () => { // grader and search are always injected stubs here
+  test("verdict 'correct' → no re-query, text unchanged", async () => {
+    const search = searchSequence(searchResponse([result({ id: "1", collection: "wiki" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: "## Original (80% relevant · high)\ncollection: `wiki` doc_id: `1`\n\nbody",
+      budget: 1,
+      gradeFn: gradeSequence({ verdict: "correct", reason: "covered" }),
+      searchFn: search,
+    });
+    expect(out.changed).toBe(false);
+    expect(out.text).toContain("## Original");
+    expect(out.metadata.retries).toBe(0);
+    expect(out.metadata.verdicts).toEqual(["correct"]);
+    expect(out.metadata.queriesTried).toEqual([]);
+    expect(search.calls.length).toBe(0);
+  });
+
+  test("insufficient → one re-query → merged & deduped, then correct", async () => {
+    const original = renderSearchResults([result({ id: "1", collection: "wiki", title: "Old doc" })]);
+    const search = searchSequence(
+      searchResponse([
+        result({ id: "1", collection: "wiki", title: "Old doc" }), // dupe — must be dropped
+        result({ id: "2", collection: "wiki", title: "Fresh doc" }),
+      ]),
+    );
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: original,
+      budget: 1,
+      gradeFn: gradeSequence(
+        { verdict: "insufficient", rewrittenQuery: "LA_BUC_02 structured electronic documents", reason: "off-topic" },
+        { verdict: "correct", reason: "now covered" },
+      ),
+      searchFn: search,
+    });
+    expect(out.changed).toBe(true);
+    expect(out.text).toContain("Old doc"); // original kept
+    expect(out.text).toContain("Fresh doc"); // fresh appended
+    expect(out.text).toContain("[corrective retrieval — re-query #1");
+    // doc_id `1` appears once (original) — the dupe from the re-query was dropped.
+    expect(out.text.match(/doc_id: `1`/g)?.length).toBe(1);
+    expect(out.metadata.retries).toBe(1);
+    expect(out.metadata.verdicts).toEqual(["insufficient", "correct"]);
+    expect(out.metadata.finalVerdict).toBe("correct");
+    expect(out.metadata.queriesTried).toEqual(["LA_BUC_02 structured electronic documents"]);
+    expect(search.calls[0]?.query).toBe("LA_BUC_02 structured electronic documents");
+  });
+
+  test("budget 1 stops after one re-query even if still insufficient", async () => {
+    const search = searchSequence(searchResponse([result({ id: "9", collection: "wiki", title: "Marginal" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: renderSearchResults([result({ id: "1", collection: "wiki" })]),
+      budget: 1,
+      gradeFn: gradeSequence({ verdict: "insufficient", rewrittenQuery: "broader terms", reason: "weak" }),
+      searchFn: search,
+    });
+    expect(out.metadata.retries).toBe(1);
+    expect(out.metadata.verdicts).toEqual(["insufficient", "insufficient"]);
+    expect(out.metadata.finalVerdict).toBe("insufficient");
+    expect(search.calls.length).toBe(1);
+  });
+
+  test("budget is clamped to 2 even when configured higher", async () => {
+    const search = searchSequence(
+      searchResponse([result({ id: "a", collection: "wiki" })]),
+      searchResponse([result({ id: "b", collection: "wiki" })]),
+      searchResponse([result({ id: "c", collection: "wiki" })]),
+    );
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: renderSearchResults([result({ id: "1", collection: "wiki" })]),
+      budget: 5, // deliberately above the supported maximum
+      gradeFn: gradeSequence(
+        { verdict: "insufficient", rewrittenQuery: "q1", reason: "x" },
+        { verdict: "insufficient", rewrittenQuery: "q2", reason: "x" },
+        { verdict: "insufficient", rewrittenQuery: "q3", reason: "x" },
+      ),
+      searchFn: search,
+    });
+    expect(out.metadata.retries).toBe(2);
+    expect(search.calls.map((c) => c.query)).toEqual(["q1", "q2"]);
+  });
+
+  test("re-query throws → loop stops, original unchanged", async () => {
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: "## Original\ncollection: `wiki` doc_id: `1`",
+      budget: 1,
+      gradeFn: gradeSequence({ verdict: "insufficient", rewrittenQuery: "q", reason: "weak" }),
+      searchFn: async () => { throw new Error("knowledge api down"); },
+    });
+    expect(out.changed).toBe(false);
+    expect(out.metadata.retries).toBe(0); // a failed re-query is not counted
+    expect(out.metadata.verdicts).toEqual(["insufficient"]);
+  });
+
+  test("re-query returns only duplicates → no append, but retry recorded", async () => {
+    const original = renderSearchResults([result({ id: "1", collection: "wiki" })]);
+    const search = searchSequence(searchResponse([result({ id: "1", collection: "wiki" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: original,
+      budget: 1,
+      gradeFn: gradeSequence({ verdict: "ambiguous", rewrittenQuery: "rephrased", reason: "broad" }),
+      searchFn: search,
+    });
+    expect(out.changed).toBe(false);
+    expect(out.metadata.retries).toBe(1);
+    expect(out.metadata.queriesTried).toEqual(["rephrased"]);
+  });
+
+  test("no rewritten query and no footer hints → no re-query", async () => {
+    const search = searchSequence(searchResponse([result({ id: "x", collection: "wiki" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: "## Original\ncollection: `wiki` doc_id: `1`",
+      budget: 1,
+      gradeFn: gradeSequence({ verdict: "insufficient", reason: "nothing on topic" }),
+      searchFn: search,
+    });
+    expect(out.changed).toBe(false);
+    expect(out.metadata.retries).toBe(0);
+    expect(search.calls.length).toBe(0);
+  });
+
+  test("falls back to broaderQuery parsed from the result footer", async () => {
+    const original =
+      "## Original (12% relevant · low)\ncollection: `wiki` doc_id: `1`\n\n*No confident match — try: broader query: \"LA_BUC concepts\"*";
+    const search = searchSequence(searchResponse([result({ id: "2", collection: "wiki", title: "Wider hit" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: original,
+      budget: 1,
+      gradeFn: gradeSequence(
+        { verdict: "insufficient", reason: "weak" }, // no rewrittenQuery — must use the footer hint
+        { verdict: "correct", reason: "ok" },
+      ),
+      searchFn: search,
+    });
+    expect(search.calls[0]?.query).toBe("LA_BUC concepts");
+    expect(out.text).toContain("Wider hit");
+  });
+
+  test("suggestedCollection redirects the re-query scope", async () => {
+    const search = searchSequence(searchResponse([result({ id: "2", collection: "confluence", title: "Conf doc" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalQuery: "LA_BUC_02",
+      originalCollections: ["wiki"],
+      originalResultText: renderSearchResults([result({ id: "1", collection: "wiki" })]),
+      budget: 1,
+      gradeFn: gradeSequence(
+        { verdict: "ambiguous", rewrittenQuery: "LA_BUC_02 details", suggestedCollection: "confluence", reason: "wrong collection" },
+        { verdict: "correct", reason: "ok" },
+      ),
+      searchFn: search,
+    });
+    expect(search.calls[0]?.collections).toEqual(["confluence"]);
+    expect(out.changed).toBe(true);
+  });
+
+  test("grader unavailable (returns 'correct') → no change", async () => {
+    const search = searchSequence(searchResponse([result({ id: "x", collection: "wiki" })]));
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: "## Original",
+      budget: 1,
+      gradeFn: async () => ({ verdict: "correct", reason: "grader unavailable" }),
+      searchFn: search,
+    });
+    expect(out.changed).toBe(false);
+    expect(out.metadata.retries).toBe(0);
+    expect(search.calls.length).toBe(0);
+  });
+});
diff --git a/src/ai/corrective-retrieval.ts b/src/ai/corrective-retrieval.ts
new file mode 100644
index 0000000..925f7ad
--- /dev/null
+++ b/src/ai/corrective-retrieval.ts
@@ -0,0 +1,240 @@
+import type { Logger } from "@logtape/logtape";
+import { gradeKnowledgeResults, type GradeVerdict, type KnowledgeGrade } from "./knowledge-grader.ts";
+import {
+  searchKnowledge,
+  renderSearchResults,
+  renderRetryHintsFooter,
+  extractDocKeysFromRenderedText,
+  parseQueryHintsFromFooter,
+  docKey,
+  type KnowledgeSearchResponse,
+} from "./knowledge-search-client.ts";
+
+/**
+ * CRAG-lite corrective loop around the knowledge search tool. After a bot's
+ * `search_knowledge` call returns, this:
+ *
+ *   1. Grades the result with Haiku ({@link gradeKnowledgeResults}).
+ *   2. If the verdict is "ambiguous" / "insufficient" and the retry budget
+ *      isn't spent, re-queries Huginn's `/api/search` with the grader's
+ *      rewritten query (falling back to the Phase-0 `retryHints.broaderQuery` /
+ *      `narrowerQuery` parsed from the result footer), optionally redirected to
+ *      a `suggestedCollection`, forcing `rerank=true` so the re-query's
+ *      `confidenceBand`s are trustworthy.
+ *   3. Merges the fresh hits into the original result text — deduped against
+ *      it by `collection/doc_id` — with an inline note explaining the retry.
+ *   4. Optionally re-grades and retries again, up to the (clamped 1–2) budget;
+ *      never recursive.
+ *
+ * Returns the consolidated text to feed the model plus a `corrective` metadata
+ * block for tracing (`{retries, verdicts, reasons, queriesTried, finalVerdict}`).
+ *
+ * Fail-soft throughout: a grader that can't be reached returns "correct" (no
+ * change); a re-query HTTP error ends the loop with whatever's accumulated. The
+ * caller is expected to gate on the per-bot toggle — this function assumes the
+ * feature is enabled and `budget >= 1`.
+ *
+ * Plan: `../mimir/plans/huginn-muninn-corrective-rag.md` (Phase 1).
+ */
+
+export interface CorrectiveMetadata {
+  /** Number of re-queries actually issued (0–budget). */
+  retries: number;
+  /** Grader verdict from each grading pass, in order. Usually `retries + 1` entries; one fewer when the last re-query added nothing new (it is not re-graded). */
+  verdicts: GradeVerdict[];
+  /** Grader reason from each grading pass, parallel to `verdicts`. */
+  reasons: string[];
+  /** The re-query strings actually issued (excludes the original query). */
+  queriesTried: string[];
+  /** Collections each re-query was scoped to (parallel to `queriesTried`);
+   *  `undefined` entry = searched all collections. */
+  collectionsTried: (string[] | undefined)[];
+  /** The verdict from the last grading pass — i.e. whether the corrective
+   *  pass left the result set in good shape. */
+  finalVerdict: GradeVerdict;
+  /** Total wall time spent in the Haiku grader across all passes, ms. */
+  graderMs: number;
+  /** Wall time of each re-query HTTP call, parallel to `queriesTried`, ms. */
+  requeryMs: number[];
+}
+
+export interface CorrectiveOutcome {
+  /** Tool-result text to feed back to the model. Equal to `originalResultText`
+   *  when nothing changed. */
+  text: string;
+  /** True when `text` differs from `originalResultText` (i.e. results were
+   *  merged in). */
+  changed: boolean;
+  metadata: CorrectiveMetadata; // record of what the loop did; filled in even when nothing changed
+}
+
+export interface CorrectiveRetrievalContext {
+  /** The user's information need — used to grade relevance. Typically the
+   *  current user turn (trimmed). */
+  question: string;
+  /** The search query the model issued (from the tool call's args). Used to
+   *  avoid re-issuing an identical query. */
+  originalQuery: string;
+  /** Collection(s) the model restricted the original search to, if any. */
+  originalCollections?: string[];
+  /** The rendered, trace-marker-peeled tool result the model would otherwise
+   *  see. */
+  originalResultText: string;
+  /** Max re-queries. Clamped to [1, 2]. The caller gates on the per-bot
+   *  toggle; this function only sees enabled invocations. */
+  budget: number;
+  botName: string; // forwarded to the grader and attached to this module's log records
+  /** Working directory for the grader's Haiku spawn. */
+  cwd?: string;
+  log: Logger; // receives the warn/info records of the loop and is passed on to the grader
+  /** Haiku model override for the grader. */
+  graderModel?: string;
+  graderTimeoutMs?: number; // forwarded to the grader as `timeoutMs`
+  /** Injectable for tests. */
+  searchFn?: typeof searchKnowledge;
+  gradeFn?: typeof gradeKnowledgeResults;
+}
+
+/** Run the grade → re-query → merge loop described in the module comment above; grader and search failures never throw. */
+export async function runCorrectiveRetrieval(ctx: CorrectiveRetrievalContext): Promise<CorrectiveOutcome> {
+  // Clamp to 1–2. A non-finite budget falls back to 1: Math.max/Math.min would
+  // propagate NaN, and `retries >= NaN` is never true, which would lift the cap.
+  const budget = Number.isFinite(ctx.budget) ? Math.max(1, Math.min(2, Math.floor(ctx.budget))) : 1;
+  const search = ctx.searchFn ?? searchKnowledge;
+  const grade = ctx.gradeFn ?? gradeKnowledgeResults;
+  const { question, originalQuery, originalResultText, botName, cwd, log } = ctx;
+
+  let currentText = originalResultText;
+  let currentCollections = ctx.originalCollections;
+  let lastQuery = originalQuery;
+  const verdicts: GradeVerdict[] = [];
+  const reasons: string[] = [];
+  const queriesTried: string[] = [];
+  const collectionsTried: (string[] | undefined)[] = [];
+  const requeryMs: number[] = [];
+  let graderMs = 0;
+  let retries = 0;
+  // Each pass grades the accumulated text, then (budget permitting) re-queries and merges.
+  for (;;) {
+    let g: KnowledgeGrade;
+    const gradeStart = performance.now();
+    try {
+      g = await grade({
+        question,
+        toolResultText: currentText,
+        botName,
+        cwd,
+        log,
+        model: ctx.graderModel,
+        timeoutMs: ctx.graderTimeoutMs,
+      });
+    } catch (err) {
+      log.warn("corrective: grader threw — stopping with current results: {error}", {
+        botName,
+        error: err instanceof Error ? err.message : String(err),
+      });
+      g = { verdict: "correct", reason: "grader error" };
+    }
+    graderMs += performance.now() - gradeStart;
+    verdicts.push(g.verdict);
+    reasons.push(g.reason);
+
+    // Done when the grader is satisfied or the re-query allowance is used up.
+    if (g.verdict === "correct" || retries >= budget) break;
+
+    const nextQuery = pickRetryQuery(g, currentText, { lastQuery, originalQuery, queriesTried });
+    if (!nextQuery) break;
+
+    // A grader-suggested collection replaces the current scope for this re-query.
+    const collections = g.suggestedCollection ? [g.suggestedCollection] : currentCollections;
+
+    let resp: KnowledgeSearchResponse;
+    const requeryStart = performance.now();
+    try {
+      resp = await search(nextQuery, { collections, rerank: true, limit: 10, maxChunksPerDoc: 2 });
+    } catch (err) {
+      log.warn("corrective: re-query failed for {query} — stopping: {error}", {
+        botName,
+        query: nextQuery,
+        error: err instanceof Error ? err.message : String(err),
+      });
+      break;
+    }
+
+    // Bookkeeping happens only for re-queries that returned; a failed one is not recorded.
+    retries++;
+    queriesTried.push(nextQuery);
+    collectionsTried.push(collections);
+    requeryMs.push(Math.round(performance.now() - requeryStart));
+    lastQuery = nextQuery;
+
+    const existing = extractDocKeysFromRenderedText(currentText);
+    const fresh = resp.results.filter((r) => r.id && r.collection && !existing.has(docKey(r)));
+    if (fresh.length === 0) {
+      // The re-query surfaced nothing new (or nothing at all). Don't append a
+      // confirmation block — keep the model's context clean. The trace still
+      // records the attempt via `queriesTried`.
+      log.info("corrective: re-query {query} added no new documents", { botName, query: nextQuery });
+      break;
+    }
+
+    const note = buildCorrectiveNote({
+      retryNum: retries,
+      verdict: g.verdict,
+      reason: g.reason,
+      query: nextQuery,
+      collections,
+      freshCount: fresh.length,
+    });
+    currentText = `${currentText}\n\n---\n${note}\n\n${renderSearchResults(fresh)}${renderRetryHintsFooter(resp)}`;
+    currentCollections = collections;
+  }
+
+  return {
+    text: currentText,
+    changed: currentText !== originalResultText,
+    metadata: {
+      retries,
+      verdicts,
+      reasons,
+      queriesTried,
+      collectionsTried,
+      finalVerdict: verdicts[verdicts.length - 1] ?? "correct",
+      graderMs: Math.round(graderMs),
+      requeryMs,
+    },
+  };
+}
+
+function pickRetryQuery(
+  grade: KnowledgeGrade,
+  resultText: string,
+  used: { lastQuery: string; originalQuery: string; queriesTried: string[] },
+): string | null {
+  // Preference order: the grader's rewrite, then the footer's broader hint, then its narrower one.
+  const { broaderQuery, narrowerQuery } = parseQueryHintsFromFooter(resultText);
+  const alreadyIssued = (q: string) =>
+    q === used.lastQuery || q === used.originalQuery || used.queriesTried.includes(q);
+  const next = [grade.rewrittenQuery, broaderQuery, narrowerQuery]
+    .map((q) => (typeof q === "string" ? q.trim() : ""))
+    .find((q) => q.length > 0 && !alreadyIssued(q));
+  // `find` yields undefined when every candidate is blank or already used.
+  return next ?? null;
+}
+
+function buildCorrectiveNote(args: {
+  retryNum: number;
+  verdict: GradeVerdict;
+  reason: string;
+  query: string;
+  collections?: string[];
+  freshCount: number;
+}): string {
+  const { retryNum, verdict, reason, query, freshCount } = args;
+  const names = args.collections ?? [];
+  // Name the searched collection(s) only when the re-query was scoped.
+  const scope = names.length > 0 ? ` in collection${names.length > 1 ? "s" : ""} ${names.join(", ")}` : "";
+  const noun = freshCount === 1 ? "result" : "results";
+  const head = `[corrective retrieval — re-query #${retryNum}: prior results graded "${verdict}" (${reason}); `;
+  return `${head}re-searched "${query}"${scope}; ${freshCount} additional ${noun} below, deduped against the results above]`;
+}
diff --git a/src/ai/huginn-trace-pointer.ts b/src/ai/huginn-trace-pointer.ts
index 3b517a1..49043f6 100644
--- a/src/ai/huginn-trace-pointer.ts
+++ b/src/ai/huginn-trace-pointer.ts
@@ -105,6 +105,35 @@ export function parseHuginnTracePointer(
   return { text: output, fetchUrl: null };
 }
 
+/**
+ * Split a tool-result string into its body and its trailing Huginn trace
+ * marker (pointer line or inline `huginn-trace` fence), reconstructing the
+ * marker as a re-appendable string.
+ *
+ * Used by connectors that rewrite a knowledge-search tool result (e.g. the
+ * corrective-retrieval pass in copilot-sdk) before handing it to the model:
+ * peel the marker, splice new content into the body, then re-append `remainder`
+ * at the very end so the downstream {@link processMcpToolResult} call still
+ * finds and extracts the trace. Does not perform any network fetch.
+ *
+ * `remainder` is `""` when no marker was present (or a pointer-id form whose
+ * URL couldn't be resolved — in which case it wouldn't have been fetched
+ * anyway, so dropping it is harmless). Otherwise it includes the leading
+ * `\n\n` separator.
+ */
+export function peelTraceMarkerForRewrite(text: string): { body: string; remainder: string } {
+  const { text: withoutPointer, fetchUrl } = parseHuginnTracePointer(text);
+  const pointerStripped = withoutPointer !== text;
+  if (pointerStripped) {
+    // Rebuild the pointer in its URL form; without a URL there is nothing to re-append.
+    return { body: withoutPointer, remainder: fetchUrl ? `\n\nhuginn-trace-url: ${fetchUrl}` : "" };
+  }
+  const { text: withoutFence, trace } = parseHuginnTrace(text);
+  if (trace === null) return { body: text, remainder: "" };
+  const fenceBlock = ["```huginn-trace", JSON.stringify(trace), "```"].join("\n");
+  return { body: withoutFence, remainder: `\n\n${fenceBlock}` };
+}
+
 interface HuginnTraceChannel {
   /** Tool output with the trace marker stripped, ready to store / forward. */
   text: string;
diff --git a/src/ai/knowledge-grader.test.ts b/src/ai/knowledge-grader.test.ts
new file mode 100644
index 0000000..df42d94
--- /dev/null
+++ b/src/ai/knowledge-grader.test.ts
@@ -0,0 +1,78 @@
+import { test, expect, describe } from "bun:test";
+import { gradeKnowledgeResults, normalizeGrade } from "./knowledge-grader.ts";
+import { getLog } from "../logging.ts";
+import type { HaikuResult } from "../scheduler/executor.ts";
+
+const log = getLog("test", "knowledge-grader"); // passed to gradeKnowledgeResults through `base` below
+
+function fakeSpawn(result: string): () => Promise<HaikuResult> {
+  return () => Promise.resolve({ result, inputTokens: 0, outputTokens: 0, model: "haiku" }); // canned reply, zero token usage
+}
+
+describe("normalizeGrade", () => { // called directly with hand-built payloads; no Haiku involved
+  test("passes through a valid 'correct' verdict and drops any query", () => {
+    const g = normalizeGrade({ verdict: "correct", rewrittenQuery: "ignored", reason: "covered" });
+    expect(g.verdict).toBe("correct");
+    expect(g.rewrittenQuery).toBeUndefined();
+    expect(g.suggestedCollection).toBeUndefined();
+    expect(g.reason).toBe("covered");
+  });
+
+  test("keeps rewrittenQuery / suggestedCollection for non-correct verdicts", () => {
+    const g = normalizeGrade({
+      verdict: "ambiguous",
+      rewrittenQuery: "  LA_BUC_02 SED list  ", // surrounding whitespace must be trimmed
+      suggestedCollection: " confluence ",
+      reason: "too broad",
+    });
+    expect(g.verdict).toBe("ambiguous");
+    expect(g.rewrittenQuery).toBe("LA_BUC_02 SED list");
+    expect(g.suggestedCollection).toBe("confluence");
+  });
+
+  test("unknown / missing verdict falls back to 'correct' (fail-soft)", () => {
+    expect(normalizeGrade({}).verdict).toBe("correct");
+    expect(normalizeGrade({ verdict: "garbage" }).verdict).toBe("correct");
+    expect(normalizeGrade({ verdict: 42 }).verdict).toBe("correct");
+  });
+
+  test("blank / non-string rewrittenQuery is dropped", () => {
+    const g = normalizeGrade({ verdict: "insufficient", rewrittenQuery: "   ", reason: "" });
+    expect(g.rewrittenQuery).toBeUndefined();
+    expect(g.reason).toBeTruthy(); // synthesized default
+  });
+});
+
+describe("gradeKnowledgeResults", () => { // every test injects `spawnFn`, so the default spawnHaiku is never used
+  const base = { question: "what SEDs belong to LA_BUC_02?", toolResultText: "## Some doc (12% relevant · low)", botName: "test", log };
+
+  test("parses a clean JSON verdict from Haiku", async () => {
+    const g = await gradeKnowledgeResults({
+      ...base,
+      spawnFn: fakeSpawn('{"verdict":"insufficient","rewrittenQuery":"LA_BUC_02 structured electronic documents","reason":"off-topic snippets"}'),
+    });
+    expect(g.verdict).toBe("insufficient");
+    expect(g.rewrittenQuery).toBe("LA_BUC_02 structured electronic documents");
+  });
+
+  test("tolerates surrounding prose / markdown fence around the JSON", async () => {
+    const g = await gradeKnowledgeResults({
+      ...base,
+      spawnFn: fakeSpawn('Here is my assessment:\n```json\n{"verdict":"ambiguous","reason":"query too vague"}\n```\n'),
+    });
+    expect(g.verdict).toBe("ambiguous");
+  });
+
+  test("Haiku throwing → verdict 'correct' (no disruption)", async () => {
+    const g = await gradeKnowledgeResults({
+      ...base,
+      spawnFn: async () => { throw new Error("haiku down"); },
+    });
+    expect(g.verdict).toBe("correct");
+  });
+
+  test("unparseable Haiku output → verdict 'correct'", async () => {
+    const g = await gradeKnowledgeResults({ ...base, spawnFn: fakeSpawn("not json at all, sorry") });
+    expect(g.verdict).toBe("correct");
+  });
+});
diff --git a/src/ai/knowledge-grader.ts b/src/ai/knowledge-grader.ts
new file mode 100644
index 0000000..52ed45a
--- /dev/null
+++ b/src/ai/knowledge-grader.ts
@@ -0,0 +1,150 @@
+import { spawnHaiku } from "../scheduler/executor.ts";
+import { extractJson } from "./json-extract.ts";
+import type { Logger } from "@logtape/logtape";
+
+/**
+ * CRAG-style retrieval evaluator for the knowledge search tool. Given the
+ * user's question and the (rendered) search results — which carry per-result
+ * `confidenceBand` annotations and a `*No confident match — try: …*` footer
+ * from Huginn's MCP adapter — a dedicated Haiku call decides whether the
+ * results are good enough to answer from, and if not, proposes a sharper
+ * query and/or a better collection.
+ *
+ * This is an **awaiting** Haiku call (it gates whether a corrective re-query
+ * happens), so it uses {@link spawnHaiku} directly rather than the
+ * fire-and-forget {@link runHaikuExtraction} pattern.
+ *
+ * Fail-soft: any Haiku error or unparseable output yields `verdict: "correct"`
+ * — the corrective loop becomes a no-op and the model sees the original result
+ * unchanged. The corrective feature must never make a search *worse*.
+ *
+ * Plan: `../mimir/plans/huginn-muninn-corrective-rag.md` (Phase 1).
+ */
+
+export type GradeVerdict = "correct" | "ambiguous" | "insufficient"; // "correct" doubles as the fail-soft default for any unrecognised verdict
+
+export interface KnowledgeGrade {
+  verdict: GradeVerdict; // always present; "correct" means no re-query is needed
+  /** A single search string (not a question) to re-query with. Present only
+   *  when verdict is "ambiguous" or "insufficient" and the grader had a better
+   *  query to offer. */
+  rewrittenQuery?: string;
+  /** A collection name to try instead — only when the results hint another
+   *  collection is the right home. Never invented. */
+  suggestedCollection?: string;
+  /** One short sentence explaining the verdict. */
+  reason: string;
+}
+
+export interface GradeKnowledgeOptions {
+  question: string; // the user's question the results are graded against
+  /** The rendered search-result text the model would see (trace markers
+   *  already peeled). */
+  toolResultText: string;
+  botName: string; // used in the spawn entrypoint name and in log records
+  /** Working directory for the Haiku spawn — keeps the session out of the
+   *  project root and gives it the bot's MCP/settings context. */
+  cwd?: string;
+  log: Logger; // receives a warning whenever the grader falls back to "correct"
+  /** Haiku model override (defaults to the project's standard Haiku model). */
+  model?: string;
+  timeoutMs?: number; // forwarded unchanged to the spawn call
+  /** Injectable for tests — defaults to {@link spawnHaiku}. */
+  spawnFn?: typeof spawnHaiku;
+}
+
+/** Cap, in characters, on the result text fed into the grader prompt — keeps the
+ *  Haiku call cheap. The retry-hint footer sits at the end of the text, so
+ *  over-long input keeps a head and a tail slice rather than just a prefix. */
+const MAX_RESULT_CHARS = 12_000;
+
+/** Grade rendered search results against the user's question with one awaited Haiku call.
+ *  Haiku failures and unparseable replies do not throw — they yield `verdict: "correct"`. */
+export async function gradeKnowledgeResults(opts: GradeKnowledgeOptions): Promise<KnowledgeGrade> {
+  const { botName, log } = opts;
+  // Cap the result text before embedding it in the prompt.
+  const prompt = buildGraderPrompt(opts.question, clampResultText(opts.toolResultText));
+  const runHaiku = opts.spawnFn ?? spawnHaiku;
+
+  // Step 1: ask Haiku. A failed call degrades to "correct" instead of throwing.
+  let raw: string;
+  try {
+    const reply = await runHaiku(prompt, {
+      source: "knowledge-grader",
+      entrypoint: `${botName}-knowledge-grader`,
+      cwd: opts.cwd,
+      botName,
+      model: opts.model,
+      timeoutMs: opts.timeoutMs,
+    });
+    raw = reply.result;
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    log.warn("knowledge grader Haiku call failed — treating as 'correct': {error}", { botName, error });
+    return { verdict: "correct", reason: "grader unavailable" };
+  }
+
+  // Step 2: extract the JSON object. Unparseable output also degrades to "correct".
+  let parsed: Record<string, unknown>;
+  try {
+    parsed = extractJson<Record<string, unknown>>(raw);
+  } catch {
+    const preview = raw.slice(0, 300); // keep the log record short
+    log.warn("knowledge grader: unparseable result — treating as 'correct': {raw}", { botName, raw: preview });
+    return { verdict: "correct", reason: "grader output unparseable" };
+  }
+
+  // Step 3: coerce whatever shape came back into a well-formed grade.
+  return normalizeGrade(parsed);
+}
+
+/** Coerce the grader's parsed JSON into a well-formed grade; anything unusable degrades to "correct". */
+export function normalizeGrade(parsed: Record<string, unknown>): KnowledgeGrade {
+  // The payload comes from model output, so the declared type is only a hope: valid JSON
+  // can also be `null`, and reading a property off that would throw instead of failing soft.
+  const fields: Record<string, unknown> = parsed !== null && typeof parsed === "object" ? parsed : {};
+  const text = (key: string): string => {
+    const value = fields[key];
+    return typeof value === "string" ? value.trim() : "";
+  };
+
+  const { verdict } = fields;
+  const safeVerdict: GradeVerdict = verdict === "ambiguous" || verdict === "insufficient" ? verdict : "correct";
+  const fallbackReason = safeVerdict === "correct" ? "results cover the question" : "results do not clearly cover the question";
+  const grade: KnowledgeGrade = { verdict: safeVerdict, reason: text("reason") || fallbackReason };
+
+  // Retry hints are only meaningful when a re-query may follow, so drop them on "correct".
+  if (safeVerdict !== "correct") {
+    if (text("rewrittenQuery")) grade.rewrittenQuery = text("rewrittenQuery");
+    if (text("suggestedCollection")) grade.suggestedCollection = text("suggestedCollection");
+  }
+  return grade;
+}
+
+function clampResultText(text: string): string {
+  const omitted = text.length - MAX_RESULT_CHARS;
+  if (omitted <= 0) return text;
+  const headLen = Math.floor(MAX_RESULT_CHARS * 0.7); // 70% of the cap from the head, the rest from the tail
+  return [text.slice(0, headLen), `…[${omitted} chars omitted]…`, text.slice(headLen - MAX_RESULT_CHARS)].join("\n");
+}
+
+function buildGraderPrompt(question: string, resultText: string): string { // empty resultText is shown to the grader as "(no results were returned)"
+  return `You grade the quality of knowledge-base search results before an assistant answers from them.
+
+USER QUESTION:
+${question}
+
+SEARCH RESULTS (each hit is annotated with a confidence band — high / medium / low; a trailing "No confident match" or "Weak match" line, if present, means the search itself was unsure):
+${resultText || "(no results were returned)"}
+
+Decide whether these results let the question be answered well, then respond with ONLY a JSON object — no prose, no markdown fence:
+{"verdict":"correct"|"ambiguous"|"insufficient","rewrittenQuery":"...","suggestedCollection":"...","reason":"..."}
+
+Guidance:
+- "correct": at least one clearly on-topic, reasonably-confident result covers the question. No re-query needed. Omit rewrittenQuery and suggestedCollection.
+- "ambiguous": results are partially relevant but the query was too broad, too narrow, or worded differently than the indexed content; a sharper query would likely find better hits.
+- "insufficient": nothing on-topic, or only low-confidence / off-topic snippets, or no results at all.
+- rewrittenQuery: a single concise SEARCH STRING (keywords / phrase), NOT a question. Only when verdict is "ambiguous" or "insufficient". If you cannot improve on the query, omit it.
+- suggestedCollection: only set it if the results clearly hint a different collection is the right home for this topic. Never invent a collection name.
+- reason: one short sentence.`;
+}
diff --git a/src/ai/knowledge-search-client.test.ts b/src/ai/knowledge-search-client.test.ts
new file mode 100644
index 0000000..51186c6
--- /dev/null
+++ b/src/ai/knowledge-search-client.test.ts
@@ -0,0 +1,138 @@
+import { test, expect, describe, afterEach } from "bun:test";
+import {
+  searchKnowledge,
+  renderSearchResults,
+  renderRetryHintsFooter,
+  extractDocKeysFromRenderedText,
+  parseQueryHintsFromFooter,
+  docKey,
+  type KnowledgeSearchResult,
+} from "./knowledge-search-client.ts";
+
+describe("renderSearchResults", () => { // pins the rendered markdown: header, doc-id line, chunk bodies, snippet fallback, WIP marker
+  test("renders header, url, breadcrumb, the doc-id line and chunk bodies", () => {
+    const r: KnowledgeSearchResult = {
+      collection: "wiki",
+      id: "abc-123",
+      title: "Knowledge Graph RAG",
+      url: "https://example.test/kg-rag",
+      breadcrumb: "Architecture / Retrieval",
+      relevance: 0.823,
+      confidenceBand: "high",
+      modifiedTime: "2026-05-01T12:00:00Z",
+      matchedChunks: [{ heading: "Overview", content: "It combines a graph with vector search." }],
+    };
+    const out = renderSearchResults([r]);
+    expect(out).toContain("## Knowledge Graph RAG (82.3% relevant · high) | updated: 2026-05-01");
+    expect(out).toContain("https://example.test/kg-rag");
+    expect(out).toContain("Architecture / Retrieval");
+    expect(out).toContain("collection: `wiki` doc_id: `abc-123`");
+    expect(out).toContain("**Overview**");
+    expect(out).toContain("It combines a graph with vector search.");
+  });
+
+  test("falls back to snippet when there are no matched chunks", () => {
+    const out = renderSearchResults([{ collection: "c", id: "1", title: "T", relevance: 0.5, confidenceBand: "medium", snippet: "a short snippet" }]);
+    expect(out).toContain("a short snippet");
+    expect(out).toContain("collection: `c` doc_id: `1`");
+  });
+
+  test("WIP metadata renders the marker; internal metadata keys are hidden", () => {
+    const out = renderSearchResults([{ collection: "c", id: "1", title: "Draft", metadata: { wip: "true", page_id: "x", owner: "alice" }, matchedChunks: [{ content: "body" }] }]);
+    expect(out).toContain("**[UNDER ARBEID]**");
+    expect(out).not.toContain("page_id");
+  });
+});
+
+describe("extractDocKeysFromRenderedText", () => { // round-trips rendered text back into collection/doc_id keys
+  test("pulls collection/doc_id pairs out of rendered result text", () => {
+    const text = renderSearchResults([
+      { collection: "wiki", id: "1", title: "A", matchedChunks: [{ content: "x" }] },
+      { collection: "confluence", id: "PAGE-2", title: "B", matchedChunks: [{ content: "y" }] },
+    ]);
+    const keys = extractDocKeysFromRenderedText(text);
+    expect(keys.has("wiki/1")).toBe(true);
+    expect(keys.has("confluence/PAGE-2")).toBe(true);
+    expect(keys.size).toBe(2);
+  });
+
+  test("returns empty set for text with no doc-id lines", () => {
+    expect(extractDocKeysFromRenderedText("just some prose").size).toBe(0);
+  });
+
+  test("docKey matches the rendered line format", () => {
+    expect(docKey({ collection: "wiki", id: "1" })).toBe("wiki/1");
+  });
+});
+
+describe("parseQueryHintsFromFooter", () => { // hint extraction from a rendered retry-hints footer
+  test("extracts broader and narrower query hints", () => {
+    const footer = '*No confident match — try: related terms: A, B · narrower query: "X Y narrow" · broader query: "X wide"*';
+    expect(parseQueryHintsFromFooter(footer)).toEqual({ broaderQuery: "X wide", narrowerQuery: "X Y narrow" });
+  });
+
+  test("returns empty object when no hints present", () => {
+    expect(parseQueryHintsFromFooter("no hints here")).toEqual({});
+  });
+});
+
+describe("renderRetryHintsFooter", () => { // footer wording with hints, without hints, and with nothing to report
+  test("renders a 'No confident match' line with hints", () => {
+    const out = renderRetryHintsFooter({ noConfidentResults: true, retryHints: { relatedTerms: ["a", "b"], broaderQuery: "wider" } });
+    expect(out).toContain("No confident match — try: related terms: a, b · broader query: \"wider\"");
+  });
+
+  test("renders a bare 'No confident match.' when there are no hints", () => {
+    expect(renderRetryHintsFooter({ noConfidentResults: true })).toBe("\n\n*No confident match.*");
+  });
+
+  test("returns empty string when there's nothing to say", () => {
+    expect(renderRetryHintsFooter({})).toBe("");
+  });
+});
+
+describe("searchKnowledge", () => { // HTTP client tests; the global fetch is stubbed per test and restored afterwards
+  const realFetch = globalThis.fetch;
+  afterEach(() => { globalThis.fetch = realFetch; });
+
+  test("builds the query string and normalizes the response", async () => {
+    let seenUrl = "";
+    globalThis.fetch = (async (input: RequestInfo | URL) => {
+      seenUrl = String(input);
+      return new Response(
+        JSON.stringify({
+          results: [{ collection: "wiki", id: "1", title: "T", relevance: 0.7, confidenceBand: "high", matchedChunks: [{ content: "c" }] }],
+          bestScore: 0.7,
+        }),
+        { status: 200, headers: { "content-type": "application/json" } },
+      );
+    }) as unknown as typeof fetch;
+
+    const resp = await searchKnowledge("graph rag", { collections: ["wiki"], rerank: true, minRelevance: 0.4, limit: 5 });
+    expect(seenUrl).toContain("/api/search?");
+    expect(seenUrl).toContain("q=graph+rag");
+    expect(seenUrl).toContain("collection=wiki");
+    expect(seenUrl).toContain("rerank=true");
+    expect(seenUrl).toContain("min_relevance=0.4");
+    expect(resp.results.length).toBe(1);
+    expect(resp.results[0]?.confidenceBand).toBe("high");
+    expect(resp.bestScore).toBe(0.7);
+  });
+
+  test("parses noConfidentResults + retryHints", async () => {
+    globalThis.fetch = (async () =>
+      new Response(JSON.stringify({ results: [], bestScore: 0.75, noConfidentResults: true, retryHints: { detectedEntities: ["RAG"], broaderQuery: "wider" } }), {
+        status: 200,
+        headers: { "content-type": "application/json" },
+      })) as unknown as typeof fetch;
+    const resp = await searchKnowledge("x");
+    expect(resp.noConfidentResults).toBe(true);
+    expect(resp.retryHints?.detectedEntities).toEqual(["RAG"]);
+    expect(resp.retryHints?.broaderQuery).toBe("wider");
+  });
+
+  test("throws on a non-2xx response", async () => {
+    globalThis.fetch = (async () => new Response("nope", { status: 503 })) as unknown as typeof fetch;
+    await expect(searchKnowledge("x")).rejects.toThrow();
+  });
+});
diff --git a/src/ai/knowledge-search-client.ts b/src/ai/knowledge-search-client.ts
new file mode 100644
index 0000000..599685c
--- /dev/null
+++ b/src/ai/knowledge-search-client.ts
@@ -0,0 +1,271 @@
+import { getLog } from "../logging.ts";
+
+const log = getLog("ai", "knowledge-search");
+
+/** Base URL for Huginn's HTTP API: `KNOWLEDGE_API_URL` (same env Huginn-side uses) when set and non-empty, otherwise
+ *  `http://localhost:8321`. Read directly (not via `loadConfig()`) so this module has no `DATABASE_URL` dependency. */
+function knowledgeApiBaseUrl(): string {
+  return process.env.KNOWLEDGE_API_URL || "http://localhost:8321";
+}
+
+/**
+ * Thin HTTP client for Huginn's `GET /api/search`, plus a renderer that mirrors
+ * the shape Huginn's MCP adapter produces (so a corrective re-query's hits read
+ * identically to the ones the model already saw) and a parser for the
+ * `collection: \`x\` doc_id: \`y\`` lines those results carry (used to dedupe a
+ * re-query against the original result text).
+ *
+ * Scope: this is the Phase-1 corrective-retrieval consumer of the Phase-0
+ * contract — `bestScore`, per-result `confidenceBand`, `retryHints`,
+ * `noConfidentResults`, `min_relevance`. See
+ * `../mimir/plans/huginn-muninn-corrective-rag.md`.
+ */
+
+export type ConfidenceBand = "high" | "medium" | "low"; // any other value in an API response is normalized to undefined
+
+export interface KnowledgeMatchedChunk {
+  content?: string; // chunk body; rendered verbatim as its own paragraph
+  heading?: string; // rendered as a bold line ahead of the content
+  relevance?: number;
+  metadata?: Record<string, unknown>; // truthy, non-internal entries are rendered as one italic `key: value` line
+}
+
+export interface KnowledgeSearchResult {
+  collection: string;
+  id: string; // document id; together with `collection` it forms the dedupe key (see `docKey`)
+  title: string;
+  url?: string;
+  snippet?: string; // rendered only when the result has no matched chunks
+  breadcrumb?: string;
+  heading?: string;
+  relevance?: number; // fraction, rendered as a percentage (×100, one decimal); presumably 0.0–1.0 — confirm against Huginn
+  confidenceBand?: ConfidenceBand;
+  modifiedTime?: string; // only its first 10 characters are rendered; presumably an ISO-8601 timestamp — confirm
+  matchedChunks?: KnowledgeMatchedChunk[];
+  metadata?: Record<string, unknown>; // `wip: "true"` adds the work-in-progress marker to the rendered header
+  /** Graph-context annotation lines, when graph augmentation produced any. */
+  graphContext?: string[];
+}
+
+export interface KnowledgeRetryHints {
+  detectedEntities?: string[]; // parsed from the response but not included in the rendered footer
+  relatedTerms?: string[]; // rendered comma-separated in the footer
+  narrowerQuery?: string; // rendered in double quotes in the footer
+  broaderQuery?: string; // rendered in double quotes in the footer
+}
+
+export interface KnowledgeSearchResponse {
+  results: KnowledgeSearchResult[];
+  bestScore?: number;
+  noConfidentResults?: boolean; // after normalization: true only when the API sent a literal `true`, otherwise false
+  retryHints?: KnowledgeRetryHints;
+  /** Present when Huginn returns a relational graph answer ahead of the hits (read from the `graph_answer` response field). */
+  graphAnswer?: string;
+  lowConfidence?: boolean; // after normalization: true only when the API sent a literal `true`, otherwise false
+}
+
+export interface SearchKnowledgeOptions {
+  /** Restrict to specific collection(s). Omit to search all available. */
+  collections?: string[];
+  limit?: number;
+  brief?: boolean; // only sent to the API when true
+  /** Force (or disable) cross-encoder reranking. Default: Huginn's default
+   *  (`true` for full, `false` for brief). Corrective re-queries pass `true`
+   *  so `confidenceBand` is trustworthy on the re-query. */
+  rerank?: boolean;
+  /** Drop results below this relevance (0.0–1.0). When it empties the set the
+   *  response carries `noConfidentResults` + `retryHints`. */
+  minRelevance?: number;
+  maxChunksPerDoc?: number;
+  timeoutMs?: number; // request timeout in milliseconds; defaults to 8000
+  /** Override the base URL (defaults to the `KNOWLEDGE_API_URL` env var, else `http://localhost:8321`). */
+  baseUrl?: string;
+}
+
+const GRAPH_CONTEXT_KEY = "graph_context";
+
+/** Query Huginn's `GET /api/search` and return the normalized response. Rejects on network error,
+ *  timeout or non-2xx — callers in the corrective path treat that as "no re-query" (fail-soft). */
+export async function searchKnowledge(
+  query: string,
+  opts: SearchKnowledgeOptions = {},
+): Promise<KnowledgeSearchResponse> {
+  // Trailing slashes are stripped so the joined URL never contains "//api".
+  const root = (opts.baseUrl ?? knowledgeApiBaseUrl()).replace(/\/+$/, "");
+  const qs = new URLSearchParams({ q: query });
+  // Optional settings are forwarded only when the caller supplied them (`brief` only when truthy).
+  if (opts.limit !== undefined) qs.set("limit", String(opts.limit));
+  if (opts.brief) qs.set("brief", "true");
+  if (opts.rerank !== undefined) qs.set("rerank", String(opts.rerank));
+  if (opts.minRelevance !== undefined) qs.set("min_relevance", String(opts.minRelevance));
+  if (opts.maxChunksPerDoc !== undefined) qs.set("max_chunks_per_doc", String(opts.maxChunksPerDoc));
+  (opts.collections ?? []).forEach((name) => qs.append("collection", name));
+
+  const timeoutMs = opts.timeoutMs ?? 8000;
+  const reply = await fetch(`${root}/api/search?${qs.toString()}`, { signal: AbortSignal.timeout(timeoutMs) });
+  if (!reply.ok) {
+    throw new Error(`knowledge search returned ${reply.status} for ${query}`);
+  }
+  return normalizeResponse((await reply.json()) as Record<string, unknown>);
+}
+
+function normalizeResponse(data: Record<string, unknown>): KnowledgeSearchResponse {
+  const text = (v: unknown): string | undefined => (v ? String(v) : undefined); // falsy → undefined, anything else is stringified
+  const num = (v: unknown): number | undefined => (typeof v === "number" ? v : undefined); // numeric strings are dropped, not coerced
+  const rows = Array.isArray(data.results) ? (data.results as Record<string, unknown>[]) : [];
+
+  const toChunk = (c: Record<string, unknown>): KnowledgeMatchedChunk => ({
+    content: text(c.content),
+    heading: text(c.heading),
+    relevance: num(c.relevance),
+    metadata: isRecord(c.metadata) ? c.metadata : undefined,
+  });
+  const toResult = (r: Record<string, unknown>): KnowledgeSearchResult => ({
+    collection: String(r.collection ?? ""),
+    id: String(r.id ?? ""),
+    title: String(r.title ?? r.id ?? "(untitled)"),
+    url: text(r.url),
+    snippet: text(r.snippet),
+    breadcrumb: text(r.breadcrumb),
+    heading: text(r.heading),
+    relevance: num(r.relevance),
+    confidenceBand: isBand(r.confidenceBand) ? r.confidenceBand : undefined,
+    modifiedTime: text(r.modifiedTime),
+    matchedChunks: Array.isArray(r.matchedChunks) ? (r.matchedChunks as Record<string, unknown>[]).map(toChunk) : undefined,
+    metadata: isRecord(r.metadata) ? r.metadata : undefined,
+    graphContext: Array.isArray(r[GRAPH_CONTEXT_KEY]) ? (r[GRAPH_CONTEXT_KEY] as unknown[]).map(String) : undefined,
+  });
+
+  return {
+    results: rows.map(toResult),
+    bestScore: num(data.bestScore),
+    noConfidentResults: data.noConfidentResults === true,
+    retryHints: parseRetryHints(data.retryHints),
+    graphAnswer: text(data.graph_answer),
+    lowConfidence: data.lowConfidence === true,
+  };
+}
+
+function parseRetryHints(raw: unknown): KnowledgeRetryHints | undefined {
+  if (!isRecord(raw)) return undefined; // hints absent, or not an object
+  const parsed: KnowledgeRetryHints = {};
+  if (Array.isArray(raw.detectedEntities)) parsed.detectedEntities = raw.detectedEntities.map((e) => String(e));
+  if (Array.isArray(raw.relatedTerms)) parsed.relatedTerms = raw.relatedTerms.map((t) => String(t));
+  if (typeof raw.narrowerQuery === "string") parsed.narrowerQuery = raw.narrowerQuery; // non-string queries are ignored
+  if (typeof raw.broaderQuery === "string") parsed.broaderQuery = raw.broaderQuery;
+  return Object.keys(parsed).length === 0 ? undefined : parsed; // an object with no usable hint counts as "no hints"
+}
+
+function isBand(v: unknown): v is ConfidenceBand {
+  return typeof v === "string" && ["high", "medium", "low"].includes(v); // exact, case-sensitive match only
+}
+
+function isRecord(v: unknown): v is Record<string, unknown> {
+  return v !== null && typeof v === "object" && Array.isArray(v) === false; // any non-null object that is not an array
+}
+
+const INTERNAL_METADATA_KEYS = new Set(["page_id", "space", "breadcrumb", "title", "wip"]);
+
+function isWip(r: Pick<KnowledgeSearchResult, "metadata">): boolean {
+  return r.metadata?.wip === "true"; // NOTE(review): only the string "true" counts; a boolean `true` is not treated as WIP — confirm Huginn always sends a string
+}
+
+function formatRelevanceBand(r: KnowledgeSearchResult): string {
+  if (r.relevance === undefined) return ""; // no score → no suffix at all, even when a band is present
+  const bandSuffix = r.confidenceBand ? ` · ${r.confidenceBand}` : "";
+  return ` (${(r.relevance * 100).toFixed(1)}% relevant${bandSuffix})`;
+}
+
+function formatDate(iso?: string): string {
+  return !iso ? "" : iso.slice(0, 10); // keeps the first 10 characters (the `YYYY-MM-DD` part of an ISO timestamp)
+}
+
+function visibleMetaLine(metadata?: Record<string, unknown>): string {
+  const shown = Object.entries(metadata ?? {}) // internal keys and falsy values are never rendered
+    .filter(([key, value]) => value && !INTERNAL_METADATA_KEYS.has(key))
+    .map(([key, value]) => `${key}: ${value}`);
+  return shown.length > 0 ? `\n*${shown.join(" | ")}*` : "";
+}
+
+/**
+ * Render hits as markdown in (approximately) the shape Huginn's MCP adapter uses
+ * for `brief=false` searches. Per hit: a `## title (NN% relevant · band)` header,
+ * then url, breadcrumb, the `collection: \`x\` doc_id: \`y\`` line and any graph
+ * context, followed by the matched chunks (or the snippet when there are none).
+ * Hits are separated by a blank line; an empty input renders as "".
+ */
+export function renderSearchResults(results: KnowledgeSearchResult[]): string {
+  const blocks: string[] = [];
+  for (const hit of results) {
+    const updated = hit.modifiedTime ? ` | updated: ${formatDate(hit.modifiedTime)}` : "";
+    const wipTag = isWip(hit) ? " **[UNDER ARBEID]**" : "";
+    const headerLines = [`## ${hit.title}${wipTag}${formatRelevanceBand(hit)}${updated}`];
+    if (hit.url) headerLines.push(hit.url);
+    if (hit.breadcrumb) headerLines.push(hit.breadcrumb);
+    // This is the line extractDocKeysFromRenderedText parses back out — keep the two in sync.
+    headerLines.push(`collection: \`${hit.collection}\` doc_id: \`${hit.id}\``);
+    if (hit.graphContext?.length) headerLines.push(`*${hit.graphContext.join(" | ")}*`);
+
+    const chunks = hit.matchedChunks ?? [];
+    const paragraphs: string[] = [];
+    for (const { heading, content, metadata } of chunks) {
+      if (heading) paragraphs.push(`**${heading}**`);
+      if (content) paragraphs.push(content);
+      const metaLine = visibleMetaLine(metadata).replace(/^\n/, "");
+      if (metaLine) paragraphs.push(metaLine);
+    }
+    // The snippet is only a fallback for hits that carry no matched chunks at all.
+    if (chunks.length === 0 && hit.snippet) paragraphs.push(hit.snippet);
+    const header = headerLines.join("\n");
+    blocks.push(paragraphs.length > 0 ? `${header}\n\n${paragraphs.join("\n\n")}` : header);
+  }
+  return blocks.join("\n\n");
+}
+
+/** Build the Phase-0 `retryHints` / `noConfidentResults` footer, mirroring the MCP
+ *  adapter — used when a re-query itself comes back empty/weak so the consolidated
+ *  result still surfaces the next move. The footer starts with a blank line and is
+ *  italic; returns "" when there are no hints and the search was not flagged unsure. */
+export function renderRetryHintsFooter(resp: Pick<KnowledgeSearchResponse, "retryHints" | "noConfidentResults">): string {
+  const { relatedTerms, narrowerQuery, broaderQuery } = resp.retryHints ?? {};
+  const suggestions = [
+    relatedTerms?.length ? `related terms: ${relatedTerms.join(", ")}` : "",
+    narrowerQuery ? `narrower query: "${narrowerQuery}"` : "",
+    broaderQuery ? `broader query: "${broaderQuery}"` : "",
+  ].filter((s) => s !== "");
+  if (suggestions.length === 0) return resp.noConfidentResults ? "\n\n*No confident match.*" : "";
+  return `\n\n*${resp.noConfidentResults ? "No confident match" : "Weak match"} — try: ${suggestions.join(" · ")}*`;
+}
+
+const DOC_ID_LINE_RE = /collection:\s*`([^`]+)`\s+doc_id:\s*`([^`]+)`/g;
+
+/** Extract `collection/doc_id` keys from rendered search-result text — used to
+ *  dedupe a corrective re-query against the original result the model already
+ *  has, since (per the chosen Phase-1 approach) we don't re-fetch the original
+ *  in structured form. Relies on the `collection: \`…\` doc_id: \`…\`` line Huginn's
+ *  MCP adapter emits for every hit; keys are built via `docKey` so both sides agree. */
+export function extractDocKeysFromRenderedText(text: string): Set<string> {
+  const keys = new Set<string>();
+  for (const [, collection, id] of text.matchAll(DOC_ID_LINE_RE)) { // both groups are non-empty on every match
+    if (collection !== undefined && id !== undefined) keys.add(docKey({ collection, id }));
+  }
+  return keys;
+}
+
+export function docKey(r: Pick<KnowledgeSearchResult, "collection" | "id">): string {
+  return r.collection + "/" + r.id; // `collection/doc_id`, the key shape used when deduping re-query hits
+}
+
+/** Pull the quoted `broader query: "..."` / `narrower query: "..."` hints out of a
+ *  rendered "*No confident match — try: …*" footer; a hint that is absent is left off the result.
+ *  Belt-and-suspenders for the corrective re-query when the Haiku grader didn't supply a rewritten query. */
+export function parseQueryHintsFromFooter(text: string): { broaderQuery?: string; narrowerQuery?: string } {
+  const broader = /broader query:\s*"([^"]+)"/.exec(text)?.[1];
+  const narrower = /narrower query:\s*"([^"]+)"/.exec(text)?.[1];
+  return {
+    ...(broader !== undefined ? { broaderQuery: broader } : {}),
+    ...(narrower !== undefined ? { narrowerQuery: narrower } : {}),
+  };
+}
+
+export { log as knowledgeSearchLog };
diff --git a/src/ai/tool-status.ts b/src/ai/tool-status.ts
index f1247f2..7d807bf 100644
--- a/src/ai/tool-status.ts
+++ b/src/ai/tool-status.ts
@@ -315,6 +315,16 @@ export function parseToolName(name: string): { server: string; tool: string } |
   return undefined;
 }
 
+/**
+ * Reports whether `toolName` refers to Huginn's knowledge search tool (`search_knowledge`).
+ * Names that `parseToolName` can split are judged by their tool part; any other name is
+ * compared as given. Used to gate the corrective-retrieval pass.
+ */
+export function isKnowledgeSearchTool(toolName: string): boolean {
+  const bareName = parseToolName(toolName)?.tool ?? toolName;
+  return bareName === "search_knowledge";
+}
+
 /**
  * Get human-friendly status text for a tool call.
  * Returns undefined for tools that should not show status (e.g. report_intent).
diff --git a/src/bots/config.ts b/src/bots/config.ts
index 7a47b6b..0af4a4b 100644
--- a/src/bots/config.ts
+++ b/src/bots/config.ts
@@ -77,6 +77,16 @@ export interface BotConfig {
   hivemind?: HivemindBotConfig;
   /** MCP status probing config — controls cache TTL and which servers are critical */
   mcpStatus?: McpStatusConfig;
+  /** CRAG-lite corrective retrieval around the knowledge search tool (Phase 1).
+   *  Off unless `enabled: true` here or the global default is on. `retryBudget`
+   *  is clamped to 1–2. Only the copilot-sdk connector honours this. */
+  correctiveRetrieval?: CorrectiveRetrievalBotConfig;
+}
+
+export interface CorrectiveRetrievalBotConfig {
+  enabled?: boolean;
+  /** Max corrective re-queries per knowledge search. Clamped to 1–2. Default 1. */
+  retryBudget?: number;
 }
 
 export interface BotPrompts {
@@ -164,7 +174,7 @@ function discoverBotsInternal(opts: { requireTokens: boolean }): BotConfig[] {
       try {
         botSettings = JSON.parse(readFileSync(configJsonPath, "utf-8"));
         // Warn about unknown keys to catch typos
-        const knownKeys = new Set(["connector", "model", "thinkingMaxTokens", "timeoutMs", "restrictedTools", "channelListening", "serena", "baseUrl", "showWaterfall", "prompts", "contextWindow", "hivemind", "mcpStatus"]);
+        const knownKeys = new Set(["connector", "model", "thinkingMaxTokens", "timeoutMs", "restrictedTools", "channelListening", "serena", "baseUrl", "showWaterfall", "prompts", "contextWindow", "hivemind", "mcpStatus", "correctiveRetrieval"]);
         const unknownKeys = Object.keys(botSettings).filter((k) => !knownKeys.has(k));
         if (unknownKeys.length > 0) {
           log.warn("Bot \"{name}\" config.json has unknown keys: {keys} — possible typo?", { name, keys: unknownKeys.join(", ") });
@@ -208,6 +218,7 @@ function discoverBotsInternal(opts: { requireTokens: boolean }): BotConfig[] {
       contextWindow: botSettings.contextWindow as number | undefined,
       hivemind: parseHivemindConfig(botSettings.hivemind) ?? undefined,
       mcpStatus: botSettings.mcpStatus as McpStatusConfig | undefined,
+      correctiveRetrieval: botSettings.correctiveRetrieval as CorrectiveRetrievalBotConfig | undefined,
     });
 
     const configParts: string[] = [];
diff --git a/src/core/corrective-trace-spans.test.ts b/src/core/corrective-trace-spans.test.ts
new file mode 100644
index 0000000..be64f3c
--- /dev/null
+++ b/src/core/corrective-trace-spans.test.ts
@@ -0,0 +1,63 @@
+import { test, expect, describe } from "bun:test";
+import { planCorrectiveSpans } from "./corrective-trace-spans.ts";
+import type { CorrectiveToolMeta } from "../types.ts";
+
+describe("planCorrectiveSpans", () => { // span layout: names, sequential offsets, attribute values and the 1 ms floor
+  test("returns empty when there's no corrective metadata", () => {
+    expect(planCorrectiveSpans(undefined, 100)).toEqual([]);
+    expect(planCorrectiveSpans({ retries: 0, verdicts: [], reasons: [], queriesTried: [], finalVerdict: "correct" }, 100)).toEqual([]);
+  });
+
+  test("one knowledge_grade span when graded but not re-queried", () => {
+    const corr: CorrectiveToolMeta = {
+      retries: 0,
+      verdicts: ["correct"],
+      reasons: ["covered"],
+      queriesTried: [],
+      finalVerdict: "correct",
+      graderMs: 1200,
+    };
+    const spans = planCorrectiveSpans(corr, 200);
+    expect(spans.map((s) => s.name)).toEqual(["knowledge_grade"]);
+    expect(spans[0]!.startOffsetMs).toBe(200);
+    expect(spans[0]!.durationMs).toBe(1200);
+    expect(spans[0]!.attributes.model).toBe("haiku");
+    expect(spans[0]!.attributes.finalVerdict).toBe("correct");
+    expect(spans[0]!.attributes.passes).toBe(1);
+  });
+
+  test("grade span + one requery span per re-query, laid out sequentially after the tool", () => {
+    const corr: CorrectiveToolMeta = {
+      retries: 2,
+      verdicts: ["insufficient", "ambiguous", "correct"],
+      reasons: ["off-topic", "broad", "ok"],
+      queriesTried: ["q1", "q2"],
+      collectionsTried: [null, ["confluence"]],
+      finalVerdict: "correct",
+      graderMs: 900,
+      requeryMs: [150, 220],
+    };
+    const spans = planCorrectiveSpans(corr, 300);
+    expect(spans.map((s) => s.name)).toEqual(["knowledge_grade", "knowledge_requery", "knowledge_requery"]);
+    // grade [300, 1200), requery#1 [1200, 1350), requery#2 [1350, 1570)
+    expect(spans[0]!.startOffsetMs).toBe(300);
+    expect(spans[1]!.startOffsetMs).toBe(1200);
+    expect(spans[1]!.durationMs).toBe(150);
+    expect(spans[1]!.attributes.query).toBe("q1");
+    expect(spans[1]!.attributes.collection).toBe("(all)");
+    expect(spans[2]!.startOffsetMs).toBe(1350);
+    expect(spans[2]!.durationMs).toBe(220);
+    expect(spans[2]!.attributes.query).toBe("q2");
+    expect(spans[2]!.attributes.collection).toBe("confluence");
+  });
+
+  test("uses a 1ms floor when timings are missing", () => {
+    const spans = planCorrectiveSpans(
+      { retries: 1, verdicts: ["insufficient", "correct"], reasons: ["x", "y"], queriesTried: ["q"], finalVerdict: "correct" },
+      0,
+    );
+    expect(spans[0]!.durationMs).toBe(1);
+    expect(spans[1]!.durationMs).toBe(1);
+    expect(spans[1]!.startOffsetMs).toBe(1);
+  });
+});
diff --git a/src/core/corrective-trace-spans.ts b/src/core/corrective-trace-spans.ts
new file mode 100644
index 0000000..f4ca5c0
--- /dev/null
+++ b/src/core/corrective-trace-spans.ts
@@ -0,0 +1,94 @@
+import type { Tracer } from "../tracing/index.ts";
+import type { CorrectiveToolMeta } from "../types.ts";
+
+/**
+ * Synthesize waterfall spans for a knowledge-search tool call's CRAG-lite
+ * corrective pass (see src/ai/corrective-retrieval.ts):
+ *
+ *   - one `knowledge_grade` span — the Haiku retrieval evaluator (attrs:
+ *     verdicts per pass, the triggering reason, the final verdict, model)
+ *   - one `knowledge_requery` span per corrective re-query (attrs: the rewritten
+ *     query, the collection scope)
+ *
+ * They're nested under the tool span and laid out sequentially starting at the
+ * tool span's nominal end (the corrective work runs *after* Huginn's search, in
+ * the connector's onPostToolUse hook), so they extend just past the tool bar's
+ * right edge — which is the honest picture of the added latency.
+ *
+ * Mirrors the structure of {@link emitSearchTraceSpans} in search-trace-spans.ts.
+ */
+
+export interface SynthesizedCorrectiveSpan {
+  name: string; // the planner emits "knowledge_grade" or "knowledge_requery"
+  durationMs: number; // missing or non-positive timings are replaced by 1 ms by the planner
+  attributes: Record<string, unknown>;
+  /** Offset from the *tool span's* start. */
+  startOffsetMs: number;
+}
+
+/** Pure planner — returns the spans we'd emit for a tool call with corrective metadata:
+ *  one `knowledge_grade` span starting at `toolDurationMs` (clamped to ≥ 0), then one `knowledge_requery`
+ *  span per tried query, laid out back to back. Returns [] without verdicts. Exposed for testing. */
+export function planCorrectiveSpans(
+  corrective: CorrectiveToolMeta | undefined,
+  toolDurationMs: number,
+): SynthesizedCorrectiveSpan[] {
+  if (!corrective || !Array.isArray(corrective.verdicts) || corrective.verdicts.length === 0) return [];
+
+  const positiveOr1 = (ms: unknown): number => (typeof ms === "number" && ms > 0 ? ms : 1); // 1 ms floor keeps every span visible
+  const out: SynthesizedCorrectiveSpan[] = [];
+  let cursor = Math.max(0, toolDurationMs);
+
+  const graderMs = positiveOr1(corrective.graderMs);
+  out.push({
+    name: "knowledge_grade",
+    durationMs: graderMs,
+    startOffsetMs: cursor,
+    attributes: {
+      model: "haiku",
+      passes: corrective.verdicts.length,
+      verdicts: corrective.verdicts,
+      finalVerdict: corrective.finalVerdict,
+      reason: corrective.reasons?.[0],
+      retries: corrective.retries,
+      synthesized: true,
+    },
+  });
+  cursor += graderMs;
+
+  const queries = Array.isArray(corrective.queriesTried) ? corrective.queriesTried : []; // tolerate malformed metadata, as for `verdicts`
+  queries.forEach((query, i) => {
+    const ms = positiveOr1(corrective.requeryMs?.[i]);
+    const collection = corrective.collectionsTried?.[i] ?? null;
+    out.push({
+      name: "knowledge_requery",
+      durationMs: ms,
+      startOffsetMs: cursor,
+      attributes: {
+        query,
+        collection: collection && collection.length > 0 ? collection.join(", ") : "(all)",
+        index: i + 1,
+        synthesized: true,
+      },
+    });
+    cursor += ms;
+  });
+  return out;
+}
+
+/** Plan the corrective spans for a tool call and attach each one under the tool span via
+ *  `tracer.addSubSpan`. Does nothing when the planner returns no spans (e.g. no corrective metadata). */
+export function emitCorrectiveSpans(opts: {
+  tracer: Tracer;
+  toolSpanId: string;
+  toolStartedAt: Date;
+  toolDurationMs: number;
+  corrective: CorrectiveToolMeta | undefined;
+}): void {
+  const { tracer, toolSpanId, toolStartedAt, toolDurationMs, corrective } = opts;
+  planCorrectiveSpans(corrective, toolDurationMs).forEach(({ name, durationMs, attributes, startOffsetMs }) => {
+    // Offsets are relative to the tool span's own start, hence `parentStartedAt`.
+    const placement = { parentStartedAt: toolStartedAt, startOffsetMs };
+    tracer.addSubSpan(toolSpanId, name, durationMs, attributes, placement);
+  });
+}
diff --git a/src/core/tool-spans.ts b/src/core/tool-spans.ts
index a9a4751..9b7789d 100644
--- a/src/core/tool-spans.ts
+++ b/src/core/tool-spans.ts
@@ -3,6 +3,7 @@ import type { ToolCall } from "../types.ts";
 import { getToolStatus } from "../ai/tool-status.ts";
 import { parseHuginnTrace } from "../ai/huginn-trace.ts";
 import { emitSearchTraceSpans } from "./search-trace-spans.ts";
+import { emitCorrectiveSpans } from "./corrective-trace-spans.ts";
 
 /**
  * Trace-marker-emitting MCP tools whose spans benefit from an env snapshot.
@@ -107,22 +108,39 @@ export async function attachToolSpans(
     if (captureOutputs && toolOutput !== undefined) {
       attrs.output = toolOutput;
     }
+    // CRAG-lite corrective-retrieval metadata, when the connector ran a
+    // grade-and-requery pass on this knowledge-search tool result.
+    if (tool.corrective !== undefined) attrs.corrective = tool.corrective;
+
     const toolSpanId = tracer.addChildSpan("claude", tool.displayName, tool.durationMs, attrs, tool.startOffsetMs);
 
+    const claudeStart = tracer.spanStartedAt("claude");
+    const toolStart = claudeStart
+      ? new Date(claudeStart.getTime() + (tool.startOffsetMs ?? 0))
+      : undefined;
+
     // If the tool call carries a v1 Huginn search trace, synthesize per-stage
     // child spans so the waterfall shows where the time went without the
     // operator having to expand the trace JSON.
-    if (attrs.searchTrace !== undefined) {
-      const claudeStart = tracer.spanStartedAt("claude");
-      if (claudeStart) {
-        const toolStart = new Date(claudeStart.getTime() + (tool.startOffsetMs ?? 0));
-        emitSearchTraceSpans({
-          tracer,
-          toolSpanId,
-          toolStartedAt: toolStart,
-          searchTrace: attrs.searchTrace,
-        });
-      }
+    if (attrs.searchTrace !== undefined && toolStart) {
+      emitSearchTraceSpans({
+        tracer,
+        toolSpanId,
+        toolStartedAt: toolStart,
+        searchTrace: attrs.searchTrace,
+      });
+    }
+
+    // If the connector ran a corrective pass, synthesize knowledge_grade /
+    // knowledge_requery child spans after the tool's own work.
+    if (tool.corrective !== undefined && toolStart) {
+      emitCorrectiveSpans({
+        tracer,
+        toolSpanId,
+        toolStartedAt: toolStart,
+        toolDurationMs: tool.durationMs,
+        corrective: tool.corrective,
+      });
     }
   }
 }
diff --git a/src/dashboard/views/components/span-label.ts b/src/dashboard/views/components/span-label.ts
index 7455fb5..3c18414 100644
--- a/src/dashboard/views/components/span-label.ts
+++ b/src/dashboard/views/components/span-label.ts
@@ -9,6 +9,12 @@ interface SpanLike {
     toolId?: unknown;
     input?: unknown;
     output?: unknown;
+    corrective?: {
+      retries?: unknown;
+      finalVerdict?: unknown;
+      verdicts?: unknown;
+      queriesTried?: unknown;
+    } | unknown;
     searchTrace?:
       | {
           collections?: Array<{
@@ -46,6 +52,11 @@ export function deriveSpanLabelHtml(span: SpanLike): { html: string; tooltip: st
     ? `<span class="wf-chip wf-verb wf-verb-${escAttr(verbClass)}">${escHtml(verb)}</span>`
     : '';
 
+  // Corrective-retrieval chip — present only on knowledge-search tool spans that
+  // went through a CRAG-lite grade/requery pass. Shows the final verdict and the
+  // retry count so a corrected search is visible at a glance.
+  const corr = correctiveChipFromAttrs(attrs.corrective);
+
   // Search-tool path: collection chips + counts chip, derived from searchTrace
   // or input.collection.
   let collections = collectionsFor(attrs);
@@ -74,24 +85,58 @@ export function deriveSpanLabelHtml(span: SpanLike): { html: string; tooltip: st
       if (summary.totalMs != null) tooltipLines.push("total: " + summary.totalMs + "ms");
       if (summary.lowConfidence) tooltipLines.push("⚠ low confidence");
     }
+    if (corr) tooltipLines.push(...corr.tooltipLines);
     return {
-      html: verbChip + firstChip + moreChip + countsChip,
+      html: verbChip + (corr ? corr.html : "") + firstChip + moreChip + countsChip,
       tooltip: tooltipLines.join("\n"),
     };
   }
 
   // Per-tool extras path: graph_node / symbol_context / list_files /
-  // read_source / search_pattern.
+  // read_source / search_pattern (also the knowledge-search fallback).
   const extras = toolLabelExtras(canonName, attrs);
   if (extras) {
     return {
-      html: verbChip + extras.chips,
-      tooltip: [span.name, ...extras.tooltipLines].join("\n"),
+      html: verbChip + (corr ? corr.html : "") + extras.chips,
+      tooltip: [span.name, ...(corr ? corr.tooltipLines : []), ...extras.tooltipLines].join("\n"),
+    };
+  }
+  if (corr) {
+    return {
+      html: verbChip + corr.html,
+      tooltip: [span.name ?? "", ...corr.tooltipLines].join("\n"),
     };
   }
   return null;
 }
 
+/** Build the corrective-retrieval chip from a tool span's `attributes.corrective`.
+ *  Returns null when the attribute is absent or malformed. Chip text is the retry
+ *  count + the final verdict's symbol (e.g. `⟲1 ✓`), or `grade ✓` when no re-query
+ *  ran; color reflects whether the corrective pass left the result set usable. */
+function correctiveChipFromAttrs(raw: unknown): { html: string; tooltipLines: string[] } | null {
+  if (!raw || typeof raw !== "object") return null;
+  const c = raw as { retries?: unknown; finalVerdict?: unknown; verdicts?: unknown; queriesTried?: unknown };
+  const finalVerdict = typeof c.finalVerdict === "string" ? c.finalVerdict : undefined;
+  const verdicts = Array.isArray(c.verdicts) ? c.verdicts.map(String) : [];
+  if (!finalVerdict && verdicts.length === 0) return null;
+  const retries = typeof c.retries === "number" ? c.retries : 0;
+  const queries = Array.isArray(c.queriesTried) ? c.queriesTried.map(String) : [];
+
+  const cls =
+    finalVerdict === "correct" ? "wf-corrective wf-corrective-ok"
+      : finalVerdict === "ambiguous" ? "wf-corrective wf-corrective-warn"
+        : "wf-corrective wf-corrective-bad";
+  const sym = finalVerdict === "correct" ? "✓" : finalVerdict === "ambiguous" ? "≈" : "✗";
+  const text = retries > 0 ? `⟲${retries} ${sym}` : `grade ${sym}`;
+  const tip = `corrective retrieval: ${verdicts.join(" → ") || finalVerdict}` +
+    (queries.length ? `; re-queried: ${queries.map((q) => `"${q}"`).join(", ")}` : "; no re-query");
+  return {
+    html: `<span class="wf-chip ${cls}" title="${escAttr(tip)}">${escHtml(text)}</span>`,
+    tooltipLines: [tip],
+  };
+}
+
 interface ToolLabelExtras { chips: string; tooltipLines: string[]; }
 
 type ExtrasRecipe = {
diff --git a/src/dashboard/views/components/traces-waterfall.ts b/src/dashboard/views/components/traces-waterfall.ts
index 8d03a57..73e9e61 100644
--- a/src/dashboard/views/components/traces-waterfall.ts
+++ b/src/dashboard/views/components/traces-waterfall.ts
@@ -128,6 +128,24 @@ export function tracesWaterfallStyles(): string {
       color: var(--status-warning);
       border-color: color-mix(in srgb, var(--status-warning) 35%, transparent);
     }
+    /* Corrective-retrieval chip — marks a knowledge search that went through a
+       CRAG-lite grade/requery pass. Color = whether the result set ended usable. */
+    .wf-chip.wf-corrective { font-variant-numeric: tabular-nums; font-weight: 600; }
+    .wf-chip.wf-corrective-ok {
+      background: color-mix(in srgb, var(--status-ok, var(--status-cyan)) 14%, transparent);
+      color: var(--status-ok, var(--status-cyan));
+      border: 1px solid color-mix(in srgb, var(--status-ok, var(--status-cyan)) 35%, transparent);
+    }
+    .wf-chip.wf-corrective-warn {
+      background: color-mix(in srgb, var(--status-warning) 14%, transparent);
+      color: var(--status-warning);
+      border: 1px solid color-mix(in srgb, var(--status-warning) 35%, transparent);
+    }
+    .wf-chip.wf-corrective-bad {
+      background: color-mix(in srgb, var(--status-error, var(--status-magenta)) 14%, transparent);
+      color: var(--status-error, var(--status-magenta));
+      border: 1px solid color-mix(in srgb, var(--status-error, var(--status-magenta)) 35%, transparent);
+    }
     .waterfall-bar-container {
       position: relative;
       height: 16px;
diff --git a/src/types.ts b/src/types.ts
index 0f5beb7..08507ca 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -73,6 +73,32 @@ export interface ToolCall {
    * (404, timeout, network) — see {@link fetchHuginnTrace}.
    */
   searchTraceFetch?: Promise<unknown | null>;
+  /**
+   * CRAG-lite corrective-retrieval metadata, set when the copilot-sdk connector
+   * ran a grade-and-requery pass on this knowledge-search tool result. Drives
+   * the synthesized `knowledge_grade` / `knowledge_requery` waterfall spans.
+   * See src/ai/corrective-retrieval.ts.
+   */
+  corrective?: CorrectiveToolMeta;
+}
+
+export interface CorrectiveToolMeta {
+  /** Number of corrective re-queries actually issued (0–budget). */
+  retries: number;
+  /** Grader verdict from each grading pass, in order ("correct" | "ambiguous" | "insufficient"). */
+  verdicts: string[];
+  /** Grader reason per pass, parallel to `verdicts`. */
+  reasons: string[];
+  /** Re-query strings actually issued (excludes the original query). */
+  queriesTried: string[];
+  /** Collections each re-query was scoped to, parallel to `queriesTried`; `null` = all. */
+  collectionsTried?: (string[] | null)[];
+  /** Verdict from the final grading pass — whether the result set ended up usable. */
+  finalVerdict: string;
+  /** Total Haiku grader wall time across all passes, ms. */
+  graderMs?: number;
+  /** Wall time of each re-query HTTP call, parallel to `queriesTried`, ms. */
+  requeryMs?: number[];
 }
 
 export interface ClaudeResult {

From 1ed5046a6d3eb0e175dc47aad191dd5ce0fa7866 Mon Sep 17 00:00:00 2001
From: RuneLind <rulind@gmail.com>
Date: Tue, 12 May 2026 21:22:18 +0200
Subject: [PATCH 2/4] Add a no-model "signal" grader and make it the default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The awaiting Haiku grader (`claude` CLI per knowledge search, ~11s on a 12 KB
result prompt) is too slow for the hot path. Add a second grader mode and make
it the default:

- `"signal"` (default) — no model call. Reads the cheap signal Huginn already
  emits (a `*Weak match …*` / `*No confident match …*` footer, or a "No results
  found" body) and, when present, re-queries with the `broaderQuery` /
  `narrowerQuery` from Huginn's own `retryHints`. ~0ms for confident searches;
  ~one extra HTTP call when weak. A fully uneventful signal-mode check emits no
  trace span.
- `"haiku"` (opt-in via `correctiveRetrieval.grader: "haiku"` /
  `CORRECTIVE_RETRIEVAL_GRADER=haiku`) — the previous behaviour, but the result
  text is now digested down to the top hits' titles + bands + a short body
  prefix before being sent to Haiku, so it's ~3–5s instead of ~11s.

Also: on a corrective merge, the now-obsolete `*Weak match — try: …*` footer is
stripped from the prior result before the fresh hits are spliced in (keeps the
model's context clean and stops signal-mode re-grading from re-detecting an
already-handled weak signal). `corrective` metadata + the `knowledge_grade`
span gain a `graderMode` / `mode` field.

Tests updated for the new shape; signal-grader paths covered (confident → no-op,
weak footer → re-query with hint, related-terms-only → no re-query, budget 2
doesn't loop, "No results" body); Haiku digest covered.
---
 .env.example                              |   1 +
 CLAUDE.md                                 |  19 ++--
 src/ai/CLAUDE.md                          |   8 +-
 src/ai/connectors/copilot-sdk.ts          |  34 ++++--
 src/ai/connectors/corrective-hook.test.ts | 111 +++++++++++++++++-
 src/ai/corrective-config.test.ts          |  45 +++++---
 src/ai/corrective-config.ts               |  25 ++++-
 src/ai/corrective-retrieval.test.ts       |  94 ++++++++++++++++
 src/ai/corrective-retrieval.ts            |  86 ++++++++------
 src/ai/knowledge-grader.test.ts           |  56 ++++++++-
 src/ai/knowledge-grader.ts                | 131 ++++++++++++++++------
 src/ai/knowledge-search-client.ts         |  13 +++
 src/bots/config.ts                        |   5 +
 src/core/corrective-trace-spans.test.ts   |  19 ++--
 src/core/corrective-trace-spans.ts        |   2 +-
 src/types.ts                              |   4 +-
 16 files changed, 533 insertions(+), 120 deletions(-)

diff --git a/.env.example b/.env.example
index cc5d2c4..d50c288 100644
--- a/.env.example
+++ b/.env.example
@@ -39,4 +39,5 @@ WHISPER_MODEL_PATH=./models/ggml-base.en.bin
 # default. Opt in per-bot via config.json `correctiveRetrieval`, or globally here.
 # CORRECTIVE_RETRIEVAL_ENABLED=true
 # CORRECTIVE_RETRIEVAL_BUDGET=1          # max corrective re-queries per search (1–2)
+# CORRECTIVE_RETRIEVAL_GRADER=signal     # result judge: "signal" (no model call) or "haiku" (~3–5s/search)
 # CORRECTIVE_RETRIEVAL_DISABLED=1        # hard kill-switch — overrides per-bot config
diff --git a/CLAUDE.md b/CLAUDE.md
index ca9e53e..c6bd3f6 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -217,7 +217,7 @@ All fields are optional — falls back to global `.env` values:
 | `showWaterfall` | boolean | `true` | Show request progress waterfall overlay in web chat |
 | `contextWindow` | number | — | Context window size in tokens (e.g. `32768`). Shown as usage in web chat and percentage in Telegram footer |
 | `prompts` | object | — | Configurable prompts: `jiraAnalysis` (Jira research instruction, content appended automatically), `investigateCode` (follow-up code investigation prompt) |
-| `correctiveRetrieval` | object | off | CRAG-lite corrective loop around the knowledge search tool — `{ enabled?: boolean, retryBudget?: 1\|2 }`. Only the `copilot-sdk` connector honours it; off by default. See "Corrective retrieval" below. |
+| `correctiveRetrieval` | object | off | CRAG-lite corrective loop around the knowledge search tool — `{ enabled?: boolean, retryBudget?: 1\|2, grader?: "signal"\|"haiku" }`. Only the `copilot-sdk` connector honours it; off by default. `grader` defaults to `"signal"` (no model call). See "Corrective retrieval" below. |
 
 ### Database
 
@@ -256,6 +256,7 @@ PostgreSQL + pgvector via Docker (single container).
 | `LOG_DIR` | No | `./logs` | Log file directory (set `none` to disable file logging) |
 | `CORRECTIVE_RETRIEVAL_ENABLED` | No | `false` | Global default for the CRAG-lite corrective loop (per-bot `correctiveRetrieval.enabled` overrides) |
 | `CORRECTIVE_RETRIEVAL_BUDGET` | No | `1` | Default max corrective re-queries per knowledge search (clamped to 1–2) |
+| `CORRECTIVE_RETRIEVAL_GRADER` | No | `signal` | Default result-quality judge: `signal` (no model call) or `haiku` (slimmed awaiting Haiku call, ~3–5s/search) |
 | `CORRECTIVE_RETRIEVAL_DISABLED` | No | — | Set to `1` to hard-disable corrective retrieval everywhere, regardless of per-bot config |
 | `GOAL_CHECK_INTERVAL_MS` | No | — | Legacy alias for `SCHEDULER_INTERVAL_MS` |
 | `GOAL_CHECK_ENABLED` | No | — | Legacy alias for `SCHEDULER_ENABLED` |
@@ -358,13 +359,17 @@ uvx --from "git+https://github.com/oraios/serena" serena project index /path/to/
 
 ## Corrective Retrieval (CRAG-lite)
 
-A CRAG-style "grade the search results, re-query if they're weak" loop wrapped around Huginn's `search_knowledge` MCP tool. **Off by default**; enable per-bot in `config.json` (`"correctiveRetrieval": { "enabled": true, "retryBudget": 1 }`), globally via `CORRECTIVE_RETRIEVAL_ENABLED=true`, or hard-disable everywhere with `CORRECTIVE_RETRIEVAL_DISABLED=1`.
+A CRAG-style "judge the search results, re-query if they're weak" loop wrapped around Huginn's `search_knowledge` MCP tool. **Off by default**; enable per-bot in `config.json` (`"correctiveRetrieval": { "enabled": true, "retryBudget": 1, "grader": "signal" }`), globally via `CORRECTIVE_RETRIEVAL_ENABLED=true`, or hard-disable everywhere with `CORRECTIVE_RETRIEVAL_DISABLED=1`.
 
-How it works (copilot-sdk connector only):
-1. The connector registers a Copilot SDK `onPostToolUse` hook. When a bot calls `search_knowledge`, the hook intercepts the result before the model sees it.
-2. `src/ai/knowledge-grader.ts` — a dedicated **awaiting** Haiku call grades the results (`correct` / `ambiguous` / `insufficient`) and, if weak, proposes a rewritten query and/or a better collection. Fail-soft: any Haiku error → `correct` (no change).
-3. `src/ai/corrective-retrieval.ts` — if not `correct` and the retry budget (1, configurable to 2) isn't spent, re-queries Huginn's `/api/search` (`src/ai/knowledge-search-client.ts`) with `rerank=true`, merges the fresh hits into the original result text (deduped by `collection/doc_id` parsed from the rendered output), and appends an inline note. Never recursive.
-4. Traces: `knowledge_grade` + `knowledge_requery` spans synthesized under the tool span (`src/core/corrective-trace-spans.ts`), rendered in the dashboard waterfall with a corrective chip on the parent tool span.
+How it works (copilot-sdk connector only): the connector registers a Copilot SDK `onPostToolUse` hook that intercepts each `search_knowledge` result before the model sees it (`src/ai/connectors/copilot-sdk.ts` → `applyCorrectiveRetrieval` → `runCorrectiveRetrieval`).
+
+**Two grader modes** (`src/ai/knowledge-grader.ts`):
+- `"signal"` (**default — no model call, ~0ms for confident searches**): reads the cheap signal Huginn already emits — a `*Weak match …*` / `*No confident match …*` footer or a "No results found" body — and, when present, re-queries with the `broaderQuery` / `narrowerQuery` from Huginn's own `retryHints` (parsed from that footer). Most searches add zero latency; a weak one costs ~one extra HTTP call.
+- `"haiku"` (opt-in, `grader: "haiku"`): a *slimmed* **awaiting** Haiku call that also reads the result snippets and can propose a semantic rewrite / a better collection. ~3–5s per search (the result text is digested to titles + bands + a taste of each hit before being sent), so it's not the default. Fail-soft: any Haiku error → `correct` (no change).
+
+On a non-`correct` verdict, `src/ai/corrective-retrieval.ts` re-queries Huginn's `/api/search` (`src/ai/knowledge-search-client.ts`) with `rerank=true`, merges the fresh hits into the original result text (deduped by `collection/doc_id` parsed from the rendered output; the now-obsolete `*Weak match*` footer is stripped), and appends an inline note. Retry budget 1 (configurable to 2); never recursive.
+
+Traces: `knowledge_grade` (attrs include `mode`, `verdicts`, `finalVerdict`) + `knowledge_requery` spans synthesized under the tool span (`src/core/corrective-trace-spans.ts`), rendered in the dashboard waterfall with a corrective chip on the parent tool span. A fully uneventful signal-mode check (confident, no re-query) emits no span.
 
 **Connector asymmetry:** Claude-CLI bots run the MCP tool inside their own process, so the result can't be intercepted — they get nothing here (Phase 3 will add prompt-level corrective guidance instead). When the toggle is off, the hook isn't registered and behaviour is byte-identical to before.
 
diff --git a/src/ai/CLAUDE.md b/src/ai/CLAUDE.md
index 04b2d42..515ab9a 100644
--- a/src/ai/CLAUDE.md
+++ b/src/ai/CLAUDE.md
@@ -16,10 +16,10 @@
 | `haiku-extraction.ts` | Shared Haiku executor for async extraction tasks (memories, goals, tasks) |
 | `huginn-trace.ts` | Inline-fence Huginn trace handling (legacy mode) — `parseHuginnTrace`, `extractMcpResultText`, oversized-CLI-divert recovery |
 | `huginn-trace-pointer.ts` | Phase 2 out-of-band trace channel — parses `huginn-trace-url:` line and fetches the trace from Huginn's `/api/trace/<id>` endpoint. Preferred when `HUGINN_TRACE_POINTER=1` is set on Huginn. Also exports `processMcpToolResult()` — the unwrap → peel → fetch pipeline connectors run on every tool result — and `peelTraceMarkerForRewrite()` for connectors that rewrite a tool result and need to re-append the trace marker |
-| `knowledge-grader.ts` | CRAG-lite retrieval evaluator — an awaiting Haiku call that grades knowledge-search results (`correct`/`ambiguous`/`insufficient`) and proposes a rewritten query/collection. Fail-soft to `correct`. |
-| `corrective-retrieval.ts` | Corrective grade-and-requery orchestrator — `runCorrectiveRetrieval()`: grade → bounded re-query Huginn → merge+dedupe → consolidated text + `corrective` metadata. ≤1 retry (configurable to 2), non-recursive. |
-| `knowledge-search-client.ts` | HTTP client for Huginn's `/api/search` + a renderer mirroring the MCP adapter's result format, used by the corrective re-query path. |
-| `corrective-config.ts` | Resolves the per-bot corrective-retrieval toggle + retry budget (kill-switch > per-bot config.json > global env defaults). |
+| `knowledge-grader.ts` | CRAG-lite retrieval evaluators — `gradeFromSignal()` (default, no model call: reads Huginn's `*Weak match*` / "No results" signal) and `gradeKnowledgeResults()` (opt-in: a *slimmed* awaiting Haiku call that also reads snippets and can propose a rewrite). Both fail-soft to `correct`. |
+| `corrective-retrieval.ts` | Corrective grade-and-requery orchestrator — `runCorrectiveRetrieval()`: grade (signal or haiku) → bounded re-query Huginn → merge+dedupe → consolidated text + `corrective` metadata. ≤1 retry (configurable to 2), non-recursive. |
+| `knowledge-search-client.ts` | HTTP client for Huginn's `/api/search` + a renderer mirroring the MCP adapter's result format + footer/doc-id parsers, used by the corrective re-query path. |
+| `corrective-config.ts` | Resolves the per-bot corrective-retrieval toggle + retry budget + grader mode (kill-switch > per-bot config.json > global env defaults; grader defaults to `"signal"`). |
 | `connectors/` | Three connector implementations (see below) |
 
 ## Connector Abstraction
diff --git a/src/ai/connectors/copilot-sdk.ts b/src/ai/connectors/copilot-sdk.ts
index 72fe97d..247bf34 100644
--- a/src/ai/connectors/copilot-sdk.ts
+++ b/src/ai/connectors/copilot-sdk.ts
@@ -84,10 +84,12 @@ export async function executePrompt(
   const customAgents = buildCustomAgents(botConfig);
 
   // Corrective retrieval (CRAG-lite): when enabled for this bot, an onPostToolUse
-  // hook grades each knowledge-search result with Haiku and, if it's weak, does a
-  // bounded re-query — splicing the fresh hits into the result before the model
-  // sees it. Off by default (see src/ai/corrective-config.ts); when off, the hook
-  // isn't registered at all and behaviour is byte-identical to before.
+  // hook judges each knowledge-search result and, if it's weak, does a bounded
+  // re-query — splicing the fresh hits into the result before the model sees it.
+  // Default judge is `"signal"` (no model call — re-query only when Huginn
+  // already flags the result weak, using Huginn's `retryHints`); `"haiku"` is
+  // an opt-in slower/smarter alternative. Off by default (see corrective-config.ts);
+  // when off, the hook isn't registered and behaviour is byte-identical to before.
   const correctiveCfg = resolveCorrectiveConfig(botConfig);
   const correctiveOutcomes: CorrectiveMetadata[] = [];
   const correctiveEnabled = correctiveCfg.enabled && hasMcp;
@@ -103,6 +105,7 @@ export async function executePrompt(
               toolResult: input.toolResult,
               botConfig,
               budget: correctiveCfg.retryBudget,
+              grader: correctiveCfg.grader,
               userQuestion,
             });
             if (result) {
@@ -387,6 +390,7 @@ export interface ApplyCorrectiveArgs {
   toolResult: ToolResultObject;
   botConfig: Pick<BotConfig, "name" | "dir">;
   budget: number;
+  grader?: CorrectiveRetrievalContext["grader"];
   userQuestion: string;
   /** Injectable for tests — forwarded to {@link runCorrectiveRetrieval}. */
   searchFn?: CorrectiveRetrievalContext["searchFn"];
@@ -395,11 +399,12 @@ export interface ApplyCorrectiveArgs {
 
 /**
  * Run the corrective grade-and-requery pass on a knowledge-search tool result.
- * Returns `null` when there's nothing to act on (empty result, tool error);
- * otherwise always returns the `metadata` (for tracing) and, when results were
- * merged in, a `modifiedResult` to hand back to the model. The trailing Huginn
- * trace marker, if any, is peeled off the body before splicing and re-appended
- * after, so downstream trace extraction is unaffected.
+ * Returns `null` when there's nothing to act on (empty result, tool error, or a
+ * fully uneventful signal-mode check — judged confident, no re-query — which
+ * isn't worth a trace span); otherwise returns the `metadata` (for tracing) and,
+ * when results were merged in, a `modifiedResult` to hand back to the model. The
+ * trailing Huginn trace marker, if any, is peeled off the body before splicing
+ * and re-appended after, so downstream trace extraction is unaffected.
  */
 export async function applyCorrectiveRetrieval(
   args: ApplyCorrectiveArgs,
@@ -423,6 +428,7 @@ export async function applyCorrectiveRetrieval(
     originalCollections,
     originalResultText: body,
     budget,
+    grader: args.grader,
     botName: botConfig.name,
     cwd: botConfig.dir,
     log,
@@ -431,7 +437,14 @@ export async function applyCorrectiveRetrieval(
     gradeFn: args.gradeFn,
   });
 
-  if (!outcome.changed) return { metadata: outcome.metadata };
+  if (!outcome.changed) {
+    // A signal-mode check that found nothing wrong is a free no-op — don't
+    // clutter the trace with a span for every confident search. A Haiku-mode
+    // check, or any pass that graded something non-"correct", is worth recording.
+    const uneventful =
+      outcome.metadata.graderMode === "signal" && outcome.metadata.verdicts.every((v) => v === "correct");
+    return uneventful ? null : { metadata: outcome.metadata };
+  }
 
   return {
     metadata: outcome.metadata,
@@ -480,6 +493,7 @@ function correctiveMetaToToolMeta(m: CorrectiveMetadata): CorrectiveToolMeta {
     queriesTried: m.queriesTried,
     collectionsTried: m.collectionsTried.map((c) => c ?? null),
     finalVerdict: m.finalVerdict,
+    graderMode: m.graderMode,
     graderMs: m.graderMs,
     requeryMs: m.requeryMs,
   };
diff --git a/src/ai/connectors/corrective-hook.test.ts b/src/ai/connectors/corrective-hook.test.ts
index 56b66c1..eaefc64 100644
--- a/src/ai/connectors/corrective-hook.test.ts
+++ b/src/ai/connectors/corrective-hook.test.ts
@@ -26,13 +26,24 @@ describe("attachCorrectiveOutcomes", () => {
     return { id: name, name, displayName: name, durationMs: 1, startOffsetMs: 0 };
   }
   function meta(finalVerdict: string): CorrectiveMetadata {
-    return { retries: 1, verdicts: ["insufficient", finalVerdict] as KnowledgeGrade["verdict"][], reasons: ["x", "y"], queriesTried: ["q"], collectionsTried: [undefined], finalVerdict: finalVerdict as KnowledgeGrade["verdict"], graderMs: 100, requeryMs: [50] };
+    return {
+      retries: 1,
+      verdicts: ["insufficient", finalVerdict] as KnowledgeGrade["verdict"][],
+      reasons: ["x", "y"],
+      queriesTried: ["q"],
+      collectionsTried: [undefined],
+      finalVerdict: finalVerdict as KnowledgeGrade["verdict"],
+      graderMode: "signal",
+      graderMs: 0,
+      requeryMs: [50],
+    };
   }
 
   test("maps the i-th outcome to the i-th knowledge-search tool call, skipping others", () => {
     const calls = [tc("knowledge-search_knowledge"), tc("yggdrasil-symbol_context"), tc("knowledge-search_knowledge")];
     attachCorrectiveOutcomes(calls, [meta("correct"), meta("ambiguous")]);
     expect(calls[0]!.corrective?.finalVerdict).toBe("correct");
+    expect(calls[0]!.corrective?.graderMode).toBe("signal");
     expect(calls[1]!.corrective).toBeUndefined();
     expect(calls[2]!.corrective?.finalVerdict).toBe("ambiguous");
     expect(calls[2]!.corrective?.collectionsTried).toEqual([null]);
@@ -45,7 +56,7 @@ describe("attachCorrectiveOutcomes", () => {
   });
 });
 
-describe("applyCorrectiveRetrieval", () => {
+describe("applyCorrectiveRetrieval — haiku mode", () => {
   const botConfig = { name: "test", dir: "/tmp/test-bot" };
   const okGrade: KnowledgeGrade = { verdict: "correct", reason: "covered" };
 
@@ -61,6 +72,7 @@ describe("applyCorrectiveRetrieval", () => {
       toolResult: { textResultForLlm: "Knowledge API server is not running", resultType: "failure" },
       botConfig,
       budget: 1,
+      grader: "haiku",
       userQuestion: "q",
       gradeFn: grader(okGrade),
       searchFn: async () => ({ results: [] }),
@@ -75,6 +87,7 @@ describe("applyCorrectiveRetrieval", () => {
       toolResult: { textResultForLlm: "", resultType: "success" },
       botConfig,
       budget: 1,
+      grader: "haiku",
       userQuestion: "q",
       gradeFn: grader(okGrade),
       searchFn: async () => ({ results: [] }),
@@ -82,13 +95,14 @@ describe("applyCorrectiveRetrieval", () => {
     expect(out).toBeNull();
   });
 
-  test("verdict 'correct' → metadata only, no modifiedResult", async () => {
+  test("verdict 'correct' → metadata only, no modifiedResult (haiku still recorded)", async () => {
     const out = await applyCorrectiveRetrieval({
       toolName: "knowledge-search_knowledge",
       toolArgs: { query: "x", collection: "wiki" },
       toolResult: { textResultForLlm: "## Doc (80% relevant · high)\ncollection: `wiki` doc_id: `1`\n\nbody", resultType: "success" },
       botConfig,
       budget: 1,
+      grader: "haiku",
       userQuestion: "q",
       gradeFn: grader(okGrade),
       searchFn: async () => ({ results: [] }),
@@ -96,6 +110,7 @@ describe("applyCorrectiveRetrieval", () => {
     expect(out).not.toBeNull();
     expect(out!.modifiedResult).toBeUndefined();
     expect(out!.metadata.retries).toBe(0);
+    expect(out!.metadata.graderMode).toBe("haiku");
   });
 
   test("low-confidence result → exactly one re-query, merged, trace fence preserved at the end", async () => {
@@ -108,6 +123,7 @@ describe("applyCorrectiveRetrieval", () => {
       toolResult: { textResultForLlm: original, resultType: "success" },
       botConfig,
       budget: 1,
+      grader: "haiku",
       userQuestion: "what SEDs belong to LA_BUC_02?",
       gradeFn: grader(
         { verdict: "insufficient", rewrittenQuery: "LA_BUC_02 structured electronic documents", reason: "off-topic" },
@@ -137,3 +153,92 @@ describe("applyCorrectiveRetrieval", () => {
     expect(out!.metadata.queriesTried).toEqual(["LA_BUC_02 structured electronic documents"]);
   });
 });
+
+describe("applyCorrectiveRetrieval — signal mode (default)", () => {
+  const botConfig = { name: "test", dir: "/tmp/test-bot" };
+
+  test("confident search (no weak footer) → returns null (uneventful free check, no span)", async () => {
+    let searchCalls = 0;
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "x", collection: "wiki" },
+      toolResult: { textResultForLlm: "## Doc (80% relevant · high)\ncollection: `wiki` doc_id: `1`\n\nbody", resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "q",
+      searchFn: async () => { searchCalls++; return { results: [] }; },
+    });
+    expect(out).toBeNull();
+    expect(searchCalls).toBe(0); // no grader call, no re-query
+  });
+
+  test("Huginn-flagged weak result → re-queries with the footer hint, merges", async () => {
+    const original =
+      "## Marginal hit (12% relevant · low)\ncollection: `wiki` doc_id: `1`\n\nmeh\n\n*Weak match — try: broader query: \"LA_BUC concepts\"*";
+    let seenQuery = "";
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "LA_BUC_02 obscure phrasing", collection: "wiki" },
+      toolResult: { textResultForLlm: original, resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "what about LA_BUC_02?",
+      searchFn: async (query: string) => {
+        seenQuery = query;
+        return {
+          results: [{ collection: "wiki", id: "2", title: "Wider hit", relevance: 0.6, confidenceBand: "medium", matchedChunks: [{ content: "useful" }] }],
+        };
+      },
+    });
+    expect(seenQuery).toBe("LA_BUC concepts");
+    expect(out).not.toBeNull();
+    expect(out!.modifiedResult).toBeDefined();
+    const text = out!.modifiedResult!.textResultForLlm;
+    expect(text).toContain("Wider hit");
+    expect(text).toContain("[corrective retrieval — re-query #1");
+    expect(text).not.toContain("Weak match — try"); // obsolete footer stripped
+    expect(out!.metadata.graderMode).toBe("signal");
+    expect(out!.metadata.verdicts).toEqual(["insufficient", "correct"]);
+    expect(out!.metadata.queriesTried).toEqual(["LA_BUC concepts"]);
+  });
+
+  test("weak result but no usable hint → metadata recorded, no re-query, not null", async () => {
+    const original =
+      "## Marginal hit (12% relevant · low)\ncollection: `wiki` doc_id: `1`\n\nmeh\n\n*Weak match — try: related terms: foo, bar*";
+    let searchCalls = 0;
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "x", collection: "wiki" },
+      toolResult: { textResultForLlm: original, resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "q",
+      searchFn: async () => { searchCalls++; return { results: [] }; },
+    });
+    expect(searchCalls).toBe(0);
+    expect(out).not.toBeNull();
+    expect(out!.modifiedResult).toBeUndefined();
+    expect(out!.metadata.retries).toBe(0);
+    expect(out!.metadata.verdicts).toEqual(["insufficient"]);
+  });
+
+  test("'No results found' body → re-queries with the footer hint if present", async () => {
+    const original = "No results found for 'xyz'.\n\n*No confident match — try: narrower query: \"xyz precise term\"*";
+    let seenQuery = "";
+    const out = await applyCorrectiveRetrieval({
+      toolName: "knowledge-search_knowledge",
+      toolArgs: { query: "xyz", collection: "wiki" },
+      toolResult: { textResultForLlm: original, resultType: "success" },
+      botConfig,
+      budget: 1,
+      userQuestion: "q",
+      searchFn: async (query: string) => {
+        seenQuery = query;
+        return { results: [{ collection: "wiki", id: "9", title: "Found it", relevance: 0.7, confidenceBand: "high", matchedChunks: [{ content: "yes" }] }] };
+      },
+    });
+    expect(seenQuery).toBe("xyz precise term");
+    expect(out!.modifiedResult).toBeDefined();
+    expect(out!.modifiedResult!.textResultForLlm).toContain("Found it");
+  });
+});
diff --git a/src/ai/corrective-config.test.ts b/src/ai/corrective-config.test.ts
index b3996fa..8acde74 100644
--- a/src/ai/corrective-config.test.ts
+++ b/src/ai/corrective-config.test.ts
@@ -1,5 +1,5 @@
 import { test, expect, describe } from "bun:test";
-import { resolveCorrectiveConfig, clampBudget } from "./corrective-config.ts";
+import { resolveCorrectiveConfig, clampBudget, normalizeGraderMode } from "./corrective-config.ts";
 
 describe("clampBudget", () => {
   test("clamps to the 1–2 range and floors", () => {
@@ -12,37 +12,56 @@ describe("clampBudget", () => {
   });
 });
 
+describe("normalizeGraderMode", () => {
+  test("only 'haiku' opts into the model grader; everything else is 'signal'", () => {
+    expect(normalizeGraderMode("haiku")).toBe("haiku");
+    expect(normalizeGraderMode("signal")).toBe("signal");
+    expect(normalizeGraderMode(undefined)).toBe("signal"); // unset bot config / env var → the default grader
+    expect(normalizeGraderMode("nonsense")).toBe("signal"); // unknown values degrade to the default instead of throwing
+  });
+});
+
 describe("resolveCorrectiveConfig", () => {
-  test("off by default when nothing is configured", () => {
-    expect(resolveCorrectiveConfig({}, {})).toEqual({ enabled: false, retryBudget: 1 });
+  test("off, budget 1, signal grader by default", () => {
+    expect(resolveCorrectiveConfig({}, {})).toEqual({ enabled: false, retryBudget: 1, grader: "signal" });
   });
 
-  test("per-bot config enables it and clamps the budget", () => {
-    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: true, retryBudget: 9 } }, {})).toEqual({
+  test("per-bot config enables it, clamps the budget, and selects the grader", () => {
+    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: true, retryBudget: 9, grader: "haiku" } }, {})).toEqual({
       enabled: true,
       retryBudget: 2,
+      grader: "haiku",
     });
   });
 
-  test("global env default enables it when the bot doesn't say otherwise", () => {
-    const env = { CORRECTIVE_RETRIEVAL_ENABLED: "true", CORRECTIVE_RETRIEVAL_BUDGET: "2" };
-    expect(resolveCorrectiveConfig({}, env)).toEqual({ enabled: true, retryBudget: 2 });
+  test("global env defaults apply when the bot doesn't say otherwise", () => {
+    const env = { CORRECTIVE_RETRIEVAL_ENABLED: "true", CORRECTIVE_RETRIEVAL_BUDGET: "2", CORRECTIVE_RETRIEVAL_GRADER: "haiku" };
+    expect(resolveCorrectiveConfig({}, env)).toEqual({ enabled: true, retryBudget: 2, grader: "haiku" });
   });
 
   test("per-bot config overrides the global default (disable wins too)", () => {
-    const env = { CORRECTIVE_RETRIEVAL_ENABLED: "true" };
-    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: false } }, env).enabled).toBe(false);
+    const env = { CORRECTIVE_RETRIEVAL_ENABLED: "true", CORRECTIVE_RETRIEVAL_GRADER: "haiku" };
+    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: false, grader: "signal" } }, env)).toEqual({
+      enabled: false,
+      retryBudget: 1,
+      grader: "signal",
+    });
   });
 
   test("kill-switch overrides everything", () => {
     const env = { CORRECTIVE_RETRIEVAL_DISABLED: "1", CORRECTIVE_RETRIEVAL_ENABLED: "true" };
-    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: true, retryBudget: 2 } }, env)).toEqual({
+    expect(resolveCorrectiveConfig({ correctiveRetrieval: { enabled: true, retryBudget: 2, grader: "haiku" } }, env)).toEqual({
       enabled: false,
       retryBudget: 1,
+      grader: "signal",
     });
   });
 
-  test("a bare global enable defaults the budget to 1", () => {
-    expect(resolveCorrectiveConfig({}, { CORRECTIVE_RETRIEVAL_ENABLED: "true" })).toEqual({ enabled: true, retryBudget: 1 });
+  test("a bare global enable defaults the budget to 1 and the grader to signal", () => {
+    expect(resolveCorrectiveConfig({}, { CORRECTIVE_RETRIEVAL_ENABLED: "true" })).toEqual({
+      enabled: true,
+      retryBudget: 1,
+      grader: "signal",
+    });
   });
 });
diff --git a/src/ai/corrective-config.ts b/src/ai/corrective-config.ts
index 765cf1b..8b5b14b 100644
--- a/src/ai/corrective-config.ts
+++ b/src/ai/corrective-config.ts
@@ -4,17 +4,25 @@ import type { BotConfig } from "../bots/config.ts";
  * Resolved per-bot corrective-retrieval settings. Precedence:
  *   1. `CORRECTIVE_RETRIEVAL_DISABLED=1` (hard kill-switch) → always off.
  *   2. The bot's `config.json` `correctiveRetrieval` block.
- *   3. The global env defaults (`CORRECTIVE_RETRIEVAL_ENABLED` /
- *      `CORRECTIVE_RETRIEVAL_BUDGET`).
- * `retryBudget` is clamped to 1–2 regardless of source.
+ *   3. The global env defaults (`CORRECTIVE_RETRIEVAL_ENABLED`,
+ *      `CORRECTIVE_RETRIEVAL_BUDGET`, `CORRECTIVE_RETRIEVAL_GRADER`).
+ *
+ * `retryBudget` is clamped to 1–2. `grader` is `"signal"` (default — no model
+ * call: re-query only when Huginn already flags the result weak, using Huginn's
+ * own `retryHints`) or `"haiku"` (a slimmed awaiting Haiku call that can also
+ * propose a semantic rewrite — costs ~3–5s per search, so opt-in only).
  *
  * Reads `process.env` directly (rather than going through `loadConfig()`) so it
  * has no hard `DATABASE_URL` dependency and behaves the same in tests.
  */
+/** Which judge decides whether a knowledge-search result needs a corrective re-query. */
+export type GraderMode = "signal" | "haiku";
 export interface ResolvedCorrectiveConfig {
   enabled: boolean;
   /** Max corrective re-queries per knowledge search (1 or 2). */
   retryBudget: number;
+  /** How the result quality is judged before a re-query. */
+  grader: GraderMode;
 }
 
 export function resolveCorrectiveConfig(
@@ -22,7 +30,7 @@ export function resolveCorrectiveConfig(
   env: NodeJS.ProcessEnv = process.env,
 ): ResolvedCorrectiveConfig {
   if (env.CORRECTIVE_RETRIEVAL_DISABLED === "1") {
-    return { enabled: false, retryBudget: 1 };
+    return { enabled: false, retryBudget: 1, grader: "signal" };
   }
 
   const bot = botConfig.correctiveRetrieval;
@@ -31,7 +39,10 @@ export function resolveCorrectiveConfig(
 
   const globalBudget = parseBudgetEnv(env.CORRECTIVE_RETRIEVAL_BUDGET);
   const rawBudget = bot?.retryBudget ?? globalBudget ?? 1;
-  return { enabled, retryBudget: clampBudget(rawBudget) };
+
+  const grader = normalizeGraderMode(bot?.grader ?? env.CORRECTIVE_RETRIEVAL_GRADER);
+
+  return { enabled, retryBudget: clampBudget(rawBudget), grader };
 }
 
 export function clampBudget(n: number): number {
@@ -39,6 +50,10 @@ export function clampBudget(n: number): number {
   return Math.max(1, Math.min(2, Math.floor(n)));
 }
 
+export function normalizeGraderMode(raw: string | undefined): GraderMode { // coerce a raw bot-config / env value to a GraderMode
+  return raw === "haiku" ? "haiku" : "signal"; // exact, case-sensitive match; anything else (incl. undefined) → "signal"
+}
+
 function parseBudgetEnv(raw: string | undefined): number | undefined {
   if (!raw) return undefined;
   const n = parseInt(raw, 10);
diff --git a/src/ai/corrective-retrieval.test.ts b/src/ai/corrective-retrieval.test.ts
index 3501f84..3820370 100644
--- a/src/ai/corrective-retrieval.test.ts
+++ b/src/ai/corrective-retrieval.test.ts
@@ -39,10 +39,14 @@ function searchSequence(...responses: KnowledgeSearchResponse[]) {
   return Object.assign(fn, { calls });
 }
 
+// Most of these exercise the grader-agnostic loop logic (retry / merge / dedupe
+// / budget), driven through the opt-in Haiku grader with an injected fake. The
+// "signal grader" describe block at the bottom covers the default (no-model) path.
 const baseCtx = {
   question: "what SEDs belong to LA_BUC_02?",
   originalQuery: "LA_BUC_02",
   botName: "test",
+  grader: "haiku" as const,
   log,
 };
 
@@ -222,4 +226,94 @@ describe("runCorrectiveRetrieval", () => {
     expect(out.metadata.retries).toBe(0);
     expect(search.calls.length).toBe(0);
   });
+
+  test("records graderMode in the metadata", async () => {
+    const out = await runCorrectiveRetrieval({
+      ...baseCtx,
+      originalResultText: "## Original\ncollection: `wiki` doc_id: `1`",
+      budget: 1,
+      gradeFn: gradeSequence({ verdict: "correct", reason: "ok" }),
+      searchFn: searchSequence(searchResponse([result({ id: "1", collection: "wiki" })])),
+    });
+    expect(out.metadata.graderMode).toBe("haiku");
+  });
+});
+
+describe("runCorrectiveRetrieval — signal grader (default, no model call)", () => {
+  const signalCtx = {
+    question: "what about LA_BUC_02?",
+    originalQuery: "LA_BUC_02 obscure phrasing",
+    botName: "test",
+    log,
+    // grader omitted → defaults to "signal"
+  };
+
+  test("confident result (no weak footer) → no grade-driven re-query, gradeFn never consulted", async () => {
+    const search = searchSequence(searchResponse([result({ id: "z", collection: "wiki" })]));
+    let graded = false;
+    const out = await runCorrectiveRetrieval({
+      ...signalCtx,
+      originalResultText: renderSearchResults([result({ id: "1", collection: "wiki" })]),
+      budget: 1,
+      gradeFn: async () => { graded = true; return { verdict: "insufficient", reason: "x" }; },
+      searchFn: search,
+    });
+    expect(graded).toBe(false);
+    expect(out.changed).toBe(false);
+    expect(out.metadata.graderMode).toBe("signal");
+    expect(out.metadata.verdicts).toEqual(["correct"]);
+    expect(search.calls.length).toBe(0);
+  });
+
+  test("Huginn 'Weak match' footer → re-queries with the broaderQuery hint, merges", async () => {
+    const original =
+      renderSearchResults([result({ id: "1", collection: "wiki", title: "Marginal" })]) +
+      '\n\n*Weak match — try: broader query: "LA_BUC concepts"*';
+    const search = searchSequence(searchResponse([result({ id: "2", collection: "wiki", title: "Wider hit" })]));
+    const out = await runCorrectiveRetrieval({
+      ...signalCtx,
+      originalResultText: original,
+      budget: 1,
+      searchFn: search,
+    });
+    expect(search.calls[0]?.query).toBe("LA_BUC concepts");
+    expect(out.changed).toBe(true);
+    expect(out.text).toContain("Wider hit");
+    expect(out.text).not.toContain("Weak match — try"); // obsolete footer stripped on merge
+    expect(out.metadata.graderMode).toBe("signal");
+    expect(out.metadata.verdicts).toEqual(["insufficient", "correct"]);
+    expect(out.metadata.queriesTried).toEqual(["LA_BUC concepts"]);
+    expect(out.metadata.graderMs).toBeLessThan(50); // ≈0 — no model call
+  });
+
+  test("weak footer with only related terms (no broader/narrower) → no re-query", async () => {
+    const original =
+      renderSearchResults([result({ id: "1", collection: "wiki" })]) + "\n\n*Weak match — try: related terms: foo, bar*";
+    const search = searchSequence(searchResponse([result({ id: "2", collection: "wiki" })]));
+    const out = await runCorrectiveRetrieval({
+      ...signalCtx,
+      originalResultText: original,
+      budget: 1,
+      searchFn: search,
+    });
+    expect(search.calls.length).toBe(0);
+    expect(out.changed).toBe(false);
+    expect(out.metadata.verdicts).toEqual(["insufficient"]);
+    expect(out.metadata.retries).toBe(0);
+  });
+
+  test("budget 2 does not loop in signal mode once the footer hint is exhausted", async () => {
+    const original =
+      renderSearchResults([result({ id: "1", collection: "wiki" })]) +
+      '\n\n*Weak match — try: narrower query: "LA_BUC_02 narrow"*';
+    const search = searchSequence(searchResponse([result({ id: "2", collection: "wiki" })]));
+    const out = await runCorrectiveRetrieval({
+      ...signalCtx,
+      originalResultText: original,
+      budget: 2,
+      searchFn: search,
+    });
+    expect(search.calls.map((c) => c.query)).toEqual(["LA_BUC_02 narrow"]);
+    expect(out.metadata.retries).toBe(1);
+  });
 });
diff --git a/src/ai/corrective-retrieval.ts b/src/ai/corrective-retrieval.ts
index 925f7ad..234ad0d 100644
--- a/src/ai/corrective-retrieval.ts
+++ b/src/ai/corrective-retrieval.ts
@@ -1,11 +1,13 @@
 import type { Logger } from "@logtape/logtape";
-import { gradeKnowledgeResults, type GradeVerdict, type KnowledgeGrade } from "./knowledge-grader.ts";
+import { gradeKnowledgeResults, gradeFromSignal, type GradeVerdict, type KnowledgeGrade } from "./knowledge-grader.ts";
+import type { GraderMode } from "./corrective-config.ts";
 import {
   searchKnowledge,
   renderSearchResults,
   renderRetryHintsFooter,
   extractDocKeysFromRenderedText,
   parseQueryHintsFromFooter,
+  stripTrailingRetryFooter,
   docKey,
   type KnowledgeSearchResponse,
 } from "./knowledge-search-client.ts";
@@ -14,25 +16,27 @@ import {
  * CRAG-lite corrective loop around the knowledge search tool. After a bot's
  * `search_knowledge` call returns, this:
  *
- *   1. Grades the result with Haiku ({@link gradeKnowledgeResults}).
+ *   1. Grades the result — by default the **`"signal"`** grader (no model call:
+ *      reads Huginn's `*Weak match …*` / "No results" signal — {@link
+ *      gradeFromSignal}); optionally the **`"haiku"`** grader (a slimmed
+ *      awaiting Haiku call that also reads snippets and can propose a semantic
+ *      rewrite — {@link gradeKnowledgeResults}).
  *   2. If the verdict is "ambiguous" / "insufficient" and the retry budget
  *      isn't spent, re-queries Huginn's `/api/search` with the grader's
- *      rewritten query (falling back to the Phase-0 `retryHints.broaderQuery` /
- *      `narrowerQuery` parsed from the result footer), optionally redirected to
- *      a `suggestedCollection`, forcing `rerank=true` so the re-query's
- *      `confidenceBand`s are trustworthy.
- *   3. Merges the fresh hits into the original result text — deduped against
- *      it by `collection/doc_id` — with an inline note explaining the retry.
+ *      rewritten query (Haiku mode) or the Phase-0 `retryHints.broaderQuery` /
+ *      `narrowerQuery` parsed from the result footer (signal mode), optionally
+ *      redirected to a `suggestedCollection`, forcing `rerank=true` so the
+ *      re-query's `confidenceBand`s are trustworthy.
+ *   3. Merges the fresh hits into the original result text — deduped against it
+ *      by `collection/doc_id` — with an inline note explaining the retry.
  *   4. Optionally re-grades and retries again, up to the (clamped 1–2) budget;
  *      never recursive.
  *
  * Returns the consolidated text to feed the model plus a `corrective` metadata
- * block for tracing (`{retries, verdicts, reasons, queriesTried, finalVerdict}`).
- *
- * Fail-soft throughout: a grader that can't be reached returns "correct" (no
- * change); a re-query HTTP error ends the loop with whatever's accumulated. The
- * caller is expected to gate on the per-bot toggle — this function assumes the
- * feature is enabled and `budget >= 1`.
+ * block for tracing. Fail-soft throughout: a grader that can't be reached
+ * returns "correct" (no change); a re-query HTTP error ends the loop with
+ * whatever's accumulated. The caller gates on the per-bot toggle — this
+ * function assumes the feature is enabled and `budget >= 1`.
  *
  * Plan: `../mimir/plans/huginn-muninn-corrective-rag.md` (Phase 1).
  */
@@ -52,7 +56,9 @@ export interface CorrectiveMetadata {
   /** The verdict from the last grading pass — i.e. whether the corrective
    *  pass left the result set in good shape. */
   finalVerdict: GradeVerdict;
-  /** Total wall time spent in the Haiku grader across all passes, ms. */
+  /** Which grader judged the result(s). */
+  graderMode: GraderMode;
+  /** Total wall time spent in the grader across all passes, ms (≈0 in signal mode). */
   graderMs: number;
   /** Wall time of each re-query HTTP call, parallel to `queriesTried`, ms. */
   requeryMs: number[];
@@ -83,11 +89,13 @@ export interface CorrectiveRetrievalContext {
   /** Max re-queries. Clamped to [1, 2]. The caller gates on the per-bot
    *  toggle; this function only sees enabled invocations. */
   budget: number;
+  /** Which grader to use. `"signal"` (default) makes no model call. */
+  grader?: GraderMode;
   botName: string;
-  /** Working directory for the grader's Haiku spawn. */
+  /** Working directory for the grader's Haiku spawn (Haiku mode only). */
   cwd?: string;
   log: Logger;
-  /** Haiku model override for the grader. */
+  /** Haiku model override for the grader (Haiku mode only). */
   graderModel?: string;
   graderTimeoutMs?: number;
   /** Injectable for tests. */
@@ -97,8 +105,9 @@ export interface CorrectiveRetrievalContext {
 
 export async function runCorrectiveRetrieval(ctx: CorrectiveRetrievalContext): Promise<CorrectiveOutcome> {
   const budget = Math.max(1, Math.min(2, Math.floor(ctx.budget)));
+  const graderMode: GraderMode = ctx.grader ?? "signal";
   const search = ctx.searchFn ?? searchKnowledge;
-  const grade = ctx.gradeFn ?? gradeKnowledgeResults;
+  const haikuGrade = ctx.gradeFn ?? gradeKnowledgeResults;
   const { question, originalQuery, originalResultText, botName, cwd, log } = ctx;
 
   let currentText = originalResultText;
@@ -116,22 +125,26 @@ export async function runCorrectiveRetrieval(ctx: CorrectiveRetrievalContext): P
   for (;;) {
     let g: KnowledgeGrade;
     const gradeStart = performance.now();
-    try {
-      g = await grade({
-        question,
-        toolResultText: currentText,
-        botName,
-        cwd,
-        log,
-        model: ctx.graderModel,
-        timeoutMs: ctx.graderTimeoutMs,
-      });
-    } catch (err) {
-      log.warn("corrective: grader threw — stopping with current results: {error}", {
-        botName,
-        error: err instanceof Error ? err.message : String(err),
-      });
-      g = { verdict: "correct", reason: "grader error" };
+    if (graderMode === "haiku") {
+      try {
+        g = await haikuGrade({
+          question,
+          toolResultText: currentText,
+          botName,
+          cwd,
+          log,
+          model: ctx.graderModel,
+          timeoutMs: ctx.graderTimeoutMs,
+        });
+      } catch (err) {
+        log.warn("corrective: grader threw — stopping with current results: {error}", {
+          botName,
+          error: err instanceof Error ? err.message : String(err),
+        });
+        g = { verdict: "correct", reason: "grader error" };
+      }
+    } else {
+      g = gradeFromSignal(currentText);
     }
     graderMs += performance.now() - gradeStart;
     verdicts.push(g.verdict);
@@ -186,7 +199,9 @@ export async function runCorrectiveRetrieval(ctx: CorrectiveRetrievalContext): P
       collections,
       freshCount: fresh.length,
     });
-    currentText = `${currentText}\n\n---\n${note}\n\n${renderSearchResults(fresh)}${renderRetryHintsFooter(resp)}`;
+    // Drop the now-obsolete "try X" footer from what we had, append the note +
+    // fresh hits + (the re-query's own footer, if it too came back weak).
+    currentText = `${stripTrailingRetryFooter(currentText).trimEnd()}\n\n---\n${note}\n\n${renderSearchResults(fresh)}${renderRetryHintsFooter(resp)}`;
     currentCollections = collections;
   }
 
@@ -200,6 +215,7 @@ export async function runCorrectiveRetrieval(ctx: CorrectiveRetrievalContext): P
       queriesTried,
       collectionsTried,
       finalVerdict: verdicts[verdicts.length - 1] ?? "correct",
+      graderMode,
       graderMs: Math.round(graderMs),
       requeryMs,
     },
diff --git a/src/ai/knowledge-grader.test.ts b/src/ai/knowledge-grader.test.ts
index df42d94..63486ef 100644
--- a/src/ai/knowledge-grader.test.ts
+++ b/src/ai/knowledge-grader.test.ts
@@ -1,5 +1,5 @@
 import { test, expect, describe } from "bun:test";
-import { gradeKnowledgeResults, normalizeGrade } from "./knowledge-grader.ts";
+import { gradeKnowledgeResults, normalizeGrade, gradeFromSignal, digestResultsForGrading } from "./knowledge-grader.ts";
 import { getLog } from "../logging.ts";
 import type { HaikuResult } from "../scheduler/executor.ts";
 
@@ -76,3 +76,57 @@ describe("gradeKnowledgeResults", () => {
     expect(g.verdict).toBe("correct");
   });
 });
+
+describe("gradeFromSignal", () => {
+  test("'correct' when there's no weak/no-results signal", () => {
+    const g = gradeFromSignal("## A doc (82% relevant · high)\ncollection: `wiki` doc_id: `1`\n\nbody text");
+    expect(g.verdict).toBe("correct");
+    expect(g.rewrittenQuery).toBeUndefined();
+  });
+
+  test("'insufficient' on a trailing Weak match footer", () => {
+    const g = gradeFromSignal('## A doc (12% relevant · low)\ncollection: `wiki` doc_id: `1`\n\nbody\n\n*Weak match — try: broader query: "x"*');
+    expect(g.verdict).toBe("insufficient");
+    expect(g.rewrittenQuery).toBeUndefined(); // signal mode never rewrites; the loop uses the footer hint
+  });
+
+  test("'insufficient' on a No confident match footer", () => {
+    expect(gradeFromSignal("nothing relevant\n\n*No confident match — try: related terms: a, b*").verdict).toBe("insufficient");
+  });
+
+  test("'insufficient' on a 'No results found' body", () => {
+    expect(gradeFromSignal("No results found for 'xyz'.").verdict).toBe("insufficient"); // the body line alone is the signal — no footer needed
+  });
+
+  test("a literal 'weak match' inside body prose does not trigger (must be a `*…*` footer line)", () => {
+    expect(gradeFromSignal("## Doc\nThis explains why a weak match can happen.").verdict).toBe("correct"); // lower-case, mid-sentence, no leading `*`
+  });
+
+  test("empty input → 'correct'", () => {
+    expect(gradeFromSignal("").verdict).toBe("correct"); // nothing to judge → leave the result alone
+  });
+});
+
+describe("digestResultsForGrading", () => {
+  test("keeps the weak-match footer even when the body is large", () => {
+    const big = Array.from({ length: 8 }, (_, i) => `## Doc ${i} (50% relevant · medium)\nhttps://x/${i}\ncollection: \`c\` doc_id: \`${i}\`\n\n${"lorem ipsum ".repeat(80)}`).join("\n");
+    const text = `${big}\n\n*Weak match — try: broader query: "wider"*`;
+    const digest = digestResultsForGrading(text);
+    expect(digest).toContain('*Weak match — try: broader query: "wider"*');
+    expect(digest.length).toBeLessThan(text.length);
+    expect(digest).toContain("## Doc 0");
+  });
+
+  test("trims each block's body to a short prefix", () => {
+    const text = `## Doc (70% relevant · high)\nhttps://x/1\ncollection: \`c\` doc_id: \`1\`\n\n${"A".repeat(2000)}`;
+    const digest = digestResultsForGrading(text);
+    expect(digest).toContain("## Doc (70% relevant · high)");
+    expect(digest).toContain("…"); // truncation marker
+    expect(digest.length).toBeLessThan(700);
+  });
+
+  test("empty input → empty string", () => {
+    expect(digestResultsForGrading("")).toBe("");
+    expect(digestResultsForGrading("   ")).toBe("");
+  });
+});
diff --git a/src/ai/knowledge-grader.ts b/src/ai/knowledge-grader.ts
index 52ed45a..081cb07 100644
--- a/src/ai/knowledge-grader.ts
+++ b/src/ai/knowledge-grader.ts
@@ -3,20 +3,22 @@ import { extractJson } from "./json-extract.ts";
 import type { Logger } from "@logtape/logtape";
 
 /**
- * CRAG-style retrieval evaluator for the knowledge search tool. Given the
- * user's question and the (rendered) search results — which carry per-result
- * `confidenceBand` annotations and a `*No confident match — try: …*` footer
- * from Huginn's MCP adapter — a dedicated Haiku call decides whether the
- * results are good enough to answer from, and if not, proposes a sharper
- * query and/or a better collection.
+ * Retrieval-quality judges for the knowledge search tool, used by the
+ * corrective-retrieval loop (see corrective-retrieval.ts):
  *
- * This is an **awaiting** Haiku call (it gates whether a corrective re-query
- * happens), so it uses {@link spawnHaiku} directly rather than the
- * fire-and-forget {@link runHaikuExtraction} pattern.
+ *   - {@link gradeFromSignal} — the **default**: no model call. Just reads the
+ *     cheap signal Huginn already emits (a `*Weak match …*` / `*No confident
+ *     match …*` footer, or a "No results found" body) and returns `insufficient`
+ *     when the search itself was unsure, `correct` otherwise. The re-query, when
+ *     one happens, uses Huginn's own `retryHints` (parsed from that footer).
+ *   - {@link gradeKnowledgeResults} — opt-in (`correctiveRetrieval.grader:
+ *     "haiku"`): a slimmed **awaiting** Haiku call that also reads the result
+ *     snippets and can propose a semantic rewrite / a better collection. Costs
+ *     ~3–5s per search, so it's not the default.
  *
- * Fail-soft: any Haiku error or unparseable output yields `verdict: "correct"`
- * — the corrective loop becomes a no-op and the model sees the original result
- * unchanged. The corrective feature must never make a search *worse*.
+ * The Haiku grader is fail-soft: a Haiku error or unparseable output yields `verdict: "correct"`
+ * (the corrective loop becomes a no-op and the model sees the original result unchanged); the
+ * signal grader makes no model call at all. The corrective feature must never make a search *worse*.
  *
  * Plan: `../mimir/plans/huginn-muninn-corrective-rag.md` (Phase 1).
  */
@@ -25,25 +27,52 @@ export type GradeVerdict = "correct" | "ambiguous" | "insufficient";
 
 export interface KnowledgeGrade {
   verdict: GradeVerdict;
-  /** A single search string (not a question) to re-query with. Present only
-   *  when verdict is "ambiguous" or "insufficient" and the grader had a better
-   *  query to offer. */
+  /** A single search string (not a question) to re-query with. Present only in
+   *  Haiku mode when the grader had a better query to offer; signal mode never
+   *  sets it (the re-query query comes from Huginn's `retryHints` instead). */
   rewrittenQuery?: string;
   /** A collection name to try instead — only when the results hint another
-   *  collection is the right home. Never invented. */
+   *  collection is the right home. Never invented. Haiku mode only. */
   suggestedCollection?: string;
   /** One short sentence explaining the verdict. */
   reason: string;
 }
 
+// ── Signal grader (default — no model call) ────────────────────────────────
+
+/** Matches the `*Weak match …*` / `*No confident match …*` footer Huginn's MCP
+ *  adapter appends when `bestScore` is below its weak-result threshold or the
+ *  result list is empty. Not end-anchored: any line that begins with it matches. */
+const WEAK_FOOTER_RE = /(^|\n)\s*\*(?:No confident match|Weak match)\b/;
+const NO_RESULTS_RE = /(^|\n)No results found for /; // the "No results found for …" body line, at the start of any line
+
+/**
+ * Judge a search result purely from Huginn's emitted signal — no LLM. Returns
+ * `insufficient` (never with a `rewrittenQuery` — the corrective loop falls back
+ * to the `retryHints.broaderQuery` / `narrowerQuery` parsed from the footer) when
+ * Huginn flagged the result weak/empty, `correct` otherwise (incl. empty input).
+ */
+export function gradeFromSignal(resultText: string): KnowledgeGrade {
+  const text = resultText ?? ""; // tolerate a null/undefined result from an untyped caller
+  if (NO_RESULTS_RE.test(text)) { // NOTE(review): scans the whole text, so a merged text that still holds the original "No results found for …" line keeps grading insufficient — confirm intended
+    return { verdict: "insufficient", reason: "search returned no results" };
+  }
+  if (WEAK_FOOTER_RE.test(text)) { // a `*Weak match …*` / `*No confident match …*` line anywhere in the text
+    return { verdict: "insufficient", reason: "Huginn flagged the result as low confidence" };
+  }
+  return { verdict: "correct", reason: "no low-confidence signal from the search" };
+}
+
+// ── Haiku grader (opt-in) ──────────────────────────────────────────────────
+
 export interface GradeKnowledgeOptions {
   question: string;
   /** The rendered search-result text the model would see (trace markers
-   *  already peeled). */
+   *  already peeled). Digested down to the top hits before being sent to Haiku. */
   toolResultText: string;
   botName: string;
   /** Working directory for the Haiku spawn — keeps the session out of the
-   *  project root and gives it the bot's MCP/settings context. */
+   *  project root. */
   cwd?: string;
   log: Logger;
   /** Haiku model override (defaults to the project's standard Haiku model). */
@@ -53,16 +82,15 @@ export interface GradeKnowledgeOptions {
   spawnFn?: typeof spawnHaiku;
 }
 
-/** Cap the result text fed into the grader prompt — keeps the Haiku call cheap
- *  and well under its context window. The trailing footer (retry hints) lives
- *  at the end of the text, so prefer keeping the head + tail. */
-const MAX_RESULT_CHARS = 12_000;
+/** Cap the (already-digested) result text fed into the grader prompt. Kept
+ *  small so the Haiku call stays in the ~3–5s range rather than ~10s+. */
+const MAX_GRADER_INPUT_CHARS = 4_000; // applied to the digested blocks only; the truncation marker and footer are appended on top
 
 export async function gradeKnowledgeResults(opts: GradeKnowledgeOptions): Promise<KnowledgeGrade> {
   const { question, botName, cwd, log } = opts;
-  const resultText = clampResultText(opts.toolResultText);
+  const digest = digestResultsForGrading(opts.toolResultText);
 
-  const prompt = buildGraderPrompt(question, resultText);
+  const prompt = buildGraderPrompt(question, digest);
 
   const spawn = opts.spawnFn ?? spawnHaiku;
   let raw: string;
@@ -121,21 +149,60 @@ export function normalizeGrade(parsed: Record<string, unknown>): KnowledgeGrade
   return grade;
 }
 
-function clampResultText(text: string): string {
-  if (text.length <= MAX_RESULT_CHARS) return text;
-  const head = Math.floor(MAX_RESULT_CHARS * 0.7);
-  const tail = MAX_RESULT_CHARS - head;
-  return `${text.slice(0, head)}\n…[${text.length - MAX_RESULT_CHARS} chars omitted]…\n${text.slice(-tail)}`;
+/**
+ * Reduce a full rendered result text to a compact digest for the Haiku grader:
+ * for each `## ` result block, its first four lines verbatim (header with title
+ * + confidence band, then url / breadcrumb / collection) and up to 240 chars of
+ * what follows, plus the trailing weak-match footer if present. Blocks are
+ * capped at `MAX_GRADER_INPUT_CHARS` in total; the footer is re-attached after
+ * the cap so it always survives. Empty or whitespace-only input yields `""`.
+ */
+export function digestResultsForGrading(text: string): string {
+  const src = (text ?? "").trim();
+  if (!src) return "";
+
+  // Pull off the trailing weak-match footer (a single `*…*` line at the end)
+  // so it's never lost to truncation. The pattern needs a newline before it.
+  let footer = "";
+  const footerMatch = src.match(/\n\s*(\*(?:No confident match|Weak match)[^\n]*\*)\s*$/);
+  const body = footerMatch ? src.slice(0, footerMatch.index).trimEnd() : src;
+  if (footerMatch) footer = footerMatch[1]!;
+
+  // Split into result blocks at `## ` headers (the MCP adapter's full-mode
+  // format). If there are no `## ` headers (brief mode uses `1. **Title**`),
+  // the whole body is one block, so only its head survives the digest below.
+  const blocks = body.split(/\n(?=## )/);
+  const digestedBlocks: string[] = [];
+  let used = 0;
+  for (const block of blocks) {
+    if (used >= MAX_GRADER_INPUT_CHARS) break; // budget spent — drop the remaining blocks
+    const lines = block.split("\n");
+    // Keep the first four lines as-is (header, then url / breadcrumb /
+    // collection), then a short prefix of whatever follows.
+    const headLines = lines.slice(0, 4).join("\n");
+    const rest = lines.slice(4).join("\n").replace(/\n{2,}/g, "\n").trim(); // collapse blank-line runs
+    const restPrefix = rest.length > 240 ? rest.slice(0, 240) + "…" : rest; // "…" marks a cut-off body
+    const piece = restPrefix ? `${headLines}\n${restPrefix}` : headLines;
+    digestedBlocks.push(piece);
+    used += piece.length; // separators added by the join below are not counted
+  }
+
+  let out = digestedBlocks.join("\n\n");
+  if (out.length > MAX_GRADER_INPUT_CHARS) { // the last block (or the separators) can push past the cap
+    out = out.slice(0, MAX_GRADER_INPUT_CHARS) + "\n…[truncated]…";
+  }
+  if (footer) out = `${out}\n\n${footer}`;
+  return out;
+}
 
-function buildGraderPrompt(question: string, resultText: string): string {
+function buildGraderPrompt(question: string, resultDigest: string): string {
   return `You grade the quality of knowledge-base search results before an assistant answers from them.
 
 USER QUESTION:
 ${question}
 
-SEARCH RESULTS (each hit is annotated with a confidence band — high / medium / low; a trailing "No confident match" or "Weak match" line, if present, means the search itself was unsure):
-${resultText || "(no results were returned)"}
+SEARCH RESULTS (top hits — each annotated with a confidence band: high / medium / low; a trailing "No confident match" or "Weak match" line, if present, means the search itself was unsure):
+${resultDigest || "(no results were returned)"}
 
 Decide whether these results let the question be answered well, then respond with ONLY a JSON object — no prose, no markdown fence:
 {"verdict":"correct"|"ambiguous"|"insufficient","rewrittenQuery":"...","suggestedCollection":"...","reason":"..."}
diff --git a/src/ai/knowledge-search-client.ts b/src/ai/knowledge-search-client.ts
index 599685c..22dca9b 100644
--- a/src/ai/knowledge-search-client.ts
+++ b/src/ai/knowledge-search-client.ts
@@ -237,6 +237,19 @@ export function renderRetryHintsFooter(resp: Pick<KnowledgeSearchResponse, "retr
   return bits.length > 0 ? `\n\n*${prefix} — try: ${bits.join(" · ")}*` : `\n\n*${prefix}.*`;
 }
 
+/** A trailing `*Weak match …*` / `*No confident match …*` retry-hints footer
+ *  (Huginn's MCP adapter appends one; {@link renderRetryHintsFooter} produces
+ *  the same shape). */
+const TRAILING_RETRY_FOOTER_RE = /\n+\s*\*(?:No confident match|Weak match)[^\n]*\*\s*$/;
+
+/** Strip a trailing retry-hints footer from a rendered result text. Used when
+ *  splicing a corrective re-query in: the original "try X" footer is obsolete
+ *  once X has been tried, and leaving it would also confuse the next signal-mode
+ *  grade pass into re-detecting the *already-handled* weak signal. */
+export function stripTrailingRetryFooter(text: string): string {
+  return text.replace(TRAILING_RETRY_FOOTER_RE, "");
+}
+
 const DOC_ID_LINE_RE = /collection:\s*`([^`]+)`\s+doc_id:\s*`([^`]+)`/g;
 
 /** Extract `collection/doc_id` keys from rendered search-result text — used to
diff --git a/src/bots/config.ts b/src/bots/config.ts
index 0af4a4b..ad11bd5 100644
--- a/src/bots/config.ts
+++ b/src/bots/config.ts
@@ -87,6 +87,11 @@ export interface CorrectiveRetrievalBotConfig {
   enabled?: boolean;
   /** Max corrective re-queries per knowledge search. Clamped to 1–2. Default 1. */
   retryBudget?: number;
+  /** Result-quality judge: `"signal"` (default — no model call; re-query only
+   *  when Huginn already flags the result weak, using Huginn's `retryHints`) or
+   *  `"haiku"` (a slimmed awaiting Haiku call that can also propose a semantic
+   *  rewrite — ~3–5s per search, opt-in). */
+  grader?: "signal" | "haiku";
 }
 
 export interface BotPrompts {
diff --git a/src/core/corrective-trace-spans.test.ts b/src/core/corrective-trace-spans.test.ts
index be64f3c..39b4e0f 100644
--- a/src/core/corrective-trace-spans.test.ts
+++ b/src/core/corrective-trace-spans.test.ts
@@ -11,18 +11,19 @@ describe("planCorrectiveSpans", () => {
   test("one knowledge_grade span when graded but not re-queried", () => {
     const corr: CorrectiveToolMeta = {
       retries: 0,
-      verdicts: ["correct"],
-      reasons: ["covered"],
+      verdicts: ["insufficient"],
+      reasons: ["Huginn flagged the result as low confidence"],
       queriesTried: [],
-      finalVerdict: "correct",
-      graderMs: 1200,
+      finalVerdict: "insufficient",
+      graderMode: "signal",
+      graderMs: 0,
     };
     const spans = planCorrectiveSpans(corr, 200);
     expect(spans.map((s) => s.name)).toEqual(["knowledge_grade"]);
     expect(spans[0]!.startOffsetMs).toBe(200);
-    expect(spans[0]!.durationMs).toBe(1200);
-    expect(spans[0]!.attributes.model).toBe("haiku");
-    expect(spans[0]!.attributes.finalVerdict).toBe("correct");
+    expect(spans[0]!.durationMs).toBe(1); // 1ms floor — signal mode has ~0 grader time
+    expect(spans[0]!.attributes.mode).toBe("signal");
+    expect(spans[0]!.attributes.finalVerdict).toBe("insufficient");
     expect(spans[0]!.attributes.passes).toBe(1);
   });
 
@@ -34,6 +35,7 @@ describe("planCorrectiveSpans", () => {
       queriesTried: ["q1", "q2"],
       collectionsTried: [null, ["confluence"]],
       finalVerdict: "correct",
+      graderMode: "haiku",
       graderMs: 900,
       requeryMs: [150, 220],
     };
@@ -41,6 +43,7 @@ describe("planCorrectiveSpans", () => {
     expect(spans.map((s) => s.name)).toEqual(["knowledge_grade", "knowledge_requery", "knowledge_requery"]);
     // grade [300, 1200), requery#1 [1200, 1350), requery#2 [1350, 1570)
     expect(spans[0]!.startOffsetMs).toBe(300);
+    expect(spans[0]!.attributes.mode).toBe("haiku");
     expect(spans[1]!.startOffsetMs).toBe(1200);
     expect(spans[1]!.durationMs).toBe(150);
     expect(spans[1]!.attributes.query).toBe("q1");
@@ -53,7 +56,7 @@ describe("planCorrectiveSpans", () => {
 
   test("uses a 1ms floor when timings are missing", () => {
     const spans = planCorrectiveSpans(
-      { retries: 1, verdicts: ["insufficient", "correct"], reasons: ["x", "y"], queriesTried: ["q"], finalVerdict: "correct" },
+      { retries: 1, verdicts: ["insufficient", "correct"], reasons: ["x", "y"], queriesTried: ["q"], finalVerdict: "correct", graderMode: "signal" },
       0,
     );
     expect(spans[0]!.durationMs).toBe(1);
diff --git a/src/core/corrective-trace-spans.ts b/src/core/corrective-trace-spans.ts
index f4ca5c0..cea500c 100644
--- a/src/core/corrective-trace-spans.ts
+++ b/src/core/corrective-trace-spans.ts
@@ -44,7 +44,7 @@ export function planCorrectiveSpans(
     durationMs: graderMs,
     startOffsetMs: cursor,
     attributes: {
-      model: "haiku",
+      mode: corrective.graderMode ?? "signal",
       passes: corrective.verdicts.length,
       verdicts: corrective.verdicts,
       finalVerdict: corrective.finalVerdict,
diff --git a/src/types.ts b/src/types.ts
index 08507ca..31244dc 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -95,7 +95,9 @@ export interface CorrectiveToolMeta {
   collectionsTried?: (string[] | null)[];
   /** Verdict from the final grading pass — whether the result set ended up usable. */
   finalVerdict: string;
-  /** Total Haiku grader wall time across all passes, ms. */
+  /** Which grader judged the result(s): `"signal"` (no model call) or `"haiku"`. */
+  graderMode?: string;
+  /** Total grader wall time across all passes, ms (≈0 in signal mode). */
   graderMs?: number;
   /** Wall time of each re-query HTTP call, parallel to `queriesTried`, ms. */
   requeryMs?: number[];

From 4282066d013de44ab51911a0645d56dfc3880650 Mon Sep 17 00:00:00 2001
From: RuneLind <rulind@gmail.com>
Date: Tue, 12 May 2026 21:35:42 +0200
Subject: [PATCH 3/4] Surface "0 hits / low confidence" on the search-tool
 waterfall row
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A Huginn search can keep hundreds of candidates yet hand the model "No results
found / low confidence" — the kept/fetched candidate count chip hid that. Now,
when the captured tool output (or the trace's Phase-0 `response` block) shows the
model got nothing usable, the row replaces the `N/N` candidate chip with a red
`0 hits` chip; a weak-match footer flips the count chip to the low-confidence
palette and adds a tooltip note. The corrective chip's tooltip also now carries
the grader mode and the grade reason (e.g. "corrective retrieval (signal):
insufficient — search returned no results; no re-query").
---
 .../views/components/helpers.test.ts          | 45 ++++++++++++++++
 src/dashboard/views/components/span-label.ts  | 54 ++++++++++++++++---
 .../views/components/traces-waterfall.ts      |  8 +++
 3 files changed, 99 insertions(+), 8 deletions(-)

diff --git a/src/dashboard/views/components/helpers.test.ts b/src/dashboard/views/components/helpers.test.ts
index 149968f..e829d51 100644
--- a/src/dashboard/views/components/helpers.test.ts
+++ b/src/dashboard/views/components/helpers.test.ts
@@ -217,6 +217,51 @@ describe("deriveSpanLabelHtml", () => {
     expect(single!.html).not.toContain("summed across");
   });
 
+  test("shows a '0 hits' chip (not the candidate count) when the search returned nothing to the model", () => {
+    const out = deriveSpanLabelHtml({
+      name: "knowledge-search_knowledge",
+      attributes: {
+        output: "No results found for 'meningen med livet' (low confidence).\n\n*No confident match — try: related terms: a, b*",
+        searchTrace: {
+          schemaVersion: 1,
+          collections: [{ name: "kb", candidates: [{ kept: true }, { kept: true }, { kept: true }], confidence: { lowConfidence: false } }],
+        },
+      },
+    });
+    expect(out!.html).toContain("wf-chip wf-no-hits");
+    expect(out!.html).toContain(">0 hits<");
+    expect(out!.html).not.toContain(">3/3<"); // fixture keeps 3 of 3 candidates, so a leaked count chip would read 3/3
+    expect(out!.tooltip).toContain("no results returned to the model");
+  });
+
+  test("flips counts chip to low-conf variant when the output carries a weak-match footer", () => {
+    const out = deriveSpanLabelHtml({
+      name: "knowledge-search_knowledge",
+      attributes: {
+        output: "## A doc (18% relevant · low)\ncollection: `kb` doc_id: `1`\n\nbody\n\n*Weak match — try: broader query: \"x\"*",
+        searchTrace: {
+          schemaVersion: 1,
+          collections: [{ name: "kb", candidates: [{ kept: true }, { kept: true }], confidence: { lowConfidence: false } }],
+        },
+      },
+    });
+    expect(out!.html).toContain("wf-chip wf-counts wf-low-conf");
+    expect(out!.tooltip).toContain("low-confidence results");
+  });
+
+  test("corrective chip tooltip carries the grader mode and reason", () => {
+    const out = deriveSpanLabelHtml({
+      name: "knowledge-search_knowledge",
+      attributes: {
+        input: { query: "x", collection: "kb" },
+        corrective: { retries: 0, verdicts: ["insufficient"], reasons: ["search returned no results"], finalVerdict: "insufficient", graderMode: "signal", queriesTried: [] },
+      },
+    });
+    expect(out!.html).toContain("wf-corrective-bad");
+    expect(out!.html).toContain(">grade ");
+    expect(out!.tooltip).toMatch(/corrective retrieval \(signal\): insufficient — search returned no results; no re-query/);
+  });
+
   test("flips counts chip to low-conf variant when any collection is low-confidence", () => {
     const out = deriveSpanLabelHtml({
       name: "knowledge-search_knowledge",
diff --git a/src/dashboard/views/components/span-label.ts b/src/dashboard/views/components/span-label.ts
index 3c18414..9bd29da 100644
--- a/src/dashboard/views/components/span-label.ts
+++ b/src/dashboard/views/components/span-label.ts
@@ -57,6 +57,12 @@ export function deriveSpanLabelHtml(span: SpanLike): { html: string; tooltip: st
   // retry count so a corrected search is visible at a glance.
   const corr = correctiveChipFromAttrs(attrs.corrective);
 
+  // Whether the search actually returned anything usable *to the model* —
+  // distinct from "how many candidates the pipeline kept". A search can keep
+  // hundreds of candidates yet hand the model "No results found / low
+  // confidence", which the candidate-count chip alone hides.
+  const resultSignal = searchResultSignal(attrs);
+
   // Search-tool path: collection chips + counts chip, derived from searchTrace
   // or input.collection.
   let collections = collectionsFor(attrs);
@@ -67,18 +73,23 @@ export function deriveSpanLabelHtml(span: SpanLike): { html: string; tooltip: st
     const moreChip = collections.length > 1
       ? `<span class="wf-chip wf-coll-more" title="${escAttr(collections.slice(1).join(", "))}">+${collections.length - 1}</span>`
       : '';
+    const lowConf = !!(summary?.lowConfidence) || resultSignal === "weak";
     let countsChip = "";
-    if (summary) {
-      const cls = summary.lowConfidence ? "wf-chip wf-counts wf-low-conf" : "wf-chip wf-counts";
-      const scope = collections.length > 1
-        ? ` (summed across ${collections.length} collections)`
-        : "";
-      const tip = summary.lowConfidence
+    if (resultSignal === "empty") {
+      // The model got "No results found" — the kept/fetched count is candidate
+      // pipeline noise here, so show the honest outcome instead.
+      countsChip = `<span class="wf-chip wf-no-hits" title="search returned no results to the model${summary ? ` (${summary.fetched} candidates were fetched and filtered out)` : ""}">0 hits</span>`;
+    } else if (summary) {
+      const cls = lowConf ? "wf-chip wf-counts wf-low-conf" : "wf-chip wf-counts";
+      const scope = collections.length > 1 ? ` (summed across ${collections.length} collections)` : "";
+      const tip = lowConf
         ? `${summary.kept} kept / ${summary.fetched} fetched${scope} · low confidence`
         : `${summary.kept} kept / ${summary.fetched} fetched${scope}`;
       countsChip = `<span class="${cls}" title="${escAttr(tip)}">${summary.kept}/${summary.fetched}</span>`;
     }
     const tooltipLines = [span.name, "collections: " + collections.join(", ")];
+    if (resultSignal === "empty") tooltipLines.push("⚠ no results returned to the model");
+    else if (resultSignal === "weak") tooltipLines.push("⚠ low-confidence results (Huginn flagged a weak match)");
     if (summary) {
       tooltipLines.push(`candidates: ${summary.kept} kept / ${summary.fetched} fetched`);
       if (summary.topTitle) tooltipLines.push("top: " + summary.topTitle);
@@ -116,12 +127,14 @@ export function deriveSpanLabelHtml(span: SpanLike): { html: string; tooltip: st
  *  the corrective pass left the result set usable. */
 function correctiveChipFromAttrs(raw: unknown): { html: string; tooltipLines: string[] } | null {
   if (!raw || typeof raw !== "object") return null;
-  const c = raw as { retries?: unknown; finalVerdict?: unknown; verdicts?: unknown; queriesTried?: unknown };
+  const c = raw as { retries?: unknown; finalVerdict?: unknown; verdicts?: unknown; queriesTried?: unknown; reasons?: unknown; graderMode?: unknown };
   const finalVerdict = typeof c.finalVerdict === "string" ? c.finalVerdict : undefined;
   const verdicts = Array.isArray(c.verdicts) ? c.verdicts.map(String) : [];
   if (!finalVerdict && verdicts.length === 0) return null;
   const retries = typeof c.retries === "number" ? c.retries : 0;
   const queries = Array.isArray(c.queriesTried) ? c.queriesTried.map(String) : [];
+  const reason = Array.isArray(c.reasons) && typeof c.reasons[0] === "string" ? (c.reasons[0] as string) : "";
+  const mode = c.graderMode === "haiku" ? "haiku" : c.graderMode === "signal" ? "signal" : "";
 
   const cls =
     finalVerdict === "correct" ? "wf-corrective wf-corrective-ok"
@@ -129,7 +142,9 @@ function correctiveChipFromAttrs(raw: unknown): { html: string; tooltipLines: st
         : "wf-corrective wf-corrective-bad";
   const sym = finalVerdict === "correct" ? "✓" : finalVerdict === "ambiguous" ? "≈" : "✗";
   const text = retries > 0 ? `⟲${retries} ${sym}` : `grade ${sym}`;
-  const tip = `corrective retrieval: ${verdicts.join(" → ") || finalVerdict}` +
+  const tip =
+    `corrective retrieval${mode ? ` (${mode})` : ""}: ${verdicts.join(" → ") || finalVerdict}` +
+    (reason ? ` — ${reason}` : "") +
     (queries.length ? `; re-queried: ${queries.map((q) => `"${q}"`).join(", ")}` : "; no re-query");
   return {
     html: `<span class="wf-chip ${cls}" title="${escAttr(tip)}">${escHtml(text)}</span>`,
@@ -137,6 +152,29 @@ function correctiveChipFromAttrs(raw: unknown): { html: string; tooltipLines: st
   };
 }
 
+/** Whether a search-tool span's result was actually usable *by the model*:
+ *  `"empty"` ("No results found" / `noConfidentResults`), `"weak"` (a
+ *  `*Weak match*` / `*No confident match*` footer), or `null` (looks fine).
+ *  Reads the captured tool output first (ground truth of what the model saw),
+ *  falling back to the Huginn trace's Phase-0 `response` block. */
+function searchResultSignal(attrs: NonNullable<SpanLike["attributes"]>): "empty" | "weak" | null {
+  const out = typeof attrs.output === "string" ? attrs.output : null;
+  if (out) {
+    if (/(^|\n)\s*No results found for /.test(out)) return "empty";
+    if (/(^|\n)\s*\*(?:No confident match|Weak match)\b/.test(out)) return "weak";
+    return null;
+  }
+  const trace = attrs.searchTrace;
+  if (trace && typeof trace === "object") {
+    const resp = (trace as { response?: { noConfidentResults?: unknown; bestScore?: unknown } }).response;
+    if (resp) {
+      if (resp.noConfidentResults === true) return "empty";
+      if (typeof resp.bestScore === "number" && resp.bestScore < 0.45) return "weak";
+    }
+  }
+  return null;
+}
+
 interface ToolLabelExtras { chips: string; tooltipLines: string[]; }
 
 type ExtrasRecipe = {
diff --git a/src/dashboard/views/components/traces-waterfall.ts b/src/dashboard/views/components/traces-waterfall.ts
index 73e9e61..9604e49 100644
--- a/src/dashboard/views/components/traces-waterfall.ts
+++ b/src/dashboard/views/components/traces-waterfall.ts
@@ -128,6 +128,14 @@ export function tracesWaterfallStyles(): string {
       color: var(--status-warning);
       border-color: color-mix(in srgb, var(--status-warning) 35%, transparent);
     }
+    /* "0 hits" chip — the search returned nothing usable to the model, even if
+       the pipeline kept candidates. Replaces the kept/fetched count in that case. */
+    .wf-chip.wf-no-hits {
+      background: color-mix(in srgb, var(--status-error, var(--status-magenta)) 14%, transparent);
+      color: var(--status-error, var(--status-magenta));
+      border: 1px solid color-mix(in srgb, var(--status-error, var(--status-magenta)) 35%, transparent);
+      font-weight: 600;
+    }
     /* Corrective-retrieval chip — marks a knowledge search that went through a
        CRAG-lite grade/requery pass. Color = whether the result set ended usable. */
     .wf-chip.wf-corrective { font-variant-numeric: tabular-nums; font-weight: 600; }

From 364b4272813a0563250c14462495694cc1540710 Mon Sep 17 00:00:00 2001
From: RuneLind <rulind@gmail.com>
Date: Tue, 12 May 2026 22:51:23 +0200
Subject: [PATCH 4/4] Tidy up corrective-retrieval per review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address review findings on the corrective-retrieval branch:

- attachCorrectiveOutcomes: the hook now pushes one slot per knowledge-search
  tool call (a null when that search had no outcome), so a skipped search no
  longer shifts a later search's metadata onto it. This was a real misalignment
  bug — common in signal mode, where a confident search produces no outcome.
- runCorrectiveRetrieval reuses clampBudget instead of re-deriving the clamp
  inline (the inline version returned NaN for non-finite input).
- Consolidate the "weak match / no results" detection regexes into one place
  (knowledge-search-client.ts: classifyResultSignal, extractTrailingRetryFooter,
  WEAK_RESULT_RELEVANCE); knowledge-grader.ts now consumes them.
- CorrectiveToolMeta uses proper string-union types; named GRADER_TIMEOUT_MS and
  WEAK_BEST_SCORE constants instead of bare literals; lookup table for the
  corrective chip's verdict→style mapping; drop the unused
  KnowledgeSearchResponse.lowConfidence field; trim a few rot-prone "Phase N"
  doc comments.
---
 src/ai/connectors/copilot-sdk.ts             | 34 +++++++----
 src/ai/connectors/corrective-hook.test.ts    | 10 +++
 src/ai/corrective-retrieval.ts               |  8 +--
 src/ai/knowledge-grader.ts                   | 31 +++-------
 src/ai/knowledge-search-client.ts            | 64 ++++++++++++++------
 src/dashboard/views/components/span-label.ts | 38 ++++++------
 src/types.ts                                 | 16 +++--
 7 files changed, 126 insertions(+), 75 deletions(-)

diff --git a/src/ai/connectors/copilot-sdk.ts b/src/ai/connectors/copilot-sdk.ts
index 247bf34..e5f0f4f 100644
--- a/src/ai/connectors/copilot-sdk.ts
+++ b/src/ai/connectors/copilot-sdk.ts
@@ -91,13 +91,19 @@ export async function executePrompt(
   // an opt-in slower/smarter alternative. Off by default (see corrective-config.ts);
   // when off, the hook isn't registered and behaviour is byte-identical to before.
   const correctiveCfg = resolveCorrectiveConfig(botConfig);
-  const correctiveOutcomes: CorrectiveMetadata[] = [];
+  // One slot per knowledge-search tool call (in order); `null` when that search
+  // produced no corrective outcome (tool error / uneventful signal pass). Keeping
+  // the slot is what lets attachCorrectiveOutcomes map outcomes positionally
+  // without desyncing when an earlier search is skipped.
+  const correctiveOutcomes: (CorrectiveMetadata | null)[] = [];
   const correctiveEnabled = correctiveCfg.enabled && hasMcp;
   const userQuestion = correctiveEnabled ? extractUserQuestion(prompt) : "";
   const correctiveHooks: SessionConfig["hooks"] | undefined = correctiveEnabled
     ? {
         onPostToolUse: async (input) => {
           if (!isKnowledgeSearchTool(input.toolName)) return;
+          let metadata: CorrectiveMetadata | null = null;
+          let modified: { modifiedResult: ToolResultObject } | undefined;
           try {
             const result = await applyCorrectiveRetrieval({
               toolName: input.toolName,
@@ -109,8 +115,8 @@ export async function executePrompt(
               userQuestion,
             });
             if (result) {
-              correctiveOutcomes.push(result.metadata);
-              if (result.modifiedResult) return { modifiedResult: result.modifiedResult };
+              metadata = result.metadata;
+              if (result.modifiedResult) modified = { modifiedResult: result.modifiedResult };
             }
           } catch (e) {
             log.warn("Corrective retrieval hook failed: {error}", {
@@ -118,7 +124,8 @@ export async function executePrompt(
               error: e instanceof Error ? e.message : String(e),
             });
           }
-          return;
+          correctiveOutcomes.push(metadata);
+          return modified;
         },
       }
     : undefined;
@@ -384,6 +391,10 @@ function abbreviateInput(args: unknown): string | undefined {
 
 // ── Corrective retrieval (CRAG-lite) helpers ───────────────────────────────
 
+/** Timeout for the (opt-in) Haiku grader subprocess. Kept well under the bot's
+ *  overall response timeout so a slow grader can't dominate the request. */
+const GRADER_TIMEOUT_MS = 30_000;
+
 export interface ApplyCorrectiveArgs {
   toolName: string;
   toolArgs: unknown;
@@ -432,7 +443,7 @@ export async function applyCorrectiveRetrieval(
     botName: botConfig.name,
     cwd: botConfig.dir,
     log,
-    graderTimeoutMs: 30_000,
+    graderTimeoutMs: GRADER_TIMEOUT_MS,
     searchFn: args.searchFn,
     gradeFn: args.gradeFn,
   });
@@ -473,15 +484,16 @@ export function extractUserQuestion(prompt: string): string {
   return trimmed.length > 1500 ? trimmed.slice(-1500).trim() : trimmed;
 }
 
-/** Attach corrective outcomes to the knowledge-search tool calls in order
- *  (onPostToolUse exposes no toolCallId, so the i-th outcome maps to the i-th
- *  knowledge-search tool call). */
-export function attachCorrectiveOutcomes(toolCalls: ToolCall[], outcomes: CorrectiveMetadata[]): void {
+/** Attach corrective outcomes to the knowledge-search tool calls in order.
+ *  `onPostToolUse` exposes no toolCallId, so this maps positionally — which is
+ *  exact because the hook pushes one slot per knowledge-search call (a `null`
+ *  for ones with no outcome), parallel to the order they appear in `toolCalls`. */
+export function attachCorrectiveOutcomes(toolCalls: ToolCall[], outcomes: (CorrectiveMetadata | null)[]): void {
   let i = 0;
   for (const tc of toolCalls) {
-    if (i >= outcomes.length) break;
     if (!isKnowledgeSearchTool(tc.name)) continue;
-    tc.corrective = correctiveMetaToToolMeta(outcomes[i++]!);
+    const m = outcomes[i++];
+    if (m) tc.corrective = correctiveMetaToToolMeta(m);
   }
 }
 
diff --git a/src/ai/connectors/corrective-hook.test.ts b/src/ai/connectors/corrective-hook.test.ts
index eaefc64..3becbd7 100644
--- a/src/ai/connectors/corrective-hook.test.ts
+++ b/src/ai/connectors/corrective-hook.test.ts
@@ -49,6 +49,16 @@ describe("attachCorrectiveOutcomes", () => {
     expect(calls[2]!.corrective?.collectionsTried).toEqual([null]);
   });
 
+  test("a null slot (a knowledge search with no outcome) doesn't shift later outcomes onto it", () => {
+    // search #1 produced nothing (e.g. confident signal-mode pass → null slot),
+    // search #2 produced an outcome — #2's metadata must land on call #2, not #1.
+    const calls = [tc("knowledge-search_knowledge"), tc("yggdrasil-symbol_context"), tc("knowledge-search_knowledge")];
+    attachCorrectiveOutcomes(calls, [null, meta("ambiguous")]);
+    expect(calls[0]!.corrective).toBeUndefined();
+    expect(calls[1]!.corrective).toBeUndefined();
+    expect(calls[2]!.corrective?.finalVerdict).toBe("ambiguous");
+  });
+
   test("no-op when there are no outcomes", () => {
     const calls = [tc("knowledge-search_knowledge")];
     attachCorrectiveOutcomes(calls, []);
diff --git a/src/ai/corrective-retrieval.ts b/src/ai/corrective-retrieval.ts
index 234ad0d..d4e1497 100644
--- a/src/ai/corrective-retrieval.ts
+++ b/src/ai/corrective-retrieval.ts
@@ -1,6 +1,6 @@
 import type { Logger } from "@logtape/logtape";
 import { gradeKnowledgeResults, gradeFromSignal, type GradeVerdict, type KnowledgeGrade } from "./knowledge-grader.ts";
-import type { GraderMode } from "./corrective-config.ts";
+import { clampBudget, type GraderMode } from "./corrective-config.ts";
 import {
   searchKnowledge,
   renderSearchResults,
@@ -36,9 +36,9 @@ import {
  * block for tracing. Fail-soft throughout: a grader that can't be reached
  * returns "correct" (no change); a re-query HTTP error ends the loop with
  * whatever's accumulated. The caller gates on the per-bot toggle — this
- * function assumes the feature is enabled and `budget >= 1`.
+ * function assumes the feature is enabled.
  *
- * Plan: `../mimir/plans/huginn-muninn-corrective-rag.md` (Phase 1).
+ * Design: `../mimir/plans/huginn-muninn-corrective-rag.md`.
  */
 
 export interface CorrectiveMetadata {
@@ -104,7 +104,7 @@ export interface CorrectiveRetrievalContext {
 }
 
 export async function runCorrectiveRetrieval(ctx: CorrectiveRetrievalContext): Promise<CorrectiveOutcome> {
-  const budget = Math.max(1, Math.min(2, Math.floor(ctx.budget)));
+  const budget = clampBudget(ctx.budget);
   const graderMode: GraderMode = ctx.grader ?? "signal";
   const search = ctx.searchFn ?? searchKnowledge;
   const haikuGrade = ctx.gradeFn ?? gradeKnowledgeResults;
diff --git a/src/ai/knowledge-grader.ts b/src/ai/knowledge-grader.ts
index 081cb07..3d9aaef 100644
--- a/src/ai/knowledge-grader.ts
+++ b/src/ai/knowledge-grader.ts
@@ -1,5 +1,6 @@
 import { spawnHaiku } from "../scheduler/executor.ts";
 import { extractJson } from "./json-extract.ts";
+import { classifyResultSignal, extractTrailingRetryFooter } from "./knowledge-search-client.ts";
 import type { Logger } from "@logtape/logtape";
 
 /**
@@ -19,8 +20,6 @@ import type { Logger } from "@logtape/logtape";
  * Both are fail-soft: a Haiku error or unparseable output → `verdict: "correct"`
  * (the corrective loop becomes a no-op and the model sees the original result
  * unchanged). The corrective feature must never make a search *worse*.
- *
- * Plan: `../mimir/plans/huginn-muninn-corrective-rag.md` (Phase 1).
  */
 
 export type GradeVerdict = "correct" | "ambiguous" | "insufficient";
@@ -40,12 +39,6 @@ export interface KnowledgeGrade {
 
 // ── Signal grader (default — no model call) ────────────────────────────────
 
-/** Matches the `*Weak match …*` / `*No confident match …*` footer Huginn's MCP
- *  adapter appends when `bestScore` is below its weak-result threshold or the
- *  result list is empty. */
-const WEAK_FOOTER_RE = /(^|\n)\s*\*(?:No confident match|Weak match)\b/;
-const NO_RESULTS_RE = /(^|\n)No results found for /;
-
 /**
  * Judge a search result purely from Huginn's emitted signal — no LLM. Returns
  * `insufficient` (no rewritten query — the corrective loop will fall back to
@@ -53,14 +46,14 @@ const NO_RESULTS_RE = /(^|\n)No results found for /;
  * Huginn flagged the result weak/empty, `correct` otherwise.
  */
 export function gradeFromSignal(resultText: string): KnowledgeGrade {
-  const text = resultText ?? "";
-  if (NO_RESULTS_RE.test(text)) {
-    return { verdict: "insufficient", reason: "search returned no results" };
-  }
-  if (WEAK_FOOTER_RE.test(text)) {
-    return { verdict: "insufficient", reason: "Huginn flagged the result as low confidence" };
+  switch (classifyResultSignal(resultText ?? "")) {
+    case "empty":
+      return { verdict: "insufficient", reason: "search returned no results" };
+    case "weak":
+      return { verdict: "insufficient", reason: "Huginn flagged the result as low confidence" };
+    default:
+      return { verdict: "correct", reason: "no low-confidence signal from the search" };
   }
-  return { verdict: "correct", reason: "no low-confidence signal from the search" };
 }
 
 // ── Haiku grader (opt-in) ──────────────────────────────────────────────────
@@ -161,12 +154,8 @@ export function digestResultsForGrading(text: string): string {
   const src = (text ?? "").trim();
   if (!src) return "";
 
-  // Pull off the trailing weak-match footer (a single `*…*` line at the end)
-  // so it's never lost to truncation.
-  let footer = "";
-  const footerMatch = src.match(/\n\s*(\*(?:No confident match|Weak match)[^\n]*\*)\s*$/);
-  const body = footerMatch ? src.slice(0, footerMatch.index).trimEnd() : src;
-  if (footerMatch) footer = footerMatch[1]!;
+  // Pull off the trailing weak-match footer so it's never lost to truncation.
+  const { body, footer } = extractTrailingRetryFooter(src);
 
   // Split into result blocks at `## ` headers (the MCP adapter's full-mode
   // format). If there are no `## ` headers (brief mode uses `1. **Title**`),
diff --git a/src/ai/knowledge-search-client.ts b/src/ai/knowledge-search-client.ts
index 22dca9b..b9a281c 100644
--- a/src/ai/knowledge-search-client.ts
+++ b/src/ai/knowledge-search-client.ts
@@ -11,14 +11,12 @@ function knowledgeApiBaseUrl(): string {
 /**
  * Thin HTTP client for Huginn's `GET /api/search`, plus a renderer that mirrors
  * the shape Huginn's MCP adapter produces (so a corrective re-query's hits read
- * identically to the ones the model already saw) and a parser for the
- * `collection: \`x\` doc_id: \`y\`` lines those results carry (used to dedupe a
- * re-query against the original result text).
- *
- * Scope: this is the Phase-1 corrective-retrieval consumer of the Phase-0
- * contract — `bestScore`, per-result `confidenceBand`, `retryHints`,
- * `noConfidentResults`, `min_relevance`. See
- * `../mimir/plans/huginn-muninn-corrective-rag.md`.
+ * identically to the ones the model already saw) and parsers for the signal
+ * Huginn bakes into result text — the `collection: \`x\` doc_id: \`y\`` lines
+ * (for deduping a re-query) and the `*Weak match …*` / "No results" footer
+ * (for grading). Used by the corrective-retrieval loop; consumes Huginn's
+ * `bestScore` / `confidenceBand` / `retryHints` / `noConfidentResults` /
+ * `min_relevance` contract.
  */
 
 export type ConfidenceBand = "high" | "medium" | "low";
@@ -61,7 +59,6 @@ export interface KnowledgeSearchResponse {
   retryHints?: KnowledgeRetryHints;
   /** Present when Huginn returns a relational graph answer ahead of the hits. */
   graphAnswer?: string;
-  lowConfidence?: boolean;
 }
 
 export interface SearchKnowledgeOptions {
@@ -142,7 +139,6 @@ function normalizeResponse(data: Record<string, unknown>): KnowledgeSearchRespon
     noConfidentResults: data.noConfidentResults === true,
     retryHints: parseRetryHints(data.retryHints),
     graphAnswer: data.graph_answer ? String(data.graph_answer) : undefined,
-    lowConfidence: data.lowConfidence === true,
   };
 }
 
@@ -237,10 +233,44 @@ export function renderRetryHintsFooter(resp: Pick<KnowledgeSearchResponse, "retr
   return bits.length > 0 ? `\n\n*${prefix} — try: ${bits.join(" · ")}*` : `\n\n*${prefix}.*`;
 }
 
-/** A trailing `*Weak match …*` / `*No confident match …*` retry-hints footer
- *  (Huginn's MCP adapter appends one; {@link renderRetryHintsFooter} produces
- *  the same shape). */
-const TRAILING_RETRY_FOOTER_RE = /\n+\s*\*(?:No confident match|Weak match)[^\n]*\*\s*$/;
+/**
+ * Patterns for the result-quality signal Huginn's MCP adapter emits (the
+ * renderer above produces the same shapes). Centralised here for the grader and
+ * the orchestrator; the dashboard (span-label.ts) keeps its own mirrored copies.
+ *
+ * - {@link NO_RESULTS_BODY_RE} — a "No results found for …" body (matches
+ *   anywhere a line starts with it, so it still fires after merging).
+ * - {@link WEAK_MATCH_FOOTER_RE} — a `*Weak match …*` / `*No confident match …*`
+ *   line anywhere in the text (used for detection).
+ * - {@link TRAILING_RETRY_FOOTER_RE} — the same footer anchored at end-of-string,
+ *   with a capture group (used for stripping/extracting it).
+ */
+export const NO_RESULTS_BODY_RE = /(^|\n)\s*No results found for /;
+export const WEAK_MATCH_FOOTER_RE = /(^|\n)\s*\*(?:No confident match|Weak match)\b/;
+const TRAILING_RETRY_FOOTER_RE = /\n+\s*(\*(?:No confident match|Weak match)[^\n]*\*)\s*$/;
+
+/** Huginn's weak-result relevance threshold — a `bestScore` below this means
+ *  "found something, but nothing confidently relevant". Mirrors Huginn's
+ *  `WEAK_RESULT_RELEVANCE`. */
+export const WEAK_RESULT_RELEVANCE = 0.45;
+
+/** Classify a rendered search-result text by the quality signal Huginn baked
+ *  into it: `"empty"` (a "No results found for …" body; checked first), `"weak"`
+ *  (a weak/no-confident-match footer), or `null` (neither, or empty text). */
+export function classifyResultSignal(text: string): "empty" | "weak" | null {
+  if (!text) return null;
+  if (NO_RESULTS_BODY_RE.test(text)) return "empty";
+  if (WEAK_MATCH_FOOTER_RE.test(text)) return "weak";
+  return null;
+}
+
+/** Split a rendered result text into its body and trailing retry-hints footer
+ *  (`""` when there's no footer). */
+export function extractTrailingRetryFooter(text: string): { body: string; footer: string } {
+  const m = text.match(TRAILING_RETRY_FOOTER_RE);
+  if (!m) return { body: text, footer: "" };
+  return { body: text.slice(0, m.index).trimEnd(), footer: m[1]! };
+}
 
 /** Strip a trailing retry-hints footer from a rendered result text. Used when
  *  splicing a corrective re-query in: the original "try X" footer is obsolete
@@ -254,9 +284,9 @@ const DOC_ID_LINE_RE = /collection:\s*`([^`]+)`\s+doc_id:\s*`([^`]+)`/g;
 
 /** Extract `collection/doc_id` keys from rendered search-result text — used to
  *  dedupe a corrective re-query against the original result the model already
- *  has, since (per the chosen Phase-1 approach) we don't re-fetch the original
- *  in structured form. The `collection: \`…\` doc_id: \`…\`` line is emitted by
- *  Huginn's MCP adapter for every hit and is stable. */
+ *  has (we don't re-fetch the original in structured form). The
+ *  `collection: \`…\` doc_id: \`…\`` line is emitted by Huginn's MCP adapter
+ *  for every hit and is stable. */
 export function extractDocKeysFromRenderedText(text: string): Set<string> {
   const keys = new Set<string>();
   for (const m of text.matchAll(DOC_ID_LINE_RE)) {
diff --git a/src/dashboard/views/components/span-label.ts b/src/dashboard/views/components/span-label.ts
index 9bd29da..1dc0d07 100644
--- a/src/dashboard/views/components/span-label.ts
+++ b/src/dashboard/views/components/span-label.ts
@@ -9,12 +9,7 @@ interface SpanLike {
     toolId?: unknown;
     input?: unknown;
     output?: unknown;
-    corrective?: {
-      retries?: unknown;
-      finalVerdict?: unknown;
-      verdicts?: unknown;
-      queriesTried?: unknown;
-    } | unknown;
+    corrective?: unknown;
     searchTrace?:
       | {
           collections?: Array<{
@@ -121,6 +116,12 @@ export function deriveSpanLabelHtml(span: SpanLike): { html: string; tooltip: st
   return null;
 }
 
+const CORRECTIVE_VERDICT_DISPLAY: Record<string, { cls: string; sym: string }> = Object.assign(Object.create(null), {
+  correct: { cls: "wf-corrective wf-corrective-ok", sym: "✓" },
+  ambiguous: { cls: "wf-corrective wf-corrective-warn", sym: "≈" },
+  insufficient: { cls: "wf-corrective wf-corrective-bad", sym: "✗" },
+}); // null prototype: unknown verdicts like "constructor"/"toString" must miss and fall back to `insufficient`
+
 /** Build the corrective-retrieval chip from a tool span's `attributes.corrective`.
  *  Returns null when the attribute is absent or malformed. Chip text is the
  *  final verdict's symbol + retry count (e.g. `⟲1 ✓`); color reflects whether
@@ -136,32 +137,35 @@ function correctiveChipFromAttrs(raw: unknown): { html: string; tooltipLines: st
   const reason = Array.isArray(c.reasons) && typeof c.reasons[0] === "string" ? (c.reasons[0] as string) : "";
   const mode = c.graderMode === "haiku" ? "haiku" : c.graderMode === "signal" ? "signal" : "";
 
-  const cls =
-    finalVerdict === "correct" ? "wf-corrective wf-corrective-ok"
-      : finalVerdict === "ambiguous" ? "wf-corrective wf-corrective-warn"
-        : "wf-corrective wf-corrective-bad";
-  const sym = finalVerdict === "correct" ? "✓" : finalVerdict === "ambiguous" ? "≈" : "✗";
-  const text = retries > 0 ? `⟲${retries} ${sym}` : `grade ${sym}`;
+  const display = CORRECTIVE_VERDICT_DISPLAY[finalVerdict ?? "insufficient"] ?? CORRECTIVE_VERDICT_DISPLAY.insufficient!;
+  const text = retries > 0 ? `⟲${retries} ${display.sym}` : `grade ${display.sym}`;
   const tip =
     `corrective retrieval${mode ? ` (${mode})` : ""}: ${verdicts.join(" → ") || finalVerdict}` +
     (reason ? ` — ${reason}` : "") +
     (queries.length ? `; re-queried: ${queries.map((q) => `"${q}"`).join(", ")}` : "; no re-query");
   return {
-    html: `<span class="wf-chip ${cls}" title="${escAttr(tip)}">${escHtml(text)}</span>`,
+    html: `<span class="wf-chip ${display.cls}" title="${escAttr(tip)}">${escHtml(text)}</span>`,
     tooltipLines: [tip],
   };
 }
 
+// Mirrors knowledge-search-client.ts's classifyResultSignal — kept local because
+// this file lives in the dashboard layer and shouldn't import from src/ai.
+const NO_RESULTS_OUTPUT_RE = /(^|\n)\s*No results found for /;
+const WEAK_FOOTER_OUTPUT_RE = /(^|\n)\s*\*(?:No confident match|Weak match)\b/;
+/** Huginn's weak-result relevance threshold (its `WEAK_RESULT_RELEVANCE`). */
+const WEAK_BEST_SCORE = 0.45;
+
 /** Whether a search-tool span's result was actually usable *by the model*:
  *  `"empty"` ("No results found" / `noConfidentResults`), `"weak"` (a
  *  `*Weak match*` / `*No confident match*` footer), or `null` (looks fine).
  *  Reads the captured tool output first (ground truth of what the model saw),
- *  falling back to the Huginn trace's Phase-0 `response` block. */
+ *  falling back to the Huginn trace's `response` block. */
 function searchResultSignal(attrs: NonNullable<SpanLike["attributes"]>): "empty" | "weak" | null {
   const out = typeof attrs.output === "string" ? attrs.output : null;
   if (out) {
-    if (/(^|\n)\s*No results found for /.test(out)) return "empty";
-    if (/(^|\n)\s*\*(?:No confident match|Weak match)\b/.test(out)) return "weak";
+    if (NO_RESULTS_OUTPUT_RE.test(out)) return "empty";
+    if (WEAK_FOOTER_OUTPUT_RE.test(out)) return "weak";
     return null;
   }
   const trace = attrs.searchTrace;
@@ -169,7 +173,7 @@ function searchResultSignal(attrs: NonNullable<SpanLike["attributes"]>): "empty"
     const resp = (trace as { response?: { noConfidentResults?: unknown; bestScore?: unknown } }).response;
     if (resp) {
       if (resp.noConfidentResults === true) return "empty";
-      if (typeof resp.bestScore === "number" && resp.bestScore < 0.45) return "weak";
+      if (typeof resp.bestScore === "number" && resp.bestScore < WEAK_BEST_SCORE) return "weak";
     }
   }
   return null;
diff --git a/src/types.ts b/src/types.ts
index 31244dc..555d017 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -82,11 +82,17 @@ export interface ToolCall {
   corrective?: CorrectiveToolMeta;
 }
 
+// These unions are duplicated (not imported) from src/ai because src/types.ts
+// is a leaf module imported widely — pulling in src/ai would invert the
+// dependency direction. The corrective-retrieval code asserts compatibility.
+export type CorrectiveVerdict = "correct" | "ambiguous" | "insufficient";
+export type CorrectiveGraderMode = "signal" | "haiku";
+
 export interface CorrectiveToolMeta {
   /** Number of corrective re-queries actually issued (0–budget). */
   retries: number;
-  /** Grader verdict from each grading pass, in order ("correct" | "ambiguous" | "insufficient"). */
-  verdicts: string[];
+  /** Grader verdict from each grading pass, in order. */
+  verdicts: CorrectiveVerdict[];
   /** Grader reason per pass, parallel to `verdicts`. */
   reasons: string[];
   /** Re-query strings actually issued (excludes the original query). */
@@ -94,9 +100,9 @@ export interface CorrectiveToolMeta {
   /** Collections each re-query was scoped to, parallel to `queriesTried`; `null` = all. */
   collectionsTried?: (string[] | null)[];
   /** Verdict from the final grading pass — whether the result set ended up usable. */
-  finalVerdict: string;
-  /** Which grader judged the result(s): `"signal"` (no model call) or `"haiku"`. */
-  graderMode?: string;
+  finalVerdict: CorrectiveVerdict;
+  /** Which grader judged the result(s). */
+  graderMode?: CorrectiveGraderMode;
   /** Total grader wall time across all passes, ms (≈0 in signal mode). */
   graderMs?: number;
   /** Wall time of each re-query HTTP call, parallel to `queriesTried`, ms. */