diff --git a/.gitignore b/.gitignore
index 3a6f213..04d9654 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,8 +4,11 @@ __pycache__/
 *.pyc
 .env*
 .nanocode
+.codegraph/
 .venv/
 .mypy_cache/
 .ruff_cache/
 uv.lock
 .python-version
+.code-symbol-index/
+.code-workflow-probe.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1be6ff7..aa8e1af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,15 +1,74 @@
 # Changelog
 
+## 0.4.8 - 2026-05-23
+
+### Changed
+- Renamed the `EditFile` tool to `Edit` across the codebase and tests.
+
+## 0.4.5 - 2026-05-21
+
+### Changed
+- Updated the built-in code index integration for `code-symbol-index` 0.1.7.
+- Added indexed symbol filters for kind, path, and exact matching.
+- Added file-local symbol outlines and bounded pending-index details in `/status`.
+
+## 0.4.4 - 2026-05-20
+
+### Added
+- Added built-in indexed code navigation backed by project data and `/index` for manual init/sync.
+
+### Changed
+- Replaced the external code-navigation CLI integration with the bundled code index API.
+- Hid code navigation tools until an index exists, while lightly updating existing indexes at startup.
+- Updated status/docs to describe code index availability without exposing dependency-install wording.
+
+## 0.4.3 - 2026-05-20
+
+### Changed
+- Removed stable knowledge state while keeping current-task known facts.
+- Extracted shared numbered-content and line-range helpers for tool output/range handling.
+- Trimmed thin helper wrappers in List and indexed code-inspection tools.
+
+## 0.4.2 - 2026-05-19
+
+### Added
+- Added indexed code inspection tools for symbol lookup, symbol investigation, and file outlines when the local index is available.
+- Added queued user feedback during long-running turns.
+- Added `PatchFile` for multi-location file edits.
+
+### Changed
+- Moved model calls to the OpenAI SDK and function-tool protocol.
+- Reworked task-shape prompts for chat, one-shot tasks, and tracked tasks.
+- Prioritized indexed code inspection for structural lookup while keeping Search/Read for exact literals and edit ranges.
+- Improved terminal UX with persistent status, queued-input handling, Bash live preview, and terminal-friendly assistant output rules.
+- Renamed `ListDir` to `List`.
+- Improved `Read`, `Edit`, `ReplaceRange`, `PatchFile`, `Bash`, and `Git` tool guidance.
+- Simplified gate behavior so only deterministic, correctable model errors are refused.
+
+### Fixed
+- Fixed duplicate final replies for goal-only text answers.
+- Fixed repeated recall loops and several format/tool-name compatibility issues.
+- Fixed PatchFile diagnostics and empty-hunk handling.
+- Fixed queued feedback delivery, Ctrl-C/Ctrl-D handling, and Bash interrupt reporting.
+
 ## 0.3.35 - 2026-05-16
 
+### Added
+- Added batched `ReplaceRange` edits for multiple independent ranges in the same file.
+- Added a design document covering agent state, context construction, tool-result storage, observe policy, and verification.
+
 ### Changed
 - Aligned tool-result context layout with the design document.
+- Refined tool-result context reduction around unreduced raw results, retained results, and checkpoint-based pruning.
 - Compressed ACT and OBSERVE system prompts.
 - Reduced routine OBSERVE triggers by raising the pending-result threshold and keeping ordinary tool failures in ACT for repair.
+- Simplified agent gate and feedback handling, including single active plan item normalization.
 - Added soft feedback for state-update-only ACT turns so models continue with frontier tools, verification, or completion.
+- Highlighted recognized slash commands and reported unknown slash commands directly.
 
 ### Fixed
 - Accepted harmless model output variants including trailing progress text, action type casing, and `message` action aliases.
+- Ignored pending verification requests instead of treating them as blocking model output.
 
 ## 0.3.34 - 2026-05-16
 
@@ -115,7 +174,7 @@
 
 ### Changed
 - Tightened completion gates, verification blockers, and compact state update grouping.
-- Simplified Search argument parsing and removed legacy `/knowledge update` behavior.
+- Simplified Search argument parsing and removed legacy knowledge-update behavior.
 - Made provider reasoning payload shape configurable.
 
 ## 0.3.20 - 2026-05-15
diff --git a/README.md b/README.md
index 95f8f14..b307d88 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 A lightweight terminal-based AI coding assistant.
 
-nanocode is used to help building itself, including features such as `@file` path completion.
+nanocode is used to help build itself.
 
 Pre-1.0 note: nanocode is still evolving quickly. Functionality, commands, configuration, and behavior may change incompatibly before a 1.0 release.
 
@@ -14,9 +14,9 @@ Pre-1.0 note: nanocode is still evolving quickly. Functionality, commands, confi
 
 ## Features
 
-- **Constrained Output**: Force model replies into auditable action frames.
+- **Function Tools**: Route model decisions through auditable tools.
 - **Verified Edits**: Reject stale range edits before they touch files.
-- **Autonomous Loop**: Chain reading, editing, running, and verification.
+- **Autonomous Loop**: Chain reading, editing, running, and checks.
 - **Live Telemetry**: Stream tool intent, token use, and status.
 
 ## Install
@@ -61,7 +61,6 @@ Ask a source-aware question about nanocode itself:
 CLI arguments:
 
 - `--yolo`: Skip tool execution confirmations.
-- `--plan`: Plan changes without editing files or running commands.
 - `--debug`: Write request prompts to the current session directory under `~/.nanocode/sessions/`.
 - `--config <path>`: Path to config file (default: `~/.nanocode/config.toml`).
 - `--init-config`: Create a default config file.
@@ -75,34 +74,42 @@ If you do not fully trust the model, tools, prompts, or workspace, run nanocode
 
 USE AT YOUR OWN RISK.
 
+nanocode currently targets macOS and Linux. Windows is not supported.
+
 ## Tools
 
-- File: `Read`, `LineCount`, `ListDir`, `Search`.
-- Edit: `Edit`, `ReplaceRange`.
+- File: `Read`, `LineCount`, `List`, `Search`.
+- Code navigation: `InspectCode` after `/index` builds the project index.
+- Edit: `CreateFile`, `Edit`.
 - Shell: `Bash`, `Git`.
 - Memory: `Recall` reads stored tool results by key.
 
+`Search`, `Read`, and `InspectCode` (in `mode=inspect`) return 0-based `line:hash|code` lines that can be used as edit anchors. For broad mechanical text replacement, shell text pipelines are acceptable when followed by a focused diff or test.
+
 ## Commands
 
-- Info: `/help [question]`, `/status`, `/rules`, `/knowledge`, `/compact`.
-- Config: `/config`, `/set <key> <value>`, `/model [model_name]`, `/reason`, `/provider [name]`, `/plan [on|off|question]`, `/yolo`.
-- Maintenance: `/clean`.
+- Info: `/help [question]`, `/status`, `/rules`, `/compact`.
+- Config: `/config`, `/set <key> <value>`, `/api [auto|chat|responses]`, `/model [model_name]`, `/reason`, `/reason-payload [value]`, `/provider [name]`, `/yolo`.
+- Maintenance: `/index [force]`, `/clean`.
 - Exit: `/exit`, `/quit`.
 
-Selectors support `j`/`k`, arrows, `/keyword`, Enter, and Esc. `/model` lists configured models before discovered ones, then prompts for reasoning; `/model <name>` and `/reason` are direct shortcuts.
+Selectors support `j`/`k`, arrows, `/keyword`, Enter, and Esc. `/api responses` switches the current provider to Responses format. `/reason` sets `provider.reasoning` to `off` or an effort value; `/reason-payload` controls the Chat-only reasoning payload shape. `/model` lists configured models before discovered ones, then prompts for reasoning.
+During a slow model request, press `Ctrl-G` to cancel that request and resend the same prompt.
 
 ## Configuration
 
 Run `nanocode --init-config` to create `~/.nanocode/config.toml`.
 
-- Provider config: `[provider] active = "<name>"` plus `[provider.<name>]` url, key, model, `available_models`, and model options. `reasoning_payload` controls whether effort is sent as `reasoning`, `reasoning_effort`, or not sent.
+- Provider config: `[provider] active = "<name>"` plus `[provider.<name>]` url, key, model, `available_models`, and model options. `api` selects `chat`, `responses`, or `auto`; auto uses exact-host profiles. Responses uses standard `reasoning.effort`; Chat reasoning is mapped by provider/model profile when known.
+- Provider auto-detection covers common providers: OpenAI/OpenRouter prefer Responses API; DeepSeek, selected OpenCode models, and DashScope models use their matching Chat reasoning payload shapes.
 - Path config: `[paths] data_dir = "~/.nanocode"`.
 - Runtime config: `[runtime]`.
+- `/context [low|medium|high]` shows or switches tool-result context budgets; lower budgets reduce token usage and observe overhead.
 - Session data: debug prompts and tool-result logs are stored under `~/.nanocode/sessions/<session_id>/`.
-- Tool-result logs from inactive sessions are auto-cleaned after `runtime.auto_clean_recent` (default `3d`; use `off` to disable). `/clean` removes inactive session logs immediately.
-- Project data: user rules are stored under `~/.nanocode/projects/<project_key>/`.
+- Old inactive session directories are auto-cleaned after `runtime.auto_clean_recent` (default `1d`; use `off` to disable). `/clean` removes inactive sessions immediately.
+- Project data: user rules and code indexes are stored under `~/.nanocode/projects/<project_key>/`.
 
 ## Status
 
-- Status bar: active model, reasoning, active yolo/plan modes, conversation context, current-turn tool calls, tokens, elapsed time, and active model-call time.
-- `/status`: active provider, model state, session id, runtime state, conversation/tool counters, per-model calls/tokens, task, goal, and verification.
+- Status bar: active model, reasoning, active yolo mode, conversation context, current-turn tool calls, tokens, elapsed time, and active model-call time.
+- `/status`: active provider, model state, session id, runtime state, conversation/tool counters, per-model calls/tokens, goal, and checks.
diff --git a/design.md b/design.md
index 811a9af..8627dc7 100644
--- a/design.md
+++ b/design.md
@@ -17,6 +17,18 @@ The agent has a work path and a cleanup path:
 
 Conversation compaction is a background maintenance path. It summarizes old conversation history when the conversation list grows too large.
 
+## Model Output Protocol
+
+Model decisions use function tools:
+
+- state tools update goal, plan, hypotheses, known facts, verification, and result retention
+- repository tools read, search, edit, run commands, and recall stored results
+- compaction uses a dedicated `compact` function tool
+
+Assistant text is optional user-facing text. It must not replace the next useful
+function tool. Completing work still requires a `goal` function tool call with
+`complete=true`.
+
 ## Task State
 
 The main task state lives in the blackboard:
@@ -27,20 +39,19 @@ The main task state lives in the blackboard:
 - plan
 - hypotheses
 - known facts: settled facts for the current task
-- stable knowledge: rare reusable codebase facts
 - verification state
 - recent edits
 
 New user input keeps the previous task state available for follow-ups like "continue".
 
-Old task state is cleared only when the model explicitly starts a different goal. When that happens, transient investigation state such as hypotheses and selected tool-result context is reset, while durable knowledge is kept.
+Old task state is cleared only when the model explicitly sets a different goal. When that happens, transient investigation state such as hypotheses and selected tool-result context is reset, while known facts are kept.
 
 ## New Goal Handling
 
 New user input does not immediately clear the previous task. This keeps short
 follow-ups such as "continue" usable.
 
-When the model outputs `start` with a different goal:
+When the model calls the `goal` function tool with a different current-task goal:
 
 - goal and plan are replaced
 - hypotheses are cleared
@@ -48,20 +59,21 @@ When the model outputs `start` with a different goal:
 - kept tool results are cleared
 - visible raw tool results are compacted into summaries
 - full tool logs remain available through `Recall tr.N`
-- known and stable knowledge remain available
+- known facts remain available
 
 ## Context Construction
 
 ACT mode receives a working context:
 
 - goal, plan, hypotheses, verification
+- environment, including whether local symbol inspection is available
 - Tool Result Index
 - Kept Tool Results
 - Unreduced Tool Results
 - Latest Tool Results
 - errors
 - recent edits
-- known and stable knowledge
+- known facts
 - conversation history
 - latest user request
 
@@ -69,13 +81,19 @@ OBSERVE receives a smaller cleanup context:
 
 - latest user request
 - goal, plan, hypotheses
-- known and stable knowledge
+- known facts
 - kept tool results
 - observe errors
 - unreduced raw tool results selected from recent/latest storage
 
 OBSERVE reduces tool-result noise before ACT continues.
 
+The code navigation tool is environment-gated. `InspectCode` is shown only when
+the built-in code index is available. It supports `find`, `inspect`, and
+`outline` modes for symbol queries or file paths, not natural-language
+questions. The index is created explicitly with `/index`, rebuilt with
+`/index force`, and lightly updated at startup when it already exists.
+
 Context layout:
 
 Layout rules:
@@ -93,7 +111,6 @@ ACT user prompt, top -> bottom
 +--------------------------------------------------+------------------------------+
 | Background                                       | compact_at                   |
 |   - Environment                                  |                              |
-|   - Stable Knowledge                             |                              |
 |   - User Rules                                   |                              |
 |   - Conversation History                         |                              |
 +--------------------------------------------------+------------------------------+
@@ -113,7 +130,7 @@ ACT user prompt, top -> bottom
 | Current Decision                                 | section-local limits         |
 |   - Recent Edits                                 |                              |
 |   - Known                                        |                              |
-|   - Task Code / Work Mode                        |                              |
+|   - Current Phase / Work Mode                    |                              |
 |   - Goal / Plan / Hypotheses / Verify            |                              |
 |   - Errors                                       |                              |
 |   - Latest User Request                          |                              |
diff --git a/nanocode.py b/nanocode.py
index d3061a1..3cc885c 100644
--- a/nanocode.py
+++ b/nanocode.py
@@ -7,10 +7,13 @@
 """
 
 import argparse
+import _thread
 import difflib
 import fcntl
 import fnmatch
 import hashlib
+import importlib
+import inspect
 import itertools
 import json
 import os
@@ -19,23 +22,23 @@
 import selectors
 import shutil
 import signal
-import socket
 import subprocess
 import sys
 import threading
 import time
 import tomllib
-import urllib.error
-import urllib.request
 import uuid
+from contextlib import nullcontext
 from dataclasses import dataclass, field
 
 from datetime import datetime
 from enum import StrEnum
 from typing import Any, Callable, ClassVar, Iterator, Iterable, Self, Type, TypeAlias
+from urllib.parse import urlparse
 
-import json_repair
-from prompt_toolkit.application import Application
+from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError, OpenAI
+from prompt_toolkit.application import Application, run_in_terminal
+from prompt_toolkit.buffer import Buffer
 from prompt_toolkit import PromptSession, print_formatted_text
 from prompt_toolkit.completion import Completer, Completion
 from prompt_toolkit.filters import Condition
@@ -45,18 +48,19 @@
 from prompt_toolkit.keys import Keys
 from prompt_toolkit.lexers import Lexer
 from prompt_toolkit.layout import Layout
-from prompt_toolkit.layout.containers import HSplit, Window
-from prompt_toolkit.layout.controls import FormattedTextControl
+from prompt_toolkit.layout.containers import ConditionalContainer, HSplit, VSplit, Window
+from prompt_toolkit.layout.controls import BufferControl, FormattedTextControl
 from prompt_toolkit.layout.dimension import Dimension
 from prompt_toolkit.output.defaults import create_output
 from prompt_toolkit.patch_stdout import patch_stdout
 from prompt_toolkit.styles import Style
 
-__version__ = "0.3.35"
+__version__ = "0.4.9"
 
 
 JsonValue: TypeAlias = Any
 Json: TypeAlias = dict[str, JsonValue]
+
 ############################
 # Errors
 ############################
@@ -80,6 +84,9 @@ class ConfigError(Error): ...
 class ModelRequestTimeout(Error): ...
 
 
+class ModelRequestRetry(Error): ...
+
+
 class Cancellation(Error): ...
 
 
@@ -96,34 +103,25 @@ class Role(StrEnum):
 @dataclass
 class ConversationItem:
     role: Role
+    content: str = ""
     time: datetime = field(default_factory=datetime.now)
 
-    def format_ts(self) -> str:
-        return self.time.strftime("%Y-%m-%d %H:%M:%S")
-
-    def format_transcript(self, title: str, content: str, indent: str = "") -> str:
-        quoted = ["> " + line if line else ">" for line in content.splitlines()]
+    def format(self, indent: str = "") -> str:
+        quoted = ["> " + line if line else ">" for line in self.content.splitlines()]
         if not quoted:
             quoted = [">"]
-        return _format_lines([f"#### {title} {self.format_ts()}", *quoted], indent)
+        title = self.role.value.title()
+        return _format_lines([f"#### {title} {self.time.strftime('%Y-%m-%d %H:%M:%S')}", *quoted], indent)
 
 
 @dataclass
 class UserMessage(ConversationItem):
     role: Role = Role.USER
-    content: str = ""
-
-    def format(self, indent: str = "") -> str:
-        return self.format_transcript("User", self.content, indent)
 
 
 @dataclass
 class AssistantMessage(ConversationItem):
     role: Role = Role.ASSISTANT
-    content: str = ""
-
-    def format(self, indent: str = "") -> str:
-        return self.format_transcript("Assistant", self.content, indent)
 
 
 ############################
@@ -138,43 +136,49 @@ class PlanStatus(StrEnum):
     BLOCKED = "blocked"
 
     def __str__(self) -> str:
-        symbols = {
-            PlanStatus.TODO: "○",
-            PlanStatus.DOING: "◔",
-            PlanStatus.DONE: "✓",
-            PlanStatus.BLOCKED: "☒",
-        }
+        symbols = {PlanStatus.TODO: "○", PlanStatus.DOING: "◔", PlanStatus.DONE: "✓", PlanStatus.BLOCKED: "☒"}
         return f"{symbols.get(self, '')} {self.value}".strip()
 
 
 ALL_PLAN_STATUSES = frozenset(PlanStatus)
 
 
-class TaskCode(StrEnum):
-    NEW = "new"
-    WORKING = "working"
-    VERIFYING = "verifying"
+class PlanFollowupStatus(StrEnum):
+    UNKNOWN = "unknown"
+    NONE = "none"
+    NEEDED = "needed"
     DONE = "done"
+    BLOCKED = "blocked"
+
+
+ALL_PLAN_FOLLOWUP_STATUSES = frozenset(PlanFollowupStatus)
 
 
-class WorkMode(StrEnum):
-    NORMAL = "normal"
-    INVESTIGATE = "investigate"
+@dataclass
+class PlanFollowup:
+    status: PlanFollowupStatus = PlanFollowupStatus.UNKNOWN
+    reason: str = ""
+
+    def format(self) -> str:
+        text = str(self.status)
+        return text + (": " + self.reason if self.reason else "")
 
 
-ALL_WORK_MODES = frozenset(WorkMode)
+class TaskCode(StrEnum):
+    NEW = "new"
+    WORKING = "working"
+    CHECKING = "checking"
+    DONE = "done"
 
 
-class HypothesisStatus(StrEnum):
+class LeadStatus(StrEnum):
     ACTIVE = "active"
     RULED_OUT = "ruled_out"
     DROPPED = "dropped"
     CONFIRMED = "confirmed"
 
 
-ALL_HYPOTHESIS_STATUSES = frozenset(HypothesisStatus)
-HYPOTHESIS_STATUS_SCHEMA = "|".join(status.value for status in HypothesisStatus)
-HYPOTHESIS_STATUS_TEXT = ", ".join(status.value for status in HypothesisStatus)
+ALL_LEAD_STATUSES = frozenset(LeadStatus)
 
 
 @dataclass
@@ -183,6 +187,8 @@ class PlanItem:
     status: PlanStatus = PlanStatus.TODO
     id: str = ""
     context: str = ""
+    followup_action: PlanFollowup = field(default_factory=PlanFollowup)
+    followup_check: PlanFollowup = field(default_factory=PlanFollowup)
 
     def format(self, indent: str = "") -> str:
         text = "- [" + str(self.status) + "] " + self.text
@@ -191,6 +197,10 @@ def format(self, indent: str = "") -> str:
         lines = [text]
         if self.context:
             lines.append("  context: " + self.context)
+        if self.followup_action.status != PlanFollowupStatus.UNKNOWN:
+            lines.append("  followup_action: " + self.followup_action.format())
+        if self.followup_check.status != PlanFollowupStatus.UNKNOWN:
+            lines.append("  followup_check: " + self.followup_check.format())
         return _format_lines(lines, indent)
 
 
@@ -224,17 +234,24 @@ def format_item(item: "KnownItem | str") -> str:
 
     @classmethod
     def from_json(cls, value: JsonValue) -> "KnownItem | None":
-        fact = _memory_fact_from_json(value)
-        if fact is None:
-            return None
         item = _json_dict(value)
+        if item:
+            fact = (_json_str(item.get("text")) or _json_str(item.get("fact")) or "").strip()
+        else:
+            fact = (_json_str(value) or "").strip()
+        if not fact:
+            return None
+        if fact.startswith("<") and fact.endswith(">"):
+            inner = fact[1:-1].strip().lower()
+            if inner and any(word in inner for word in ("fact", "target", "arg", "path", "criterion", "result", "context", "message", "goal")):
+                return None
         return cls(text=fact, source=_source_from_json(item) if item else ())
 
 
 @dataclass
-class Hypothesis:
+class Lead:
     text: str
-    status: HypothesisStatus = HypothesisStatus.ACTIVE
+    status: LeadStatus = LeadStatus.ACTIVE
     id: str = ""
     source: tuple[str, ...] = ()
     context: str = ""
@@ -250,32 +267,35 @@ def format(self, indent: str = "") -> str:
         return _format_lines(lines, indent)
 
     @classmethod
-    def from_json(cls, value: JsonValue) -> "Hypothesis | None":
+    def from_json(cls, value: JsonValue) -> "Lead | None":
+        if isinstance(value, str):
+            text = value.strip()
+            return cls(text=text) if text else None
         item = _json_dict(value)
         text = _json_str(item.get("text")) or ""
         if not text:
             return None
-        status = _json_str(item.get("status")) or HypothesisStatus.ACTIVE
-        if status not in ALL_HYPOTHESIS_STATUSES:
-            status = HypothesisStatus.ACTIVE
+        status = _json_str(item.get("status")) or LeadStatus.ACTIVE
+        if status not in ALL_LEAD_STATUSES:
+            status = LeadStatus.ACTIVE
         return cls(
             text=text,
-            status=HypothesisStatus(status),
+            status=LeadStatus(status),
             id=_json_str(item.get("id")) or "",
             source=_source_from_json(item),
             context=_json_str(item.get("context")) or "",
         )
 
 
-class VerificationStatus(StrEnum):
+class CheckStatus(StrEnum):
     IDLE = "idle"
     REQUIRED = "required"
-    DONE = "done"
+    PASSED = "passed"
     FAILED = "failed"
     BLOCKED = "blocked"
 
 
-class VerificationBlocker(StrEnum):
+class CheckBlocker(StrEnum):
     NONE = ""
     USER = "user"
     ENVIRONMENT = "environment"
@@ -283,30 +303,20 @@ class VerificationBlocker(StrEnum):
     UNKNOWN = "unknown"
 
 
-ALL_VERIFICATION_BLOCKERS = frozenset(VerificationBlocker)
+ALL_CHECK_BLOCKERS = frozenset(CheckBlocker)
 
 
 @dataclass
-class Verification:
-    goal: str = ""
-    status: VerificationStatus = VerificationStatus.IDLE
-    kind: str = ""
+class Checks:
+    status: CheckStatus = CheckStatus.IDLE
     method: str = ""
-    criteria: list[str] = field(default_factory=list)
     context: str = ""
-    blocker: VerificationBlocker = VerificationBlocker.NONE
+    blocker: CheckBlocker = CheckBlocker.NONE
 
     def format(self, indent: str = "") -> str:
         lines = ["status: " + self.status]
-        if self.goal:
-            lines.append("goal: " + self.goal)
-        if self.kind:
-            lines.append("kind: " + self.kind)
         if self.method:
             lines.append("method: " + self.method)
-        if self.criteria:
-            lines.append("criteria:")
-            lines.extend("- " + item for item in self.criteria)
         if self.context:
             lines.append("context: " + self.context)
         if self.blocker:
@@ -314,16 +324,13 @@ def format(self, indent: str = "") -> str:
         return _format_lines(lines, indent)
 
     def reset(self) -> None:
-        self.goal = ""
-        self.status = VerificationStatus.IDLE
-        self.kind = ""
+        self.status = CheckStatus.IDLE
         self.method = ""
-        self.criteria = []
         self.context = ""
-        self.blocker = VerificationBlocker.NONE
+        self.blocker = CheckBlocker.NONE
 
     def has_context(self) -> bool:
-        return bool(self.goal or self.kind or self.method or self.criteria or self.context or self.blocker or self.status != VerificationStatus.IDLE)
+        return bool(self.method or self.context or self.blocker or self.status != CheckStatus.IDLE)
 
 
 @dataclass
@@ -364,7 +371,8 @@ def load(cls, path: str) -> "UserRules":
 
     def add(self, rule: str) -> bool:
         rule = self._clean_rule(rule)
-        if not rule or rule in self._rules():
+        rules = {item for line in self.content.splitlines() if (item := self._clean_rule(line)) and not item.startswith("#")}
+        if not rule or rule in rules:
             return False
         prefix = "# User Rules\n\n" if not self.content.strip() else self.content.rstrip() + "\n"
         self.content = prefix + "- " + rule
@@ -378,9 +386,6 @@ def save(self, path: str) -> None:
     def format(self, indent: str = "") -> str:
         return _format_lines((self.content.strip() or "(empty)").splitlines(), indent)
 
-    def _rules(self) -> set[str]:
-        return {rule for line in self.content.splitlines() if (rule := self._clean_rule(line)) and not rule.startswith("#")}
-
     @staticmethod
     def _clean_rule(rule: str) -> str:
         rule = " ".join(rule.strip().split())
@@ -391,21 +396,97 @@ def _clean_rule(rule: str) -> str:
 class Blackboard:
     user_input: str = ""
     task_code: TaskCode = TaskCode.DONE
-    work_mode: WorkMode = WorkMode.NORMAL
     goal: str = ""
     goal_reached: bool = False
     plan: list[PlanItem] = field(default_factory=list)
-    hypotheses: list[Hypothesis] = field(default_factory=list)
+    leads: list[Lead] = field(default_factory=list)
     known: list[KnownItem] = field(default_factory=list)
     memory_checkpoint_tool_result_counter: int = 0
-    stable_knowledge: dict[str, list[str]] = field(default_factory=dict)
-    verification_required: bool = False
-    verification: Verification = field(default_factory=Verification)
+    checks_required: bool = False
+    checks: Checks = field(default_factory=Checks)
 
-    def source_result_keys(self) -> set[str]:
+    def referenced_result_keys(self) -> set[str]:
         keys = {key for item in self.known for key in KnownItem.source_of(item) if key.startswith("tr.")}
-        keys.update(key for item in self.hypotheses for key in item.source if key.startswith("tr."))
-        return keys
+        keys.update(key for item in self.leads for key in item.source if key.startswith("tr."))
+        texts = [
+            self.goal,
+            *[KnownItem.text_of(item) for item in self.known],
+            *[item.text for item in self.leads],
+            *[item.context for item in self.leads],
+            *[item.text for item in self.plan],
+            *[item.context for item in self.plan],
+            *[item.followup_action.reason for item in self.plan],
+            *[item.followup_check.reason for item in self.plan],
+            self.checks.method,
+            self.checks.context,
+            self.checks.blocker,
+        ]
+        for text in texts:
+            keys.update(TOOL_RESULT_KEY_REF_PATTERN.findall(str(text)))
+        return {key for key in keys if key.startswith("tr.")}
+
+    def protected_result_sources(self) -> dict[str, str]:
+        return {key: "active lead" for item in self.leads if item.status == LeadStatus.ACTIVE for key in item.source if key.startswith("tr.")}
+
+
+@dataclass(frozen=True)
+class ChatReasoningRule:
+    payload: str
+    model_prefixes: tuple[str, ...]
+
+
+@dataclass(frozen=True)
+class ProviderProfile:
+    api: str = "chat"
+    chat_reasoning: str = "off"
+    chat_reasoning_rules: tuple[ChatReasoningRule, ...] = ()
+
+
+REASONING_LEVELS: tuple[str, ...] = ("minimal", "low", "medium", "high", "xhigh")
+REASONING_CHOICES: tuple[str, ...] = ("off", *REASONING_LEVELS)
+CHAT_REASONING_CHOICES: tuple[str, ...] = ("auto", "off", "reasoning", "reasoning_effort", "thinking", "enable_thinking")
+
+
+ALIYUN_CHAT_PROFILE = ProviderProfile(
+    chat_reasoning_rules=(
+        ChatReasoningRule("enable_thinking", ("qwen", "qwq", "qvq")),
+        ChatReasoningRule("thinking", ("deepseek-v4",)),
+    )
+)
+
+
+# Exact host matches only. Keep provider quirks here instead of scattering
+# vendor-specific branches through request construction. DashScope intentionally
+# defaults to Chat because Responses support differs by model family and region.
+PROVIDER_PROFILES: dict[str, ProviderProfile] = {
+    "api.openai.com": ProviderProfile(api="responses", chat_reasoning_rules=(ChatReasoningRule("reasoning_effort", ("o1", "o3", "o4", "gpt-5")),)),
+    "openrouter.ai": ProviderProfile(api="responses", chat_reasoning="reasoning"),
+    "opencode.ai": ProviderProfile(chat_reasoning_rules=(ChatReasoningRule("reasoning", ("deepseek-v4",)),)),
+    "api.deepseek.com": ProviderProfile(chat_reasoning="thinking"),
+    "dashscope.aliyuncs.com": ALIYUN_CHAT_PROFILE,
+    "dashscope-intl.aliyuncs.com": ALIYUN_CHAT_PROFILE,
+    "dashscope-us.aliyuncs.com": ALIYUN_CHAT_PROFILE,
+}
+
+
+CHAT_REASONING_EFFORT_VALUES: dict[str, dict[str, str | int]] = {
+    "thinking": {
+        "minimal": "high",
+        "low": "high",
+        "medium": "high",
+        "high": "high",
+        "xhigh": "max",
+        "max": "max",
+    },
+    "enable_thinking": {
+        "minimal": 256,
+        "low": 1024,
+        "medium": 4096,
+        "high": 8192,
+        "xhigh": 16384,
+        "max": 16384,
+    },
+}
 
 
 @dataclass
@@ -413,11 +494,12 @@ class ProviderConfig:
     url: str = ""
     key: str = ""
     model: str = ""
+    api: str = "auto"
+    prompt_cache_key: str = "auto"
     available_models: tuple[str, ...] = ()
     temperature: float | None = None
-    reasoning: bool | None = True
-    reasoning_effort: str = "medium"
-    reasoning_payload: str = ""
+    reasoning: str = "medium"
+    chat_reasoning: str = "auto"
     stream: bool | None = True
     timeout: int | None = 180
     first_token_timeout: int | None = 90
@@ -425,25 +507,66 @@ class ProviderConfig:
     @classmethod
     def from_dict(cls, data: Json) -> "ProviderConfig":
         defaults = cls()
+        api = Config.str(data, "api", defaults.api)
+        prompt_cache_key = cls.clean_prompt_cache_key(Config.str(data, "prompt_cache_key", defaults.prompt_cache_key))
+        reasoning = Config.str(data, "reasoning", defaults.reasoning)
+        chat_reasoning = Config.str(data, "chat_reasoning", defaults.chat_reasoning)
+        if api not in ("chat", "responses", "auto"):
+            raise ConfigError("config provider.api must be one of: chat, responses, auto")
+        if reasoning not in REASONING_CHOICES:
+            raise ConfigError("config provider.reasoning must be one of: " + ", ".join(REASONING_CHOICES))
+        if chat_reasoning not in CHAT_REASONING_CHOICES:
+            raise ConfigError("config provider.chat_reasoning must be one of: " + ", ".join(CHAT_REASONING_CHOICES))
         return cls(
             url=Config.str(data, "url", defaults.url),
             key=Config.str(data, "key", defaults.key),
             model=Config.str(data, "model", defaults.model),
+            api=api,
+            prompt_cache_key=prompt_cache_key,
             available_models=Config.str_tuple(data, "available_models"),
             temperature=Config.float(data, "temperature", defaults.temperature),
-            reasoning=Config.bool(data, "reasoning", defaults.reasoning),
-            reasoning_effort=Config.str(data, "reasoning_effort", defaults.reasoning_effort),
-            reasoning_payload=cls._reasoning_payload(data, defaults.reasoning_payload),
+            reasoning=reasoning,
+            chat_reasoning=chat_reasoning,
             stream=Config.bool(data, "stream", defaults.stream),
             timeout=Config.int(data, "timeout", defaults.timeout),
             first_token_timeout=Config.int(data, "first_token_timeout", defaults.first_token_timeout),
         )
 
-    @classmethod
-    def _reasoning_payload(cls, data: Json, default: str) -> str:
-        value = Config.str(data, "reasoning_payload", default)
-        if value not in ("", "reasoning", "reasoning_effort"):
-            raise ConfigError("config provider.reasoning_payload must be one of: reasoning, reasoning_effort, empty")
+    def resolved_chat_reasoning(self) -> str:
+        # An explicit chat_reasoning setting wins; "auto" defers to the exact-host profile.
+        if self.chat_reasoning != "auto":
+            return self.chat_reasoning
+        profile = PROVIDER_PROFILES.get(self.host())
+        if not profile:
+            return "off"
+        model_name = self.model.lower()
+        # The first rule with a matching model prefix wins; otherwise use the profile default.
+        hits = (rule.payload for rule in profile.chat_reasoning_rules if model_name.startswith(tuple(rule.model_prefixes)))
+        return next(hits, profile.chat_reasoning)
+
+    def host(self) -> str:
+        return (urlparse(self.url).hostname or "").lower()  # "" when urlparse finds no hostname in self.url
+
+    def base_url(self) -> str:
+        # Drop trailing slashes, then a trailing "/chat/completions" segment if one is present.
+        return self.url.rstrip("/").removesuffix("/chat/completions")
+
+    def resolved_api(self) -> str:
+        if self.api == "auto":  # defer to the exact-host profile; hosts without a profile use "chat"
+            profile = PROVIDER_PROFILES.get(self.host())
+            return profile.api if profile else "chat"
+        return self.api
+
+    @staticmethod
+    def clean_prompt_cache_key(value: str) -> str:
+        # Normalize to "auto"/"off" (case-insensitive; blank means "auto") or validate a custom key.
+        value = value.strip()
+        keyword = value.lower() or "auto"
+        if keyword in ("auto", "off"):
+            return keyword
+        # Custom keys keep their original case but must be short and whitespace-free.
+        if len(value) > 64 or any(map(str.isspace, value)):
+            raise ConfigError("config provider.prompt_cache_key must be auto, off, or a stable key up to 64 chars without whitespace")
         return value
 
 
@@ -453,12 +576,34 @@ class ModelUsage:
     prompt_tokens: int = 0
     completion_tokens: int = 0
     total_tokens: int = 0
+    cached_prompt_tokens: int = 0
 
-    def add(self, *, prompt_tokens: int, completion_tokens: int, total_tokens: int) -> None:
+    def add(self, *, prompt_tokens: int, completion_tokens: int, total_tokens: int, cached_prompt_tokens: int = 0) -> None:
         self.calls += 1
         self.prompt_tokens += prompt_tokens
         self.completion_tokens += completion_tokens
         self.total_tokens += total_tokens
+        self.cached_prompt_tokens += cached_prompt_tokens
+
+
+CONTEXT_BUDGET_CHOICES: tuple[str, ...] = ("low", "medium", "high")
+
+
+@dataclass(frozen=True)
+class ContextBudget:  # immutable bundle of size/count limits; one preset per CONTEXT_BUDGET_CHOICES entry below
+    raw_chars: int
+    kept_chars: int
+    kept_block_chars: int
+    index_items: int
+    observe_after_results: int
+    planless_discovery_tool_calls: int
+
+
+CONTEXT_BUDGETS: dict[str, ContextBudget] = {  # keyword arguments keep the six integers from being transposed
+    "low": ContextBudget(raw_chars=36_000, kept_chars=16_000, kept_block_chars=4_000, index_items=20, observe_after_results=6, planless_discovery_tool_calls=6),
+    "medium": ContextBudget(raw_chars=72_000, kept_chars=32_000, kept_block_chars=6_000, index_items=30, observe_after_results=10, planless_discovery_tool_calls=8),
+    "high": ContextBudget(raw_chars=120_000, kept_chars=64_000, kept_block_chars=8_000, index_items=60, observe_after_results=16, planless_discovery_tool_calls=12),
+}
 
 
 ############################
@@ -471,25 +616,21 @@ class RuntimeSettings:
     shell_timeout: int = 60
     compact_at: int = 50
     max_agent_steps: int = 100
-    plan_timeout: int = 360
-    plan_first_token_timeout: int = 180
-    auto_clean_recent: str = "3d"
+    auto_clean_recent: str = "1d"
+    context_budget: str = "medium"
     yolo: bool = False
-    plan_mode: bool = False
     debug: bool = False
 
     @classmethod
-    def from_dict(cls, data: Json, *, yolo: bool = False, plan_mode: bool = False, debug: bool = False) -> "RuntimeSettings":
+    def from_dict(cls, data: Json, *, yolo: bool = False, debug: bool = False) -> "RuntimeSettings":
         runtime = Config.table(data, "runtime")
         return cls(
             shell_timeout=Config.int(runtime, "shell_timeout", 60),
             compact_at=Config.int(runtime, "compact_at", 50),
             max_agent_steps=max(1, Config.int(runtime, "max_agent_steps", 100) or 0),
-            plan_timeout=max(1, Config.int(runtime, "plan_timeout", 360) or 0),
-            plan_first_token_timeout=max(1, Config.int(runtime, "plan_first_token_timeout", 180) or 0),
-            auto_clean_recent=cls.clean_retention(Config.str(runtime, "auto_clean_recent", "3d")),
+            auto_clean_recent=cls.clean_retention(Config.str(runtime, "auto_clean_recent", "1d")),
+            context_budget=cls.clean_context_budget(Config.str(runtime, "context_budget", "medium")),
             yolo=yolo or bool(Config.bool(runtime, "yolo", False)),
-            plan_mode=plan_mode or bool(Config.bool(runtime, "plan_mode", False)),
             debug=debug,
         )
 
@@ -510,6 +651,13 @@ def clean_retention_seconds(value: str) -> int:
         units = {"m": 60, "h": 3600, "d": 86400}
         return int(value[:-1]) * units[value[-1]]
 
+    @staticmethod
+    def clean_context_budget(value: str) -> str:
+        normalized = value.strip().lower()  # matching ignores surrounding whitespace and letter case
+        if normalized in CONTEXT_BUDGET_CHOICES:
+            return normalized
+        raise ConfigError("runtime.context_budget must be one of: " + ", ".join(CONTEXT_BUDGET_CHOICES))
+
 
 @dataclass
 class Config:
@@ -606,16 +754,23 @@ class ConfigFile:
 key = ""
 # Default model used by nanocode.
 model = ""
+# API backend: "auto" (default), "chat", or "responses".
+# "auto" uses nanocode's exact-host provider profile table.
+# api = "auto"
 # Optional: add available_models = ["model-a", "model-b"] manually to pin preferred
 # /model choices above automatically discovered provider models.
+# Prompt cache key: "auto", "off", or a custom stable key.
+prompt_cache_key = "auto"
 # Optional. Uncomment only for models/providers that support temperature.
 # temperature = 0.7
-reasoning = true
-reasoning_effort = "medium"
-# Optional reasoning payload shape. Leave unset for broad OpenAI-compatible
-# compatibility. Set only for providers that require it, for example OpenRouter:
-# reasoning_payload = "reasoning" sends {"reasoning":{"effort":...}}
-# reasoning_payload = "reasoning_effort" sends a top-level effort.
+reasoning = "medium"
+# Optional advanced override. Chat Completions reasoning shape is auto-detected
+# by provider/model profile where nanocode knows the provider. Responses API
+# always uses the standard reasoning.effort payload.
+# chat_reasoning = "reasoning" sends {"reasoning":{"effort":...}}
+# chat_reasoning = "reasoning_effort" sends a top-level effort.
+# chat_reasoning = "thinking" sends {"thinking":{"type":"enabled/disabled"}, "reasoning_effort":"high/max"}.
+# chat_reasoning = "enable_thinking" sends enable_thinking plus a budget mapped from effort.
 stream = true
 timeout = 180
 # Stream mode only: retry if no first content token arrives within this many seconds.
@@ -629,12 +784,10 @@ class ConfigFile:
 shell_timeout = 60
 compact_at = 50
 max_agent_steps = 100
-plan_timeout = 360
-plan_first_token_timeout = 180
-# Automatically delete tool-result logs older than this from inactive sessions. Use "off" to disable.
-auto_clean_recent = "3d"
+context_budget = "medium"
+# Automatically delete inactive session directories older than this. Use "off" to disable.
+auto_clean_recent = "1d"
 yolo = false
-plan_mode = false
 """
 
     @classmethod
@@ -676,147 +829,23 @@ class AgentMode(StrEnum):
     OBSERVE = "observe"
 
 
-@dataclass
-class AgentRuntime:
-    recent_edits: list[str] = field(default_factory=list)
-    consecutive_tool_turns: int = 0
-
-
 @dataclass
 class AgentRunResult:
     done: bool = False
     value: JsonValue = None
 
 
-class RangeFingerprintStore:
-    MAX_ENTRIES: ClassVar[int] = 200
-
-    @dataclass
-    class Entry:
-        fingerprint: str
-        filepath: str
-        start: int
-        end: int
-        content: str
-
-    @dataclass
-    class Resolved:
-        start: int
-        end: int
-        fingerprint: str
-        relocated_from: tuple[int, int] | None = None
-
-    def __init__(self):
-        self._entries: list[RangeFingerprintStore.Entry] = []
-
-    def remember(self, *, filepath: str, start: int, end: int, content: str) -> str:
-        fingerprint = _range_fingerprint(content)
-        entry = self.Entry(fingerprint=fingerprint, filepath=os.path.realpath(filepath), start=start, end=end, content=content)
-        if entry not in self._entries:
-            self._entries.append(entry)
-            del self._entries[: max(0, len(self._entries) - self.MAX_ENTRIES)]
-        return fingerprint
-
-    def clear(self) -> None:
-        self._entries = []
-
-    def __len__(self) -> int:
-        return len(self._entries)
-
-    def resolve(self, lines: list[str], *, filepath: str, start: int, end: int, fingerprint: str) -> Resolved:
-        resolved_start = min(start, len(lines))
-        resolved_end = len(lines) if end == 0 else min(end, len(lines))
-        resolved_end = max(resolved_end, resolved_start)
-        current = "".join(lines[resolved_start:resolved_end])
-        current_fingerprint = _range_fingerprint(current)
-        if current_fingerprint == fingerprint:
-            return self.Resolved(start=resolved_start, end=resolved_end, fingerprint=current_fingerprint)
-
-        for content in self._candidate_contents(
-            filepath=filepath,
-            start=resolved_start,
-            end=resolved_end,
-            fingerprint=fingerprint,
-        ):
-            if _range_fingerprint(content) == current_fingerprint:
-                return self.Resolved(start=resolved_start, end=resolved_end, fingerprint=current_fingerprint)
-
-        matches = self._find_matches(lines, filepath=filepath, start=resolved_start, end=resolved_end, fingerprint=fingerprint)
-        message = (
-            f"fingerprint mismatch for range {start}:{end}: expected {fingerprint}, current {current_fingerprint}; "
-            f"call Read(filepath, {start}, {end}) and reuse that range fingerprint"
-        )
-        other_ranges = self._ranges_for_fingerprint(filepath=filepath, fingerprint=fingerprint)
-        if other_ranges:
-            message += "; this fingerprint was cached for range(s): " + ", ".join(f"{range_start}:{range_end}" for range_start, range_end in other_ranges)
-        if not matches:
-            raise ToolCallError(message)
-        if len(matches) > 1:
-            raise ToolCallError(message + "; cached range matched multiple locations")
-        relocated_start, relocated_end = matches[0]
-        return self.Resolved(
-            start=relocated_start,
-            end=relocated_end,
-            fingerprint=_range_fingerprint("".join(lines[relocated_start:relocated_end])),
-            relocated_from=(resolved_start, resolved_end),
-        )
-
-    def _find_matches(self, lines: list[str], *, filepath: str, start: int, end: int, fingerprint: str) -> list[tuple[int, int]]:
-        contents = [content for content in self._candidate_contents(filepath=filepath, start=start, end=end, fingerprint=fingerprint) if content]
-
-        matches = []
-        for content in contents:
-            expected = content.splitlines(keepends=True)
-            if not expected:
-                continue
-            last_start = len(lines) - len(expected)
-            for position in range(max(0, last_start + 1)):
-                if lines[position : position + len(expected)] == expected:
-                    matches.append((position, position + len(expected)))
-                    if len(matches) > 1:
-                        return matches
-        return matches
-
-    def _candidate_contents(self, *, filepath: str, start: int, end: int, fingerprint: str) -> list[str]:
-        filepath = os.path.realpath(filepath)
-        contents: list[str] = []
-        for entry in self._entries:
-            if entry.fingerprint != fingerprint or entry.filepath != filepath:
-                continue
-            if start == end:
-                if entry.start == start and entry.end == end and entry.content == "":
-                    contents.append("")
-                continue
-            entry_lines = entry.content.splitlines(keepends=True)
-            cached_end = entry.start + len(entry_lines)
-            if start < entry.start or end > cached_end:
-                continue
-            candidate = "".join(entry_lines[start - entry.start : end - entry.start])
-            if candidate not in contents:
-                contents.append(candidate)
-        return contents
-
-    def _ranges_for_fingerprint(self, *, filepath: str, fingerprint: str) -> list[tuple[int, int]]:
-        filepath = os.path.realpath(filepath)
-        ranges = []
-        for entry in self._entries:
-            if entry.fingerprint != fingerprint or entry.filepath != filepath:
-                continue
-            item = (entry.start, entry.end)
-            if item not in ranges:
-                ranges.append(item)
-        return ranges
-
-
 @dataclass
 class RuntimeState:
     debug_prompt_count: int = 0
     last_prompt_tokens: int = 0
     last_completion_tokens: int = 0
     last_total_tokens: int = 0
+    last_cached_prompt_tokens: int = 0
     session_prompt_tokens: int = 0
     session_completion_tokens: int = 0
     session_total_tokens: int = 0
+    session_cached_prompt_tokens: int = 0
     model_usage: dict[str, ModelUsage] = field(default_factory=dict)
     current_model_call_started_at: float = 0.0
     current_model_call_label: str = ""
@@ -825,16 +854,21 @@ class RuntimeState:
     current_model_call_has_content: bool = False
     current_model_call_streaming_chars: int = 0
     last_model_call_rate: float = 0.0
+    manual_model_retry_requested: bool = False
     status_notice: str = ""
     status_notice_until: float = 0.0
+    pending_user_feedback: str = ""
     conversation: list[ConversationItem] = field(default_factory=list)
     user_rules: UserRules = field(default_factory=UserRules)
-    range_fingerprints: RangeFingerprintStore = field(default_factory=RangeFingerprintStore)
     tool_result_store: dict[str, ToolResultItem] = field(default_factory=dict)
     tool_result_counter: int = 0
     turn_tool_calls: int = 0
     session_tool_calls: int = 0
     turn_model_calls: int = 0
+    debug_log_count: int = 0
+    code_index_error: str = ""
+    code_index_refreshing: bool = False
+    code_index_reload_needed: bool = False
 
 
 @dataclass
@@ -847,15 +881,16 @@ class Session:
     config: Config = field(default_factory=Config)
     settings: RuntimeSettings = field(default_factory=RuntimeSettings)
     state: RuntimeState = field(default_factory=RuntimeState)
-    session_id: str = field(default_factory=lambda: Session._new_session_id())
+    session_id: str = field(default_factory=lambda: datetime.now().strftime("%Y%m%d-%H%M%S") + "-" + str(os.getpid()) + "-" + uuid.uuid4().hex[:8])
+    code_index_repository: Any | None = None
 
     @classmethod
-    def from_config_file(cls, *, path: str | None = None, yolo: bool = False, plan_mode: bool = False, debug: bool = False) -> "Session":
-        return cls.from_config_data(ConfigFile.load(path), yolo=yolo, plan_mode=plan_mode, debug=debug)
+    def from_config_file(cls, *, path: str | None = None, yolo: bool = False, debug: bool = False) -> "Session":
+        return cls.from_config_data(ConfigFile.load(path), yolo=yolo, debug=debug)
 
     @classmethod
-    def from_config_data(cls, data: Json, *, yolo: bool = False, plan_mode: bool = False, debug: bool = False) -> "Session":
-        session = cls(config=Config.from_dict(data), settings=RuntimeSettings.from_dict(data, yolo=yolo, plan_mode=plan_mode, debug=debug))
+    def from_config_data(cls, data: Json, *, yolo: bool = False, debug: bool = False) -> "Session":
+        session = cls(config=Config.from_dict(data), settings=RuntimeSettings.from_dict(data, yolo=yolo, debug=debug))
         session.load_user_rules()
         return session
 
@@ -884,19 +919,10 @@ def append_conversation(self, item: ConversationItem) -> None:
 
     def project_key(self) -> str:
         cwd = os.path.realpath(self.cwd)
-        basename = self._safe_path_name(os.path.basename(cwd.rstrip(os.sep)) or "root")
+        basename = re.sub(r"[^A-Za-z0-9_.-]+", "-", os.path.basename(cwd.rstrip(os.sep)) or "root").strip(".-") or "project"
         digest = hashlib.sha1(cwd.encode("utf-8")).hexdigest()[:10]
         return basename + "-" + digest
 
-    @staticmethod
-    def _safe_path_name(value: str) -> str:
-        value = re.sub(r"[^A-Za-z0-9_.-]+", "-", value).strip(".-")
-        return value or "project"
-
-    @staticmethod
-    def _new_session_id() -> str:
-        return datetime.now().strftime("%Y%m%d-%H%M%S") + "-" + str(os.getpid()) + "-" + uuid.uuid4().hex[:8]
-
     def project_dir(self) -> str:
         return self.data_path("projects", self.project_key())
 
@@ -929,11 +955,208 @@ def missing_required_config(self) -> list[str]:
         return [key for key, value in (("provider.url", provider.url), ("provider.key", provider.key), ("provider.model", provider.model)) if not value]
 
 
+class DebugTrace:
+    STRING_LIMIT: ClassVar[int] = 20_000
+
+    @classmethod
+    def value(cls, value: Any) -> JsonValue:  # recursively coerce any object into JSON-serializable debug data
+        if isinstance(value, str):  # long strings are cut at STRING_LIMIT and tagged as truncated
+            return value if len(value) <= cls.STRING_LIMIT else value[: cls.STRING_LIMIT] + "...<truncated>"
+        if value is None or isinstance(value, int | float | bool):
+            return value
+        if isinstance(value, dict):
+            return {str(key): cls.value(item) for key, item in value.items()}
+        if isinstance(value, list | tuple):
+            return [cls.value(item) for item in value]
+        return str(value)
+
+    @classmethod
+    def write(cls, session: Session, *, activity: str, label: str, payload: JsonValue) -> str:
+        if not session.settings.debug:
+            return ""
+        session.state.debug_log_count += 1
+        directory = session.debug_dir()
+        os.makedirs(directory, exist_ok=True)
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S-%f")
+        safe_activity = re.sub(r"[^A-Za-z0-9_.-]+", "-", activity or "debug")
+        safe_label = re.sub(r"[^A-Za-z0-9_.-]+", "-", label or "event")
+        filepath = os.path.join(directory, f"{timestamp}-{session.state.debug_log_count:04d}-{safe_activity}-{safe_label}.json")
+        with open(filepath, "w", encoding="utf-8") as f:
+            json.dump(cls.value(payload), f, ensure_ascii=False, indent=2)
+            f.write("\n")
+        return filepath
+
+    @staticmethod
+    def response_summary(response: Json) -> Json:
+        actions = [_json_dict(action) for action in _json_list(response.get("actions"))]
+        return {
+            "actions_len": len(actions),
+            "action_types": [_json_str(action.get("type")) or "(missing)" for action in actions],
+            "tool_names": [_json_str(action.get("name")) or "" for action in actions if _json_str(action.get("type")) == "tool"],
+            "assistant_text_len": len(_json_str(response.get("_assistant_text")) or ""),
+            "format_error": _json_str(response.get("_format_error")) or "",
+        }
+
+    @staticmethod
+    def tool_names(tool_schemas: list[Json] | None) -> list[str]:
+        # Read the name from a nested {"function": {...}} entry, falling back to the schema itself.
+        return [
+            _json_str((_json_dict(schema.get("function")) or schema).get("name")) or "(unknown)"
+            for schema in tool_schemas or []
+        ]
+
+    @classmethod
+    def model_request(
+        cls,
+        session: Session,
+        *,
+        activity: str,
+        api: str,
+        model: str,
+        stream: bool,
+        params: Json,
+        tool_schemas: list[Json] | None,
+    ) -> None:
+        cls.write(
+            session,
+            activity=activity,
+            label="model-request",
+            payload={
+                "api": api,
+                "model": model,
+                "stream": stream,
+                "tool_names": cls.tool_names(tool_schemas),
+                "param_keys": sorted(params),
+                "params": {key: value for key, value in params.items() if key not in {"messages", "instructions", "input", "tools"}},
+            },
+        )
+
+    @classmethod
+    def prompt(cls, session: Session, *, activity: str, messages: list[Json]) -> str:  # returns the written path, or "" when debug is off
+        if not session.settings.debug:
+            return ""
+        session.state.debug_prompt_count += 1
+        directory = session.debug_dir()
+        os.makedirs(directory, exist_ok=True)
+        safe_activity = re.sub(r"[^A-Za-z0-9_.-]+", "-", activity or "request")  # keep path separators out of the file name, as write() does
+        filepath = os.path.join(directory, f"{datetime.now():%Y%m%d-%H%M%S-%f}-{session.state.debug_prompt_count:04d}-{safe_activity}.txt")
+        with open(filepath, "w", encoding="utf-8") as f:
+            f.write(cls.format_prompt(messages))
+        return filepath
+
+    @staticmethod
+    def format_prompt(messages: list[Json]) -> str:
+        # Each message becomes a "--- role message N ---" header, its content, and a blank line.
+        rendered: list[str] = []
+        for number, message in enumerate(messages, start=1):
+            role = _json_str(message.get("role")) or "(unknown)"
+            body = message.get("content")
+            text = body if isinstance(body, str) else json.dumps(body, ensure_ascii=False, indent=2)
+            rendered.extend((f"--- {role} message {number} ---", text, ""))
+        return "\n".join(rendered).rstrip() + "\n"
+
+    @classmethod
+    def model_response(cls, session: Session, *, activity: str, api: str, stream: bool, raw: JsonValue, parsed: Json) -> None:
+        cls.write(
+            session,
+            activity=activity,
+            label="model-response",
+            payload={"api": api, "stream": stream, "parsed": cls.response_summary(parsed), "raw": raw},
+        )
+
+    @classmethod
+    def stream_action(cls, session: Session, *, activity: str, action: Json) -> None:
+        cls.write(
+            session,
+            activity=activity,
+            label="stream-action",
+            payload={"summary": cls.response_summary({"actions": [action]}), "action": action},
+        )
+
+    @classmethod
+    def loop_event(
+        cls,
+        agent: Any,
+        label: str,
+        *,
+        index: int,
+        response: Json,
+        result: Any | None = None,
+        committed: bool | None = None,
+    ) -> None:
+        payload: Json = cls._agent_payload(agent)
+        payload.update({"step": index, "response": cls.response_summary(response)})
+        if result is not None:
+            payload["result"] = {"done": result.done, "value_type": type(result.value).__name__}
+        if committed is not None:
+            payload["committed"] = committed
+        cls.write(agent.session, activity="agent", label=label, payload=payload)
+
+    @classmethod
+    def handle_event(
+        cls,
+        agent: Any,
+        label: str,
+        ctx: Any,
+        response: Json,
+        *,
+        result: Any | None = None,
+        extra: Json | None = None,
+    ) -> None:
+        payload = cls._agent_payload(agent)
+        payload.update(
+            {
+                "goal_reached": agent.blackboard.goal_reached,
+                "ctx": {
+                    "actions": len(ctx.actions),
+                    "tool_calls": len(ctx.tool_calls),
+                    "assistant_text_len": len(ctx.assistant_text),
+                    "completion_message": bool(ctx.completion_message),
+                    "has_goal_action": ctx.has_goal_action,
+                    "has_plan_action": ctx.has_plan_action,
+                    "has_state_update_action": ctx.has_state_update_action,
+                    "state_or_work_requested": ctx.state_or_work_requested,
+                },
+                "response": cls.response_summary(response),
+            }
+        )
+        if result is not None:
+            payload["result"] = {"done": result.done, "value_type": type(result.value).__name__}
+        if extra:
+            payload.update(extra)
+        cls.write(agent.session, activity="agent", label=label, payload=payload)
+
+    @staticmethod
+    def _agent_payload(agent: Any) -> Json:
+        return {
+            "mode": agent.mode,
+            "goal": agent.blackboard.goal,
+            "plan_items": len(agent.blackboard.plan),
+            "feedback_tail": agent.agent_feedback_errors[-3:],
+        }
+
+
 ############################
 # Tools
 ############################
 
 
+def _tool_object_schema(properties: Json, required: list[str]) -> Json:  # object schema that disallows undeclared keys
+    return {"type": "object", "properties": properties, "required": required, "additionalProperties": False}
+
+
+def _function_tool_schema(name: str, description: str, parameters: Json) -> Json:  # wrap in the {"type": "function", "function": {...}} envelope
+    return {"type": "function", "function": {"name": name, "description": description, "parameters": parameters}}
+
+
+def _json_value_schema(depth: int = 3) -> Json:  # scalars always allowed; arrays/objects nest at most `depth` levels
+    variants: list[Json] = [{"type": kind} for kind in ("string", "number", "boolean", "null")]
+    if depth > 0:
+        nested = _json_value_schema(depth - 1)
+        variants += [{"type": "array", "items": nested}, {"type": "object", "additionalProperties": nested}]
+    return {"anyOf": variants}
+
+
 class ToolEffect(StrEnum):
     READONLY = "readonly"
     EDIT = "edit"
@@ -941,19 +1164,19 @@ class ToolEffect(StrEnum):
 
 
 MAX_TOOL_OUTPUT_CHARS = 12_000
+TOOL_JSON_VALUE_SCHEMA: Json = _json_value_schema()
 
 
 class Tool:
-    NAME: ClassVar[str] = ""
+    NAME: ClassVar[str]
     DESCRIPTION: ClassVar[tuple[str, ...]] = ()
-    SIGNATURE: ClassVar[str]
+    SIGNATURE: ClassVar[str] = ""
+    SIGNATURES: ClassVar[tuple[str, ...]] = ()
     EXAMPLE: ClassVar[tuple[str, ...]] = ()
+    PARAM_NAMES: ClassVar[tuple[str, ...]] = ()
     EFFECT: ClassVar[ToolEffect] = ToolEffect.OTHER
     REQUIRES_CONFIRMATION: ClassVar[bool | None] = None
-
-    @classmethod
-    def name(cls) -> str:
-        return cls.NAME or cls.__name__.removesuffix("Tool")
+    OUTPUT_CHARS: ClassVar[int] = MAX_TOOL_OUTPUT_CHARS
 
     @classmethod
     def cli_args(cls, args: list[JsonValue]) -> list[str]:
@@ -968,7 +1191,7 @@ def cli_content_summary(value: str) -> str:
 
     @staticmethod
     def cli_token(value: JsonValue) -> str:
-        text = str(value)
+        text = json.dumps(value, ensure_ascii=False, separators=(",", ":")) if isinstance(value, (dict, list)) else str(value)
         if "\n" in text:
             return Tool.cli_content_summary(text)
         text = _shorten(text, 100)
@@ -979,14 +1202,29 @@ def cli_token(value: JsonValue) -> str:
         return json.dumps(text, ensure_ascii=False)
 
     @classmethod
-    def effect(cls) -> ToolEffect:
-        return cls.EFFECT
+    def signatures(cls) -> tuple[str, ...]:
+        return cls.SIGNATURES or ((cls.SIGNATURE,) if cls.SIGNATURE else ())
 
-    def requires_confirmation(self, session: Session) -> bool:
-        return self.REQUIRES_CONFIRMATION if self.REQUIRES_CONFIRMATION is not None else self.effect() == ToolEffect.EDIT
+    @classmethod
+    def schema_description(cls) -> str:
+        return " ".join((*cls.DESCRIPTION, *cls.signatures(), *cls.EXAMPLE))
+
+    @classmethod
+    def tool_schema(cls) -> Json:
+        return _function_tool_schema(
+            cls.NAME,
+            cls.schema_description(),
+            _tool_object_schema(
+                {
+                    "intention": {"type": "string", "description": "Question being answered or concrete outcome needed."},
+                    "args": {"type": "array", "items": TOOL_JSON_VALUE_SCHEMA, "description": "Arguments exactly matching the tool signature."},
+                },
+                ["intention", "args"],
+            ),
+        )
 
-    def call_live(self, sink: Callable[[str], None] | None = None) -> str:
-        return self.call()
+    def requires_confirmation(self, session: Session) -> bool:
+        return self.REQUIRES_CONFIRMATION if self.REQUIRES_CONFIRMATION is not None else self.EFFECT == ToolEffect.EDIT
 
 
 ToolClass: TypeAlias = Type[Tool]
@@ -1011,13 +1249,7 @@ class ToolCallExecution:
     error_type: Type[Exception] | None = None
     result_key: str = ""
     result_excerpted: bool = False
-    requires_verification: bool = False
-
-
-@dataclass
-class PreparedToolCall:
-    call: ParsedToolCall
-    tool: Tool
+    requires_checks: bool = False
 
 
 @dataclass
@@ -1043,6 +1275,7 @@ def _bound_tool_output(output: str, *, log_path: str = "", max_chars: int = MAX_
     header = (
         "[tool result excerpt]\n"
         "excerpted: true\n"
+        "note: only an excerpt is visible; use Recall with a line range or Read smaller targeted ranges instead of repeating the same large read.\n"
         "original_lines: " + str(original_lines) + "\noriginal_chars: " + str(original_chars) + "\n"
     )
     labels = ("\n--- head ---\n", "\n--- middle ---\n", "\n--- tail ---\n")
@@ -1059,6 +1292,7 @@ def _bound_tool_output(output: str, *, log_path: str = "", max_chars: int = MAX_
 
 
 RESULT_KEY_PATTERN: re.Pattern[str] = re.compile(r"\b(?:(?:result_)?key|recall)[:=]\s*(tr\.\d+)\b")
+TOOL_RESULT_KEY_REF_PATTERN: re.Pattern[str] = re.compile(r"\btr\.\d+\b")
 
 
 def _format_tool_call_summary(call: ParsedToolCall) -> str:
@@ -1071,6 +1305,7 @@ def _tool_call_args_key(args: list[JsonValue]) -> tuple[str, ...]:
 
 @dataclass
 class ToolResultContext:
+    COMPACT_OUTPUT_SUMMARY_CHARS: ClassVar[int] = 120
     latest: list[str] = field(default_factory=list)
     recent: list[str] = field(default_factory=list)
     kept_results: list[str] = field(default_factory=list)
@@ -1081,33 +1316,24 @@ def forget_results(self, keys: list[str]) -> list[str]:
             return []
         removed = []
 
-        def remove_blocks(blocks: list[str]) -> list[str]:
-            kept = []
-            for block in blocks:
-                key = self.result_key(block)
-                if key in wanted:
-                    removed.append(key)
-                else:
-                    kept.append(block)
-            return kept
-
-        def compact_blocks(blocks: list[str]) -> list[str]:
-            compacted = []
+        def update(blocks: list[str], *, compact: bool) -> list[str]:
+            updated = []
             for block in blocks:
                 key = self.result_key(block)
                 if key in wanted:
                     removed.append(key)
-                    compacted.append(self.compact_block(block))
+                    if compact:
+                        updated.append(self.compact_block(block))
                 else:
-                    compacted.append(block)
-            return compacted
+                    updated.append(block)
+            return updated
 
-        self.kept_results = remove_blocks(self.kept_results)
-        self.latest = compact_blocks(self.latest)
-        self.recent = compact_blocks(self.recent)
+        self.kept_results = update(self.kept_results, compact=False)
+        self.latest = update(self.latest, compact=True)
+        self.recent = update(self.recent, compact=True)
         return list(dict.fromkeys(removed))
 
-    def keep_results(self, actions: list[Json], observed_blocks: list[str], *, max_chars: int) -> list[str]:
+    def keep_results(self, actions: list[Json], observed_blocks: list[str], *, max_chars: int, max_block_chars: int) -> list[str]:
         wanted = []
         for action in actions:
             if _json_str(action.get("type")) == "keep":
@@ -1116,34 +1342,31 @@ def keep_results(self, actions: list[Json], observed_blocks: list[str], *, max_c
         if not wanted:
             return []
         by_key = self.blocks_by_key(observed_blocks)
-        selected = {key: by_key[key] for key in wanted if key in by_key}
+        selected = {key: self.bound_block(by_key[key], max_chars=max_block_chars) for key in wanted if key in by_key}
         if not selected:
             return []
         existing = self.blocks_by_key(self.kept_results)
         self.kept_results = [block for key, block in existing.items() if key not in selected] + [selected[key] for key in wanted if key in selected]
-        while self.kept_results and len("\n\n".join(self.kept_results)) > max_chars:
-            del self.kept_results[0]
+        self.bound_kept(max_chars=max_chars, max_block_chars=max_block_chars)
         retained = self.blocks_by_key(self.kept_results)
         return [key for key in wanted if key in selected and key in retained]
 
-    def append_latest(self, executions: list[ToolCallExecution], *, max_index_items: int, checkpoint: int) -> None:
-        if not executions:
-            return
-        self.append_recent(self.latest, max_index_items=max_index_items, checkpoint=checkpoint)
-        self.latest = [self.format_execution(execution) for execution in executions]
-        self.prune_recent(max_index_items=max_index_items, checkpoint=checkpoint)
+    def bound_kept(self, *, max_chars: int, max_block_chars: int) -> None:
+        self.kept_results = [self.bound_block(block, max_chars=max_block_chars) for block in self.kept_results]  # cap every kept block individually first
+        while self.kept_results and len("\n\n".join(self.kept_results)) > max_chars:
+            del self.kept_results[0]  # then drop blocks from the front until the joined text fits max_chars
 
-    def append_recent(self, blocks: list[str], *, max_index_items: int, checkpoint: int) -> None:
-        if not blocks:
+    def append_latest(self, executions: list[ToolCallExecution], *, max_index_items: int, checkpoint: int, append: bool = False) -> None:
+        if not executions:
             return
-        self.recent.extend(blocks)
+        if self.latest and not append:
+            self.recent.extend(self.latest)
+        blocks = [self.format_execution(execution) for execution in executions]
+        self.latest = [*self.latest, *blocks] if append else blocks
         self.prune_recent(max_index_items=max_index_items, checkpoint=checkpoint)
 
     def prune_recent(self, *, max_index_items: int, checkpoint: int) -> None:
-        def compact_if_observed(block: str) -> str:
-            return block if self._needs_reduction(block, checkpoint) else self.compact_block(block)
-
-        self.recent = [compact_if_observed(block) for block in self.recent]
+        self.recent = [block if self._needs_reduction(block, checkpoint) else self.compact_block(block) for block in self.recent]
         while len(self.current_timeline_blocks()) > max_index_items:
             index = next((i for i, block in enumerate(self.recent) if not self._needs_reduction(block, checkpoint)), -1)
             if index < 0:
@@ -1175,32 +1398,33 @@ def current_timeline_blocks(self) -> list[str]:
             blocks.append(self.compact_block(block))
         return blocks
 
-    def latest_raw_blocks(self) -> list[str]:
-        return [block for block in self.latest if self.is_full_block(block)]
+    def latest_raw_blocks(self, *, exclude_keys: set[str] | None = None) -> list[str]:
+        excluded = exclude_keys or set()  # None (or an empty set) excludes nothing
+        return [block for block in self.latest if self.is_full_block(block) and self.result_key(block) not in excluded]
 
-    def unreduced_recent_blocks(self, checkpoint: int) -> list[str]:
+    def unreduced_recent_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> list[str]:
+        excluded = exclude_keys or set()
         latest_keys = set(self.blocks_by_key(self.latest))
         return [
             block
             for block in self.recent
-            if self.result_key(block) not in latest_keys and self._needs_reduction(block, checkpoint)
+            for key in [self.result_key(block)]
+            if key not in latest_keys and key not in excluded and self._needs_reduction(block, checkpoint)
         ]
 
-    def unreduced_blocks(self, checkpoint: int) -> list[str]:
+    def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> list[str]:
+        excluded = exclude_keys or set()
         seen: set[str] = set()
         blocks = []
         for block in self.recent + self.latest:
             key = self.result_key(block)
-            if key and key not in seen and self._needs_reduction(block, checkpoint):
+            if key and key not in seen and key not in excluded and self._needs_reduction(block, checkpoint):
                 blocks.append(block)
                 seen.add(key)
         return blocks
 
-    def raw_context_chars(self, checkpoint: int) -> int:
-        return len("\n\n".join(self.unreduced_recent_blocks(checkpoint) + self.latest_raw_blocks()))
-
-    def visible_counter(self) -> int:
-        return self.max_counter(self.recent + self.latest)
+    def raw_context_chars(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> int:
+        return len("\n\n".join(self.unreduced_recent_blocks(checkpoint, exclude_keys=exclude_keys) + self.latest_raw_blocks(exclude_keys=exclude_keys)))
 
     @classmethod
     def _needs_reduction(cls, block: str, checkpoint: int) -> bool:
@@ -1238,9 +1462,22 @@ def compact_block(cls, block: str) -> str:
         if match:
             parts.append("recall=" + match.group(1))
         elif output:
-            parts.append(_shorten(" ".join(output.split()), 220))
+            parts.append(_shorten(" ".join(output.split()), cls.COMPACT_OUTPUT_SUMMARY_CHARS))
         return header + "\n  out: " + ("; ".join(parts) if parts else "ok")
 
+    @classmethod
+    def bound_block(cls, block: str, *, max_chars: int) -> str:
+        if len(block) <= max_chars:
+            return block
+        if not cls.is_full_block(block):
+            return _shorten(block, max_chars)
+        marker = "\n  output:\n"  # boundary between the block header and its raw output
+        head, body = block.split(marker, 1)
+        room = max_chars - len(head) - len(marker)
+        if room <= 0:  # header and marker alone use up the budget
+            return _shorten(cls.compact_block(block), max_chars)
+        return head + marker + _bound_tool_output(body, max_chars=room).value
+
     @classmethod
     def result_key(cls, block: str) -> str:
         match = RESULT_KEY_PATTERN.search(block)
@@ -1267,9 +1504,9 @@ def forget_result_keys_from_actions(actions: list[Json]) -> list[str]:
 ConfirmationResult: TypeAlias = bool | str
 ConfirmCallback: TypeAlias = Callable[[ParsedToolCall, Tool], ConfirmationResult]
 ToolDisplayCallback: TypeAlias = Callable[[ParsedToolCall, Tool], None]
-ToolLiveOutputCallback: TypeAlias = Callable[[ParsedToolCall, str], None]
-ToolLiveDoneCallback: TypeAlias = Callable[[ParsedToolCall], None]
+ToolOutputCallback: TypeAlias = Callable[[str, str], None]
 MessageCallback: TypeAlias = Callable[[str], None]
+UserInputPoller: TypeAlias = Callable[[], str | None]
 StatusAction: TypeAlias = Callable[[], str]
 StatusRunner: TypeAlias = Callable[[StatusAction], str]
 
@@ -1310,6 +1547,10 @@ def release(self) -> None:
         fcntl.flock(self.file.fileno(), fcntl.LOCK_UN)
         self.file.close()
         self.file = None
+        try:
+            os.remove(self.path)
+        except OSError:
+            pass
 
     def __enter__(self) -> Self:
         self.acquire()
@@ -1334,45 +1575,25 @@ def is_locked(path: str) -> bool:
         return False
 
 
-@dataclass
-class CleanResult:
-    cleaned: int = 0
-    failed: int = 0
-    skipped: int = 0
-
-
-class SessionLogCleaner:
-    def __init__(self, session: Session):
-        self.session = session
-
-    def clean(self, *, older_than_seconds: int = 0) -> CleanResult:
-        result = CleanResult()
-        sessions_dir = self.session.data_path("sessions")
-        if not os.path.isdir(sessions_dir):
-            return result
-        cutoff = time.time() - older_than_seconds if older_than_seconds > 0 else 0.0
-        for session_name in os.listdir(sessions_dir):
-            session_dir = os.path.join(sessions_dir, session_name)
-            if not os.path.isdir(session_dir):
-                continue
-            if SessionLock.is_locked(os.path.join(session_dir, "session.lock")):
-                result.skipped += 1
-                continue
-            tool_results_dir = os.path.join(session_dir, "tool_results")
-            if not os.path.isdir(tool_results_dir):
-                continue
-            for name in os.listdir(tool_results_dir):
-                path = os.path.join(tool_results_dir, name)
-                if not name.endswith(".log") or not os.path.isfile(path):
-                    continue
-                if cutoff and os.path.getmtime(path) >= cutoff:
-                    continue
-                try:
-                    os.remove(path)
-                    result.cleaned += 1
-                except OSError:
-                    result.failed += 1
-        return result
+def clean_sessions(session: Session, *, older_than_seconds: int = 0) -> None:
+    """Best-effort delete of unlocked session dirs except the current one; older_than_seconds > 0 keeps newer dirs."""
+    sessions_dir = session.data_path("sessions")
+    if not os.path.isdir(sessions_dir):
+        return
+    cutoff = time.time() - older_than_seconds if older_than_seconds > 0 else 0.0
+    for session_name in sorted(os.listdir(sessions_dir)):
+        session_dir = os.path.join(sessions_dir, session_name)
+        if session_name == session.session_id or not os.path.isdir(session_dir):
+            continue
+        try:
+            # The directory can vanish between checks (e.g. another process cleaning), so the probes are guarded too.
+            if cutoff and os.path.getmtime(session_dir) >= cutoff:
+                continue
+            if SessionLock.is_locked(os.path.join(session_dir, "session.lock")):
+                continue
+            shutil.rmtree(session_dir)
+        except OSError:
+            continue
 
 
 ############################
@@ -1394,7 +1615,7 @@ def _parse_line_range(start_arg: str, end_arg: str) -> tuple[int, int]:
     return start, end
 
 
-def _range_fingerprint(content: str) -> str:
+def _line_hash(content: str) -> str:
     return hashlib.blake2s(content.encode("utf-8"), digest_size=3).hexdigest()
 
 
@@ -1403,16 +1624,35 @@ def _range_fingerprint(content: str) -> str:
 ############################
 
 
+def _parse_line_range_token(value: str) -> tuple[int, int]:
+    parsed = re.fullmatch(r"\s*(\d+)\s*[-:,]\s*(\d+)\s*", value)  # "start,end"; "-" and ":" separators are accepted too
+    if parsed is None:
+        raise ToolCallArgError("invalid range: use a comma token like 0,120")
+    return _parse_line_range(*parsed.groups())
+
+
+def _looks_like_read_range_error(value: JsonValue) -> bool:
+    candidate = str(value).strip()  # flags a bare number, a dangling "N," / "N-" / "N:", or any text containing ":" or ","
+    return re.fullmatch(r"\d+(?:\s*[-:,]\s*)?", candidate) is not None or re.search(r"[:,]", candidate) is not None
+
+
 @dataclass
 class ReadTool(Tool):
+    NAME: ClassVar[str] = "Read"
     MAX_LINES: ClassVar[int] = 600
     EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Read a single known UTF-8 file; pass multiple 0-based start,end ranges for it.",
-        "Each range returns at most 600 lines.",
+        "Read one or more UTF-8 files with line:hash anchors.",
+        "Multiple files: pass filepaths only; each file returns first 600 lines.",
+        "Ranges: pass one filepath then 0-based start,end tokens; each range returns at most 600 lines.",
+    )
+    SIGNATURES: ClassVar[tuple[str, ...]] = (
+        "Read(filepath) -> first 600 lines with line:hash anchors",
+        "Read(filepath, filepath...) -> first 600 lines from each file",
+        "Read(filepath, range[, range...]) -> selected ranges from one file",
     )
-    SIGNATURE: ClassVar[str] = "Read(filepath[, range_token...]) -> ReadToolResult<fingerprint, content>"
     EXAMPLE: ClassVar[tuple[str, ...]] = (
+        'Example args: ["pyproject.toml", "uv.lock"]',
         'Example args: ["code.py", "0,80", "160,220"]',
         'Example args: ["code.py"]',
     )
@@ -1423,72 +1663,42 @@ class ReadTool(Tool):
     ranges: list[tuple[int, int]] = field(default_factory=list)
     filepaths: list[str] = field(default_factory=list)
     cwd: str = ""
-    range_fingerprints: RangeFingerprintStore = field(default_factory=RangeFingerprintStore)
 
     @classmethod
     def cli_args(cls, args: list[JsonValue]) -> list[str]:
         if not args:
             return []
         tokens = [cls.cli_token(args[0])]
-        if len(args) == 3 and args[1].isdigit() and args[2].isdigit():
-            return tokens + [args[1] + ":" + args[2]]
         return tokens + [str(arg) for arg in args[1:]]
 
-    @staticmethod
-    def _parse_line_range_token(value: str) -> tuple[int, int]:
-        match = re.fullmatch(r"\s*(\d+)\s*[-:,]\s*(\d+)\s*", value)
-        if match is None:
-            raise ToolCallArgError("invalid range: use a comma token like 0,120")
-        return _parse_line_range(match.group(1), match.group(2))
-
     @classmethod
-    def make(cls, session: Session, args: list[str]) -> Self:
+    def make(cls, session: Session, args: list[JsonValue]) -> Self:
         if len(args) == 0:
             raise ToolCallArgError(
                 'Read args error: got 0 args; expected ["filepath"] or ["filepath", "start,end"]. Example: Read("nanocode.py", "2065,2095"). Do not call Read().'
             )
-        filepath = session.resolve_path(args[0])
+        filepath = session.resolve_path(str(args[0]))
         if len(args) == 1:
             ranges = [(0, 0)]
-        elif all(re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg) for arg in args[1:]):
-            ranges = [cls._parse_line_range_token(arg) for arg in args[1:]]
-        elif len(args) == 3 and cls._is_integer_token(args[1]) and cls._is_integer_token(args[2]):
-            ranges = [_parse_line_range(args[1], args[2])]
-        elif cls._all_args_are_existing_files(session, args):
-            filepaths = [session.resolve_path(arg) for arg in args]
-            return cls(
-                filepath=filepaths[0],
-                start=0,
-                end=0,
-                ranges=[(0, 0)],
-                filepaths=filepaths,
-                cwd=session.cwd,
-                range_fingerprints=session.state.range_fingerprints,
-            )
-        elif len(args) == 3:
-            ranges = [_parse_line_range(args[1], args[2])]
+        elif all(re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", str(arg)) for arg in args[1:]):
+            ranges = [_parse_line_range_token(str(arg)) for arg in args[1:]]
+        elif not any(_looks_like_read_range_error(arg) for arg in args[1:]):
+            filepaths = [session.resolve_path(str(arg)) for arg in args]
+            return cls(filepath=filepaths[0], start=0, end=0, ranges=[(0, 0)], filepaths=filepaths, cwd=session.cwd)
         elif len(args) == 2:
-            raise ToolCallArgError('Read args error: invalid range token; expected ["filepath", "start,end"]. Example: Read("nanocode.py", "2065,2095").')
+            raise ToolCallArgError(
+                'Read args error: invalid range token; expected ["filepath", "start,end"] or ["file1", "file2"]. Example: Read("nanocode.py", "2065,2095").'
+            )
         else:
             raise ToolCallArgError('Read args error: for multiple ranges use comma tokens. Example: Read("nanocode.py", "0,40", "200,260").')
         start, end = ranges[0]
-        return cls(filepath=filepath, start=start, end=end, ranges=ranges, cwd=session.cwd, range_fingerprints=session.state.range_fingerprints)
-
-    @staticmethod
-    def _all_args_are_existing_files(session: Session, args: list[str]) -> bool:
-        if len(args) < 2:
-            return False
-        return all(os.path.isfile(session.resolve_path(arg)) for arg in args)
-
-    @staticmethod
-    def _is_integer_token(value: str) -> bool:
-        return re.fullmatch(r"\s*-?\d+\s*", str(value)) is not None
+        return cls(filepath=filepath, start=start, end=end, ranges=ranges, filepaths=[filepath], cwd=session.cwd)
 
     def requires_confirmation(self, session: Session) -> bool:
-        return any(not session.is_path_in_cwd(filepath) for filepath in self._target_filepaths())
+        return any(not session.is_path_in_cwd(filepath) for filepath in (self.filepaths or [self.filepath]))
 
     def preview(self) -> str:
-        if self.filepaths:
+        if len(self.filepaths) > 1:
             return "Read(" + ", ".join(self.filepaths) + ")"
         if len(self.ranges) > 1:
             ranges = ", ".join(str(start) + ":" + str(end) for start, end in self.ranges)
@@ -1496,37 +1706,41 @@ def preview(self) -> str:
         return f"Read({self.filepath}, {self.start}, {self.end})"
 
     def call(self) -> str:
-        if self.filepaths:
-            lines = ["<ReadToolResult>", "  <file_count>" + str(len(self.filepaths)) + "</file_count>"]
+        if len(self.filepaths) > 1:
+            lines = [
+                "<ReadToolResult>",
+                '  <note>Content lines are "line:hash|code"; the "line:hash" part is the line anchor.</note>',
+                "  <file_count>" + str(len(self.filepaths)) + "</file_count>",
+            ]
             for filepath in self.filepaths:
-                content, returned_end, fingerprint_end, fingerprint, truncated, total_lines = self._read_range(0, 0, filepath=filepath)
-                lines.append("  <ReadFile>")
-                lines.append("    <path>" + filepath + "</path>")
-                lines.extend(self._format_range_result(0, returned_end, fingerprint_end, fingerprint, truncated, total_lines, content, indent="    "))
+                content, returned_end, range_end, truncated, total_lines = self._read_range(0, 0, filepath=filepath)
+                lines.extend(["  <ReadFile>", "    <path>" + os.path.relpath(filepath, self.cwd) + "</path>"])
+                lines.extend(self._format_range_result(0, returned_end, range_end, truncated, total_lines, content, indent="    "))
                 lines.append("  </ReadFile>")
             lines.append("</ReadToolResult>")
             return "\n".join(lines)
 
         if len(self.ranges) > 1:
-            lines = ["<ReadToolResult>", "  <range_count>" + str(len(self.ranges)) + "</range_count>"]
+            lines = [
+                "<ReadToolResult>",
+                '  <note>Content lines are "line:hash|code"; the "line:hash" part is the line anchor.</note>',
+                "  <range_count>" + str(len(self.ranges)) + "</range_count>",
+            ]
             for start, end in self.ranges:
-                content, returned_end, fingerprint_end, fingerprint, truncated, total_lines = self._read_range(start, end)
+                content, returned_end, range_end, truncated, total_lines = self._read_range(start, end)
                 lines.append("  <ReadRange>")
-                lines.extend(self._format_range_result(start, returned_end, fingerprint_end, fingerprint, truncated, total_lines, content, indent="    "))
+                lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent="    "))
                 lines.append("  </ReadRange>")
             lines.append("</ReadToolResult>")
             return "\n".join(lines)
 
-        content, returned_end, fingerprint_end, fingerprint, truncated, total_lines = self._read_range(self.start, self.end)
-        lines = ["<ReadToolResult>"]
-        lines.extend(self._format_range_result(self.start, returned_end, fingerprint_end, fingerprint, truncated, total_lines, content, indent="  "))
+        content, returned_end, range_end, truncated, total_lines = self._read_range(self.start, self.end)
+        lines = ["<ReadToolResult>", '  <note>Content lines are "line:hash|code"; the "line:hash" part is the line anchor.</note>']
+        lines.extend(self._format_range_result(self.start, returned_end, range_end, truncated, total_lines, content, indent="  "))
         lines.append("</ReadToolResult>")
         return "\n".join(lines)
 
-    def _target_filepaths(self) -> list[str]:
-        return self.filepaths or [self.filepath]
-
-    def _read_range(self, start: int, end: int, *, filepath: str | None = None) -> tuple[str, int, int, str, bool, int]:
+    def _read_range(self, start: int, end: int, *, filepath: str | None = None) -> tuple[str, int, int, bool, int]:
         target_filepath = filepath or self.filepath
         total_lines = 0
         selected_lines = []
@@ -1549,52 +1763,44 @@ def _read_range(self, start: int, end: int, *, filepath: str | None = None) -> t
                     truncated = True
         content = "".join(selected_lines)
         returned_end = start + len(selected_lines)
-        fingerprint_end = returned_end if truncated else end
-        fingerprint = self.range_fingerprints.remember(
-            filepath=target_filepath,
-            start=start,
-            end=fingerprint_end,
-            content=content,
-        )
-        return content, returned_end, fingerprint_end, fingerprint, truncated, total_lines
+        range_end = returned_end if truncated else end
+        return content, returned_end, range_end, truncated, total_lines
 
     def _format_range_result(
         self,
         start: int,
         returned_end: int,
-        fingerprint_end: int,
-        fingerprint: str,
+        range_end: int,
         truncated: bool,
         total_lines: int,
         content: str,
         *,
         indent: str,
     ) -> list[str]:
-        lines = [
-            indent + "<range>" + str(start) + ":" + str(fingerprint_end) + "</range>",
-            indent + "<fingerprint>" + fingerprint + "</fingerprint>",
-        ]
+        lines = [indent + "<range>" + str(start) + ":" + str(range_end) + "</range>"]
         if truncated:
             note = (
                 f"Read returned {returned_end - start} lines from {start}:{returned_end} of {total_lines} total lines. "
-                "Use Search to locate relevant text or Read smaller ranges in batches."
+                "Use Search to locate relevant text, Recall with a line range, or Read smaller targeted ranges; do not repeat the same large read."
             )
             lines.extend(
-                [
-                    indent + "<truncated>true</truncated>",
-                    indent + "<total_lines>" + str(total_lines) + "</total_lines>",
-                    indent + "<note>" + note + "</note>",
-                ]
+                [indent + "<truncated>true</truncated>", indent + "<total_lines>" + str(total_lines) + "</total_lines>", indent + "<note>" + note + "</note>"]
             )
-        lines.extend([indent + "<content no-indention>", content, indent + "</content>"])
+        numbered_content = "".join(f"{start + index}:{_line_hash(line)}|{line}" for index, line in enumerate(content.splitlines(keepends=True)))
+        lines.extend([indent + "<content hashline-numbered>", numbered_content, indent + "</content>"])
         return lines
 
 
 @dataclass
 class LineCountTool(Tool):
+    NAME: ClassVar[str] = "LineCount"
     EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY
-    DESCRIPTION: ClassVar[tuple[str, ...]] = ("Count lines for one or more files. Useful before reading large files or deciding Read ranges.",)
-    SIGNATURE: ClassVar[str] = "LineCount(*filepaths) -> LineCountToolResult<total_lines>"
+    DESCRIPTION: ClassVar[tuple[str, ...]] = (
+        "Count total lines in one or more files.",
+        "Use before large Read calls when choosing ranges.",
+        "Returns one total line count.",
+    )
+    SIGNATURE: ClassVar[str] = "LineCount(filepath[, filepath...]) -> LineCountToolResult<total_lines>"
     EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["code.py", "other.py"]',)
 
     filepaths: list[str] = field(default_factory=list)
@@ -1631,13 +1837,19 @@ def call(self) -> str:
 
 
 @dataclass
-class ListDirTool(Tool):
+class ListTool(Tool):
+    NAME: ClassVar[str] = "List"
     EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "List one directory non-recursively; optional glob filters immediate entry names.",
-        "Batch multiple ListDir actions in one turn when checking several known directories.",
+        "List immediate entries in one directory; non-recursive.",
+        "Optional glob filters immediate entry names.",
+        "Returns type and relative path for each entry.",
+    )
+    SIGNATURES: ClassVar[tuple[str, ...]] = (
+        "List() -> current directory entries",
+        "List(dirpath) -> entries in one directory",
+        "List(dirpath, glob) -> immediate entries matching glob",
     )
-    SIGNATURE: ClassVar[str] = "ListDir([dirpath][, glob]) -> ListDirToolResult<entries>"
     EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["src"]', 'Example args: ["src", "*.py"]', "Current dir args: []")
 
     dirpath: str = ""
@@ -1654,69 +1866,59 @@ def make(cls, session: Session, args: list[str]) -> Self:
 
     def preview(self) -> str:
         if self.glob_pattern:
-            return f'ListDir({self.dirpath}, "{self.glob_pattern}")'
-        return f"ListDir({self.dirpath})"
+            return f'List({self.dirpath}, "{self.glob_pattern}")'
+        return f"List({self.dirpath})"
 
     def requires_confirmation(self, session: Session) -> bool:
         return not session.is_path_in_cwd(self.dirpath)
 
-    def _dir_entry_type(self, entry: os.DirEntry[str]) -> str:
-        if entry.is_symlink():
-            return "symlink"
-        if entry.is_dir(follow_symlinks=False):
-            return "dir"
-        if entry.is_file(follow_symlinks=False):
-            return "file"
-        return "other"
-
-    def _entry_type_sort_key(self, entry_type: str) -> int:
-        return {"dir": 0, "file": 1, "symlink": 2, "other": 3}.get(entry_type, 4)
-
     def call(self) -> str:
         if not os.path.isdir(self.dirpath):
             raise ToolCallError("not a directory")
+        sort_order = {"dir": 0, "file": 1, "symlink": 2, "other": 3}
         entries = []
         with os.scandir(self.dirpath) as scan:
             for entry in scan:
                 if self.glob_pattern and not fnmatch.fnmatch(entry.name, self.glob_pattern):
                     continue
-                entries.append(
-                    {
-                        "name": entry.name,
-                        "path": entry.path,
-                        "type": self._dir_entry_type(entry),
-                    }
-                )
-        entries.sort(key=lambda item: (self._entry_type_sort_key(str(item["type"])), str(item["name"])))
-        lines = ["<ListDirToolResult>"]
+                if entry.is_symlink():
+                    entry_type = "symlink"
+                elif entry.is_dir(follow_symlinks=False):
+                    entry_type = "dir"
+                elif entry.is_file(follow_symlinks=False):
+                    entry_type = "file"
+                else:
+                    entry_type = "other"
+                entries.append({"name": entry.name, "path": entry.path, "type": entry_type})
+        entries.sort(key=lambda item: (sort_order.get(str(item["type"]), 4), str(item["name"])))
+        lines = ["<ListToolResult>"]
         for e in entries:
             lines.append(f"* ({e['type']}): {os.path.relpath(str(e['path']), self.cwd)}")
-        lines.append("</ListDirToolResult>")
+        lines.append("</ListToolResult>")
         return "\n".join(lines)
 
 
 @dataclass
 class SearchTool(Tool):
+    NAME: ClassVar[str] = "Search"
     MAX_MATCHES: ClassVar[int] = 100
+    OUTPUT_CHARS: ClassVar[int] = 24_000
     MAX_FILE_BYTES: ClassVar[int] = 2_000_000
     RG_MAX_FILESIZE: ClassVar[str] = "2M"
-    CONTEXT_LINES: ClassVar[int] = 4
+    CONTEXT_LINES: ClassVar[int] = 0
     MAX_CONTEXT_LINES: ClassVar[int] = 30
     EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Case-insensitive regex search before Read; use A|B|C for alternatives and \\n for multiline matches.",
-        "For exact text, escape regex metacharacters like braces, parens, dots, stars, and brackets.",
-        "Scope with path=FILE_OR_DIR, optionally filter with one glob=*.py, set context=N for 0..30 lines; omitted path defaults to current directory.",
-        "Second positional arg is always path, third positional arg is always glob; with path=, extra leading positional args are joined as regex alternatives.",
-        "Use at most one glob= per Search. For multiple extensions, run multiple Search actions or search path=. without glob.",
-        "Batch multiple Search actions in one turn when checking independent patterns or multiple globs.",
-        "Only options are path=, glob=, context=; escape regex symbols for literal text.",
+        "Case-insensitive regex search across files; use before Read when location is unknown.",
+        "Returns file:line matches and optional line:hash context anchors.",
+        "Options: path=FILE_OR_DIR, glob=GLOB, context=N. Use at most one glob per call.",
+        "Use InspectCode for symbol structure; use Bash rg/grep for custom shell pipelines.",
+        "Escape regex metacharacters for literal text; use A|B for alternatives and \\n for multiline.",
     )
-    SIGNATURE: ClassVar[str] = "Search(pattern[, path=path][, glob=pattern][, context=N]) -> SearchToolResult<matches>"
+    SIGNATURES: ClassVar[tuple[str, ...]] = ("Search(pattern[, path=FILE_OR_DIR][, glob=GLOB][, context=N]) -> matching lines",)
     EXAMPLE: ClassVar[tuple[str, ...]] = (
-        'Example args: ["class .*Tool", "path=nanocode.py", "context=0"]',
+        'Example args: ["class .*Tool", "path=nanocode.py"]',
         'Example args: ["TODO|FIXME", "path=.", "glob=*.py", "context=2"]',
-        'Multiple globs: use separate actions like ["pytest", "path=.", "glob=*.toml"] and ["pytest", "path=.", "glob=*.ini"].',
         'Literal paren args: ["def __init__\\(", "path=.", "glob=*.py"]',
     )
 
@@ -1736,7 +1938,10 @@ class Match:
 
     @classmethod
     def make(cls, session: Session, args: list[str]) -> Self:
-        args = cls._join_pattern_args_with_explicit_path(args)
+        args = [str(arg) for arg in args]
+        path_index = next((index for index, value in enumerate(args[1:], start=1) if value.startswith("path=")), None)
+        if path_index is not None and path_index > 1:
+            args = ["|".join(args[:path_index]), *args[path_index:]]
         if len(args) < 1 or len(args) > 4:
             raise ToolCallArgError("requires 1 to 4 args: pattern[, path=path][, glob=pattern][, context=N]")
         if any(str(arg).startswith("ignore_case") or str(arg).startswith("case_sensitive") for arg in args[1:]):
@@ -1754,8 +1959,6 @@ def make(cls, session: Session, args: list[str]) -> Self:
         path_set = False
         for raw_option in args[1:]:
             option = str(raw_option)
-            if option.startswith("ignore_case") or option.startswith("case_sensitive"):
-                raise ToolCallArgError("Search supports only path=, glob=, and context= options; ignore_case is not supported")
             if option.startswith("path="):
                 if path_set:
                     raise ToolCallArgError("path option cannot be combined with positional path")
@@ -1764,9 +1967,12 @@ def make(cls, session: Session, args: list[str]) -> Self:
                 continue
             if option.startswith("context=") or option.isdigit():
                 try:
-                    context_lines = cls._parse_context_arg(option)
+                    raw_context = option[len("context=") :] if option.startswith("context=") else option
+                    context_lines = int(raw_context)
+                    if context_lines < 0 or context_lines > cls.MAX_CONTEXT_LINES:
+                        raise ValueError
                 except ValueError:
-                    raise ToolCallArgError("context must be an integer between 0 and " + str(cls.MAX_CONTEXT_LINES))
+                    raise ToolCallArgError(f"context must be an integer between 0 and {cls.MAX_CONTEXT_LINES}")
                 continue
             if option.startswith("glob=") or option.startswith("glob_pattern="):
                 if glob_pattern:
@@ -1802,22 +2008,6 @@ def make(cls, session: Session, args: list[str]) -> Self:
             gitignore_patterns=cls._load_gitignore_patterns(session.cwd),
         )
 
-    @classmethod
-    def _join_pattern_args_with_explicit_path(cls, args: list[str]) -> list[str]:
-        values = [str(arg) for arg in args]
-        path_index = next((index for index, value in enumerate(values[1:], start=1) if value.startswith("path=")), None)
-        if path_index is None or path_index <= 1:
-            return values
-        return ["|".join(values[:path_index]), *values[path_index:]]
-
-    @classmethod
-    def _parse_context_arg(cls, value: str) -> int:
-        raw_context = value[len("context=") :] if value.startswith("context=") else value
-        context = int(raw_context)
-        if context < 0 or context > cls.MAX_CONTEXT_LINES:
-            raise ValueError
-        return context
-
     def requires_confirmation(self, session: Session) -> bool:
         return not session.is_path_in_cwd(self.target_path)
 
@@ -1852,9 +2042,6 @@ def _load_gitignore_patterns(cwd: str) -> list[str]:
             pass
         return patterns
 
-    def _is_hidden_path(self, path: str) -> bool:
-        return any(part.startswith(".") for part in self._relpath(path).split(os.sep) if part and part != ".")
-
     def _is_gitignored(self, path: str, is_dir: bool = False) -> bool:
         relpath = self._relpath(path).replace(os.sep, "/")
         name = os.path.basename(path)
@@ -1880,7 +2067,8 @@ def _is_gitignored(self, path: str, is_dir: bool = False) -> bool:
         return False
 
     def _is_skipped_path(self, path: str, is_dir: bool = False) -> bool:
-        return self._is_hidden_path(path) or self._is_gitignored(path, is_dir)
+        hidden = any(part.startswith(".") for part in self._relpath(path).split(os.sep) if part and part != ".")
+        return hidden or self._is_gitignored(path, is_dir)
 
     def _iter_files(self) -> Iterator[str]:
         if os.path.isfile(self.target_path):
@@ -1914,26 +2102,55 @@ def _read_match_context(self, path: str, line_number: int) -> list[tuple[int, st
                     if lineno > end:
                         break
                     if lineno >= start:
-                        context.append((lineno, line.rstrip("\n")[:300]))
+                        context.append((lineno - 1, line))
         except OSError:
             return []
         return context
 
-    def _format_result(self, engine: str, matches: list[Match], truncated: bool) -> str:
+    def _format_result_lines(self, engine: str, matches: list[Match], *, truncated: bool, include_context: bool, context_omitted: bool = False) -> list[str]:
         lines = ["<SearchToolResult>"]
         lines.append(f"* engine: {engine}")
+        if matches:
+            lines.append('<note>Context lines are 0-based "line:hash|code"; the "line:hash" part is the line anchor.</note>')
+        if context_omitted:
+            lines.append("* context_omitted: result too large; rerun with a narrower path or fewer matches for surrounding lines")
         if matches:
             for match in matches:
                 lines.append(f"* {self._relpath(match.path)}:{match.line_number}: {match.text}")
-                for lineno, text in match.context:
-                    marker = ">" if lineno == match.line_number else " "
-                    lines.append(f"  {marker} {lineno}: {text}")
+                if include_context:
+                    for index, line in match.context:
+                        marker = ">" if index == match.line_number - 1 else " "
+                        lines.append(f"  {marker} {index}:{_line_hash(line)}|{line.removesuffix(chr(10))[:300]}")
         else:
             lines.append("No matches.")
         if truncated:
             lines.append("* truncated: true")
         lines.append("</SearchToolResult>")
-        return "\n".join(lines)
+        return lines
+
+    def _format_result(self, engine: str, matches: list[Match], truncated: bool) -> str:  # Render the result text, falling back to smaller forms when it exceeds OUTPUT_CHARS.
+        lines = self._format_result_lines(engine, matches, truncated=truncated, include_context=True)
+        value = "\n".join(lines)
+        if len(value) <= self.OUTPUT_CHARS:
+            return value
+        if self.context_lines > 0:  # Second attempt: keep every match line but leave out the context lines.
+            lines = self._format_result_lines(engine, matches, truncated=truncated, include_context=False, context_omitted=True)
+            value = "\n".join(lines)
+            if len(value) <= self.OUTPUT_CHARS:
+                return value
+
+        lines = self._format_result_lines(engine, [], truncated=True, include_context=False)  # Rendered with no matches only to borrow the header and footer lines.
+        prefix = lines[:2]
+        suffix = lines[-2:]
+        body: list[str] = []
+        for match in matches:  # Last resort: add match lines in order until the next one would exceed the limit.
+            candidate = [*prefix, *body, f"* {self._relpath(match.path)}:{match.line_number}: {match.text}", *suffix]
+            if len("\n".join(candidate)) > self.OUTPUT_CHARS:
+                break
+            body.append(f"* {self._relpath(match.path)}:{match.line_number}: {match.text}")
+        if not body and matches:  # Not even the first match fits: emit it shortened to half the limit.
+            body.append(_shorten(f"* {self._relpath(matches[0].path)}:{matches[0].line_number}: {matches[0].text}", self.OUTPUT_CHARS // 2))
+        return "\n".join([*prefix, *body, *suffix])
 
     def _rg_command(self, rg: str, *, pcre2: bool = False) -> list[str]:
         cmd = [rg, "--json", "--line-number", "--max-filesize", self.RG_MAX_FILESIZE]
@@ -1954,7 +2171,8 @@ def _call_rg(self, rg: str) -> str:
             proc = subprocess.run(self._rg_command(rg), text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=30)
         except subprocess.TimeoutExpired:
             raise ToolCallError("rg timed out")
-        if proc.returncode not in (0, 1) and self._should_retry_rg_with_pcre2(proc.stderr):
+        stderr = proc.stderr.lower()
+        if proc.returncode not in (0, 1) and "pcre2" in stderr and ("look-around" in stderr or "look-ahead" in stderr or "look-behind" in stderr):
             pcre2 = True
             try:
                 proc = subprocess.run(self._rg_command(rg, pcre2=True), text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=30)
@@ -1988,10 +2206,6 @@ def _call_rg(self, rg: str) -> str:
                 return self._format_result(engine, matches, True)
         return self._format_result(engine, matches, False)
 
-    def _should_retry_rg_with_pcre2(self, stderr: str) -> bool:
-        text = stderr.lower()
-        return "pcre2" in text and ("look-around" in text or "look-ahead" in text or "look-behind" in text)
-
     def _is_multiline(self) -> bool:
         return "\n" in self.pattern or "\r" in self.pattern
 
@@ -2058,93 +2272,349 @@ def call(self) -> str:
         return self._call_python()
 
 
+def _code_index_module() -> Any | None:  # Import "code_symbol_index" on demand; returns the module, or None when the import fails.
+    try:
+        return importlib.import_module("code_symbol_index")
+    except ImportError:
+        return None
+
+
+def _code_index_db_path(session: Session) -> str:  # Path of the index database file below session.project_dir().
+    return os.path.join(session.project_dir(), "code-symbol-index", "index.sqlite")
+
+
+def _code_index_repository(session: Session, *, create_index: bool = False) -> Any:  # Return a Repository for session.cwd; raises ToolCallError when the index module is missing.
+    if not create_index and session.code_index_repository is not None:
+        return session.code_index_repository  # Reuse the repository already cached on the session.
+    module = _code_index_module()
+    if module is None:
+        raise ToolCallError("code index is unavailable")
+    db_path = _code_index_db_path(session)
+    if create_index:
+        os.makedirs(os.path.dirname(db_path), exist_ok=True)  # The database directory is created only when an index may be created.
+    repository = module.Repository(session.cwd, db_path=db_path, create_index=create_index)
+    if not create_index:
+        session.code_index_repository = repository  # Only non-creating opens are cached here; create_index=True results are not.
+    return repository
+
+
+def _code_index_status(session: Session, *, check: bool = False) -> tuple[str, str]:  # Return (status, message) for the session's code index.
+    module = _code_index_module()
+    if module is None:
+        return "unavailable", ""
+    try:
+        status = module.status(session.cwd, db_path=_code_index_db_path(session), check=check, max_pending_files=20, format="object")
+    except Exception as error:
+        return "error", str(error)  # Any failure of the status call is reported as the "error" status.
+    message = str(getattr(status, "message", None) or getattr(status, "reason", None) or "")  # Prefer "message", then "reason", else empty.
+    changes = getattr(status, "pending_changes", None)
+    files = getattr(status, "pending_files", ())
+    if changes:
+        pending = "pending " + str(changes)
+        if isinstance(files, (list, tuple)) and files:
+            sample = ", ".join(str(item) for item in files[:3])  # Name at most three pending files.
+            pending += " (" + sample + ("..." if len(files) > 3 else "") + ")"
+        message = (message + "; " if message else "") + pending
+    return str(getattr(status, "status", "error")), message  # A status object without a "status" attribute counts as "error".
+
+
+def _code_index_language_breakdown(session: Session) -> str:  # Comma-separated language summary of the index, or "" when nothing can be reported.
+    module = _code_index_module()
+    if module is None:
+        return ""
+    try:
+        status = module.status(session.cwd, db_path=_code_index_db_path(session), check=False, max_pending_files=0, format="object")
+    except Exception:
+        return ""
+    if str(getattr(status, "status", "error")) not in {"ready", "stale"}:
+        return ""
+    rows = []
+    for item in getattr(status, "language_breakdown", ()) or ():  # Each entry is read either as a dict or as an attribute object.
+        language = item.get("language") if isinstance(item, dict) else getattr(item, "language", None)
+        files = item.get("files") if isinstance(item, dict) else getattr(item, "files", None)
+        percent = item.get("percent") if isinstance(item, dict) else getattr(item, "percent", None)
+        if language and files is not None and percent is not None:
+            try:
+                rows.append(f"{language} {files} files ({float(percent):.1f}%)")
+            except (TypeError, ValueError):
+                rows.append(f"{language} {files} files")  # Percent is not convertible to float: leave it out.
+    if rows:
+        return ", ".join(rows)
+    languages = getattr(status, "languages", ()) or ()  # No breakdown rows were built: fall back to the plain "languages" attribute.
+    if isinstance(languages, str):
+        languages = (languages,)
+    return ", ".join(str(language) for language in languages if language)
+
+
+def _code_index_available(session: Session) -> bool:  # True when the index status is "ready" or "stale".
+    status, message = _code_index_status(session)
+    session.state.code_index_error = message if status == "error" else ""  # Side effect: records the error message, or clears it for any other status.
+    return status in {"ready", "stale"}
+
+
+def _set_code_index_notice(session: Session, event: str, *, done: int = 0, total: int = 0, seconds: int = 30) -> None:  # Store an "index:<phase>[ done/total]" status notice that expires after `seconds`.
+    phase = {"scan": "scan", "start": "parse", "file": "parse", "finish": "done"}.get(event, event)  # Events not in the table are used as the phase name unchanged.
+    suffix = (" " + str(done) + "/" + str(total)) if total > 0 else ""  # Counts are shown only when total is positive.
+    session.state.status_notice = "index:" + phase + suffix
+    session.state.status_notice_until = time.monotonic() + seconds
+    session.state.code_index_refreshing = phase not in {"done", "error"}  # Every phase except done/error marks a refresh as running.
+
+
+def _code_index_progress(session: Session) -> Callable[..., None]:  # Build a progress callback that forwards events to _set_code_index_notice.
+    def update(event: str, *, done: int = 0, total: int = 0, **_kwargs: object) -> None:  # Extra keyword arguments are accepted and ignored.
+        _set_code_index_notice(session, event, done=done, total=total)
+
+    return update
+
+
+def _code_index_refresh_existing_async(session: Session, progress: Callable[..., None] | None = None) -> bool:
+    # Start a background refresh of an existing index. False: no usable index or module. True: a refresh was attempted (start failures are stored in session.state.code_index_error).
+    module = _code_index_module()
+    if module is None or _code_index_status(session)[0] not in {"ready", "stale"}:
+        return False
+    session.code_index_repository = None
+    session.state.code_index_error = ""
+    session.state.code_index_refreshing = True
+    session.state.code_index_reload_needed = False
+    callback = progress or _code_index_progress(session)
+
+    def refresh_progress(event: str, *, done: int = 0, total: int = 0, **kwargs: object) -> None:
+        callback(event, done=done, total=total, **kwargs)
+        if event in {"finish", "done"}:
+            # Clear the in-flight flag here as well: a caller-supplied progress callback does not reset it.
+            session.state.code_index_refreshing = False
+            session.state.code_index_reload_needed = True
+
+    try:
+        module.refresh_async(session.cwd, db_path=_code_index_db_path(session), progress=refresh_progress)
+    except Exception as error:
+        session.state.code_index_refreshing = False
+        session.state.code_index_reload_needed = False
+        session.state.code_index_error = str(error)
+    return True
+
+
+def _code_index_reload_if_ready(session: Session) -> None:  # Make sure a repository is loaded once a reload was requested and no refresh is running.
+    if not session.state.code_index_reload_needed or session.state.code_index_refreshing:
+        return  # Nothing to reload, or a refresh is still flagged as running.
+    try:
+        _code_index_repository(session)
+        session.state.code_index_error = ""
+    except Exception as error:
+        session.code_index_repository = None
+        session.state.code_index_error = str(error)
+    session.state.code_index_reload_needed = False  # Cleared on success and on failure alike.
+
+
+def _code_index_sync(session: Session, *, force: bool = False) -> str:  # Build or refresh the index synchronously (force=True removes the index directory first); returns a text report.
+    before = _code_index_status(session)[0]
+    if force:
+        if _code_index_module() is None:
+            return "code_index: error\ncode index is unavailable"
+        session.code_index_repository = None
+        shutil.rmtree(os.path.dirname(_code_index_db_path(session)), ignore_errors=True)
+    try:
+        repository = _code_index_repository(session, create_index=True)
+        repository.refresh(progress=_code_index_progress(session))
+        session.code_index_repository = repository
+        session.state.code_index_reload_needed = False
+    except Exception as error:
+        session.code_index_repository = None
+        session.state.code_index_error = str(error)
+        # Progress events may already have flagged a refresh as running; publishing "error" clears that flag.
+        _set_code_index_notice(session, "error", seconds=2)
+        return "code_index: error\n" + str(error)
+    session.state.code_index_error = ""
+    _set_code_index_notice(session, "done", seconds=2)
+    status, message = _code_index_status(session)
+    action = "rebuilt" if force else ("initialized" if before == "missing" else "synced")
+    lines = ["code_index: " + action, "status: " + status, "path: " + _code_index_db_path(session)]
+    return "\n".join([*lines, "note: " + message] if message else lines)
+
+
+CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT = 20
+
+
+def _code_index_update_pending(session: Session, *, limit: int = CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT) -> None:  # Index the pending files of a stale index; does nothing when more than `limit` are pending.
+    module = _code_index_module()
+    if module is None or session.state.code_index_refreshing:
+        return
+    try:
+        status = module.status(session.cwd, db_path=_code_index_db_path(session), check=True, max_pending_files=limit + 1, format="object")  # limit + 1 lets an over-limit backlog be detected.
+    except Exception as error:
+        session.state.code_index_error = str(error)
+        return
+    if str(getattr(status, "status", "")) != "stale":
+        return
+    pending_changes = getattr(status, "pending_changes", None)
+    files = [str(path) for path in (getattr(status, "pending_files", ()) or ()) if path]  # "or ()" tolerates a pending_files attribute that is None.
+    if not files or len(files) > limit or (isinstance(pending_changes, int) and pending_changes > limit):
+        return
+    paths = list(dict.fromkeys(path for path in (session.resolve_path(path) for path in files) if session.is_path_in_cwd(path)))  # Resolve, keep paths inside cwd, drop duplicates in order.
+    if not paths:
+        return
+    try:
+        _code_index_repository(session).update(paths)
+        session.state.code_index_error = ""
+    except Exception as error:
+        session.state.code_index_error = str(error)  # Failures are recorded on the session state instead of being raised.
+
+
 @dataclass
-class EditTool(Tool):
-    EFFECT: ClassVar[ToolEffect] = ToolEffect.EDIT
+class InspectCodeTool(Tool):
+    NAME: ClassVar[str] = "InspectCode"
+    DEFAULT_LIMIT: ClassVar[int] = 20
+    MAX_LIMIT: ClassVar[int] = 80
+    EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Replace/delete one unique exact literal text block in an existing file; best for tiny unambiguous edits, not regex.",
-        "If the target text is repeated, structural, or line ranges are clearer, use ReplaceRange.",
+        "Use the current code index for symbols and file outlines.",
+        "find: symbol prefix -> candidates. inspect: one symbol -> anchored source and references. outline: file path -> symbol outline.",
+        "Targets are symbol names/prefixes, not natural language. Use Search/Read for literal text, config, or logs.",
+        "Options: limit, kind, path, exact_only, symbol.",
+    )
+    SIGNATURES: ClassVar[tuple[str, ...]] = (
+        "InspectCode('find', symbol_prefix[, {limit, kind, path, exact_only}]) -> symbol candidates with file/range",
+        "InspectCode('inspect', symbol_name[, {kind, path, exact_only}]) -> anchored source, signature, imports, and callers/callees when available",
+        "InspectCode('outline', filepath[, {symbol}]) -> file outline, or focused outline for one symbol in the file",
+    )
+    EXAMPLE: ClassVar[tuple[str, ...]] = (
+        'Find: ["find", "Tool", {"kind":"class","limit":20}]',
+        'Inspect: ["inspect", "Agent.run", {"path":"nanocode.py","exact_only":true}]',
+        'Outline: ["outline", "nanocode.py", {"symbol":"Tool"}]',
     )
-    SIGNATURE: ClassVar[str] = "Edit(filepath, find, replace) -> EditToolResult<path, replacements>"
-    EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["code.py", "old text", "new text"]',)
 
-    filepath: str = ""
-    find: str = ""
-    replace: str = ""
-    cwd: str = ""
+    mode: str = ""
+    target: str = ""
+    limit: int = DEFAULT_LIMIT
+    kind: str = ""
+    path: str = ""
+    exact_only: bool = False
+    symbol: str = ""
+    session: Session | None = None
 
     @classmethod
-    def cli_args(cls, args: list[str]) -> list[str]:
-        return [cls.cli_token(args[0])] if args else []
+    def tool_schema(cls) -> Json:  # Take the inherited schema and replace the "args" property with a 2-3 item array of strings/objects.
+        schema = super().tool_schema()
+        schema["function"]["parameters"]["properties"]["args"] = {
+            "type": "array",
+            "minItems": 2,
+            "maxItems": 3,
+            "items": {"type": ["string", "object"], "description": 'mode, target, then optional filters object. mode is "find", "inspect", or "outline".'},
+        }
+        return schema
 
     @classmethod
-    def make(cls, session: Session, args: list[str]) -> Self:
-        if len(args) != 3:
-            raise ToolCallArgError(
-                "Edit args error: got "
-                + str(len(args))
-                + ' args; expected ["filepath", "find", "replace"]. Example: Edit("nanocode.py", "old text", "new text"). Do not call Edit().'
-            )
-        find = str(args[1])
-        return cls(filepath=session.resolve_path(args[0]), find=find, replace=str(args[2]), cwd=session.cwd)
+    def make(cls, session: Session, args: list[JsonValue]) -> Self:  # Validate [mode, target[, options]] and build the tool; raises ToolCallArgError on bad args.
+        if not 2 <= len(args) <= 3:
+            raise ToolCallArgError("requires args: mode, target[, options]")
+        mode = str(args[0]).strip().lower()
+        if mode not in {"find", "inspect", "outline"}:
+            raise ToolCallArgError("mode must be find, inspect, or outline")
+        target = str(args[1]).strip()
+        if not target:
+            raise ToolCallArgError("target cannot be empty")
+        if len(args) == 2:
+            options = {}
+        else:
+            if not isinstance(args[2], dict):  # Check the type, not truthiness, so an empty object {} is accepted.
+                raise ToolCallArgError("options must be an object")
+            options = _json_dict(args[2])
+        limit = cls.DEFAULT_LIMIT
+        if mode == "find":
+            cls._validate_symbolish(target, "query")
+            try:
+                limit = min(cls.MAX_LIMIT, max(1, int(options.get("limit", cls.DEFAULT_LIMIT))))  # Clamp to 1..MAX_LIMIT.
+            except (TypeError, ValueError):
+                raise ToolCallArgError("limit must be an integer")
+        elif mode == "inspect":
+            cls._validate_symbolish(target, "symbol")
+            path_target = session.resolve_path(target)
+            dotted_path = session.resolve_path(target.replace(".", os.sep)) if "." in target and os.sep not in target else ""  # e.g. "pkg.mod" is also tried as "pkg/mod".
+            if os.path.exists(path_target) or (dotted_path and os.path.exists(dotted_path)):
+                raise ToolCallArgError("inspect target looks like a file or directory; use mode=outline, List, Search, or Read")
+            if "." in target and not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?", target):  # At most one dot (Owner.member) is accepted.
+                raise ToolCallArgError("symbol looks like a module path; use List/Search/Read for modules/packages, or pass a specific symbol")
+        else:
+            filepath = session.resolve_path(target)
+            if not os.path.isfile(filepath):
+                raise ToolCallArgError("outline target must be an existing file")
+            target = filepath
+            symbol = str(options.get("symbol") or "").strip()
+            if re.search(r"\s", symbol):
+                raise ToolCallArgError("outline symbol filter must be one symbol name or prefix")
+            options["symbol"] = symbol
+        if not _code_index_available(session):  # Checked after argument validation, so argument errors are reported first.
+            raise ToolCallError("code index is not available")
+        return cls(
+            mode=mode,
+            target=target,
+            limit=limit,
+            kind=str(options.get("kind") or "").strip(),
+            path=str(options.get("path") or "").strip(),
+            exact_only=options.get("exact_only") is True,
+            symbol=str(options.get("symbol") or "").strip(),
+            session=session,
+        )
+
+    @staticmethod
+    def _validate_symbolish(value: str, label: str) -> None:  # Raise ToolCallArgError when value contains any whitespace; `label` names the argument in the message.
+        if re.search(r"\s", value):
+            raise ToolCallArgError(label + " must be one symbol name or prefix; do not pass natural language")
 
     def preview(self) -> str:
-        label = f'Edit({self.filepath}, find="{self.find}")'
-        try:
-            with open(self.filepath, "r", encoding="utf-8") as f:
-                content = f.read()
-        except FileNotFoundError:
-            if self.find == "":
-                return _make_unified_diff("", self.replace, self.filepath) or label
-            return label + "\n# preview unavailable: file does not exist; use empty find to create"
-        except OSError as error:
-            return label + "\n# preview unavailable: " + str(error)
-        if self.find == "":
-            return label + "\n# preview unavailable: empty find creates missing files only"
-        if self.find not in content:
-            return label
-        if content.count(self.find) != 1:
-            return label + "\n# preview unavailable: target `find` text matched multiple times; use ReplaceRange or a larger unique find block"
-        return _make_unified_diff(content, content.replace(self.find, self.replace, 1), self.filepath) or label
+        options = {
+            key: value
+            for key, value in (
+                ("limit", self.limit if self.mode == "find" and self.limit != self.DEFAULT_LIMIT else 0),
+                ("kind", self.kind),
+                ("path", self.path),
+                ("exact_only", self.exact_only),
+                ("symbol", self.symbol),
+            )
+            if value
+        }
+        target = os.path.relpath(self.target, self.session.cwd) if self.mode == "outline" and self.session is not None else self.target
+        args: list[JsonValue] = [self.mode, target] + ([options] if options else [])
+        return "InspectCode(" + ", ".join(json.dumps(arg, ensure_ascii=False) for arg in args) + ")"
 
     def call(self) -> str:
-        created = False
-        try:
-            with open(self.filepath, "r", encoding="utf-8") as f:
-                content = f.read()
-        except FileNotFoundError:
-            if self.find != "":
-                raise ToolCallError("file does not exist; use empty find to create")
-            content = ""
-            created = True
-        if self.find == "" and not created:
-            raise ToolCallError("empty find creates missing files only")
-        if self.find not in content:
-            raise ToolCallError("target `find` text not found")
-        if content.count(self.find) != 1:
-            raise ToolCallError("target `find` text matched multiple times; use ReplaceRange or a larger unique find block")
-
-        with open(self.filepath, "w", encoding="utf-8") as f:
-            f.write(content.replace(self.find, self.replace, 1))
-
-        lines = [
-            "<EditToolResult>",
-            f"* path: {os.path.relpath(self.filepath, self.cwd)}",
-        ]
-        if created:
-            lines.append("* created: true")
+        if self.session is None:
+            raise ToolCallError("missing session")
+        repo = _code_index_repository(self.session)
+        if self.mode == "find":
+            text = repo.search_text(
+                self.target,
+                limit=self.limit,
+                kind=self.kind or None,
+                path=self.path or None,
+                exact_only=self.exact_only,
+            )
+        elif self.mode == "inspect":
+            text = repo.inspect_text(
+                self.target,
+                kind=self.kind or None,
+                path=self.path or None,
+                exact_only=self.exact_only,
+                anchors=True,
+            )
         else:
-            lines.append("* replacements: 1")
-        lines.append("</EditToolResult>")
+            text = repo.outline_text(self.target, symbol=self.symbol or None)
+        lines = ["<InspectCodeToolResult>"]
+        result = "mode: " + self.mode + "\n" + text
+        if result.strip():
+            lines.append(result.rstrip("\n"))
+        lines.append("</InspectCodeToolResult>")
         return "\n".join(lines)
 
 
 @dataclass
 class CreateFileTool(Tool):
+    NAME: ClassVar[str] = "CreateFile"
     EFFECT: ClassVar[ToolEffect] = ToolEffect.EDIT
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Create a new UTF-8 file with short initial content; parent directory must exist and target file must not exist.",
-        "For substantial new files, create only a small skeleton first, then grow it with focused ReplaceRange edits.",
+        "Create a new UTF-8 file; target file must not exist.",
+        "Use Edit for existing files.",
+        "Returns changed path and created=true.",
     )
     SIGNATURE: ClassVar[str] = "CreateFile(filepath, content) -> CreateFileToolResult<path>"
     EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["new.py", "minimal content\\n"]',)
@@ -2152,6 +2622,7 @@ class CreateFileTool(Tool):
     filepath: str = ""
     content: str = ""
     cwd: str = ""
+    can_create_parent: bool = False
 
     @classmethod
     def cli_args(cls, args: list[str]) -> list[str]:
@@ -2163,7 +2634,8 @@ def cli_args(cls, args: list[str]) -> list[str]:
     def make(cls, session: Session, args: list[str]) -> Self:
         if len(args) != 2:
             raise ToolCallArgError('requires exactly 2 args: filepath, content. Example: CreateFile("new.py", "content\\n")')
-        return cls(filepath=session.resolve_path(args[0]), content=str(args[1]), cwd=session.cwd)
+        filepath = session.resolve_path(args[0])
+        return cls(filepath=filepath, content=str(args[1]), cwd=session.cwd, can_create_parent=session.is_path_in_cwd(os.path.dirname(filepath)))
 
     def preview(self) -> str:
         label = f"CreateFile({self.filepath})"
@@ -2172,6 +2644,9 @@ def preview(self) -> str:
         return _make_unified_diff("", self.content, self.filepath) or label
 
     def call(self) -> str:
+        parent = os.path.dirname(self.filepath)
+        if parent and not os.path.isdir(parent) and self.can_create_parent:
+            os.makedirs(parent, exist_ok=True)
         try:
             with open(self.filepath, "x", encoding="utf-8") as f:
                 f.write(self.content)
@@ -2190,141 +2665,126 @@ def call(self) -> str:
 
 
 @dataclass
-class ReplaceRangeEdit:
-    start: int
-    end: int
-    fingerprint: str
-    before_context: str
-    after_context: str
+class EditEdit:
+    op: str
+    start: str
+    end: str
     content: str
+    old: str = ""
+    new: str = ""
 
 
 @dataclass
-class ReplaceRangeTool(Tool):
+class EditTool(Tool):
+    NAME: ClassVar[str] = "Edit"
+    PARAM_NAMES: ClassVar[tuple[str, ...]] = ("filepath", "edits")
     EFFECT: ClassVar[ToolEffect] = ToolEffect.EDIT
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Replace one or more small Read-backed [start,end) ranges in an existing file; best when exact line ranges are known or target text is not unique.",
-        "For several independent ranges in the same file, pass a batch as ReplaceRange(filepath, [[start,end,fingerprint,before_context,after_context,content], ...]).",
-        "Pass exact before_context and after_context boundary lines; use empty string at BOF/EOF.",
-        "Content is only the replacement for that range; do not include boundary lines.",
+        "Edit an existing UTF-8 file atomically.",
+        "Use line:hash anchors from Read, Search, or InspectCode for replace/delete/insert.",
+        "Use replace_all only for exact literal file-wide replacement.",
+        "Returns changed path, edit count, and applied ranges.",
     )
-    SIGNATURE: ClassVar[str] = (
-        "ReplaceRange(filepath, start, end, fingerprint, before_context, after_context, content) "
-        "or ReplaceRange(filepath, ranges) -> ReplaceRangeToolResult<path, range>"
+    SIGNATURES: ClassVar[tuple[str, ...]] = (
+        "Edit(filepath, [{op:'replace', start, end, content}, ...]) -> replace anchored ranges",
+        "Edit(filepath, [{op:'delete', start, end}, ...]) -> delete anchored ranges",
+        "Edit(filepath, [{op:'insert_before'|'insert_after', start, content}, ...]) -> insert at anchors",
+        "Edit(filepath, [{op:'replace_all', old, new}]) -> literal file-wide replacement",
     )
     EXAMPLE: ClassVar[tuple[str, ...]] = (
-        'Example args: ["code.py", "10", "12", "a1b2c3", "line before\\n", "line after\\n", "replacement lines\\n"]',
-        'Batch args: ["code.py", [["10", "12", "a1b2c3", "before\\n", "after\\n", "replacement\\n"]]]',
+        'Example args: ["code.py", [{"op":"replace","start":"10:a1b2c3","end":"12:d4e5f6","content":"new lines\\n"}]]',
+        'Example args: ["code.py", [{"op":"insert_after","start":"20:abc123","content":"new line\\n"}]]',
+        'Example args: ["code.py", [{"op":"replace_all","old":"OldName","new":"NewName"}]]',
     )
 
     filepath: str = ""
-    start: int = 0
-    end: int = 0
-    fingerprint: str = ""
-    before_context: str = ""
-    after_context: str = ""
-    content: str = ""
-    edits: list[ReplaceRangeEdit] = field(default_factory=list)
+    edits: list[EditEdit] = field(default_factory=list)
     cwd: str = ""
-    range_fingerprints: RangeFingerprintStore = field(default_factory=RangeFingerprintStore)
-
-    @classmethod
-    def cli_args(cls, args: list[str]) -> list[str]:
-        if len(args) == 2:
-            ranges = _json_list(args[1])
-            if ranges:
-                return [cls.cli_token(args[0]), str(len(ranges)) + " ranges"]
-        if len(args) < 3:
-            return [cls.cli_token(arg) for arg in args]
-        return [cls.cli_token(args[0]), str(args[1]) + ":" + str(args[2])]
 
     @classmethod
-    def merge_key(cls, call: ParsedToolCall) -> tuple[str, ...] | None:
-        if len(call.args) != 7:
-            return None
-        return (str(call.args[0]),)
+    def tool_schema(cls) -> Json:
+        schema = super().tool_schema()
+        anchored_edit_schema: Json = _tool_object_schema(
+            {
+                "op": {"type": "string", "enum": ["replace", "delete", "insert_before", "insert_after"]},
+                "start": {"type": "string", "description": 'Anchor copied from tool output, e.g. "10:a1b2c3".'},
+                "end": {"type": "string", "description": "Required for replace/delete; omit for inserts."},
+                "content": {"type": "string", "description": "Replacement or inserted text; use empty string for delete."},
+            },
+            ["op", "start"],
+        )
+        replace_all_schema: Json = _tool_object_schema(
+            {
+                "op": {"type": "string", "enum": ["replace_all"]},
+                "old": {"type": "string", "description": "Required for replace_all; literal text to replace."},
+                "new": {"type": "string", "description": "Required for replace_all; literal replacement text."},
+            },
+            ["op", "old", "new"],
+        )
+        schema["function"]["parameters"]["properties"]["args"] = {
+            "type": "array",
+            "minItems": 2,
+            "maxItems": 2,
+            "items": {"anyOf": [{"type": "string"}, {"type": "array", "minItems": 1, "items": {"anyOf": [anchored_edit_schema, replace_all_schema]}}]},
+            "description": "Exactly two arguments: filepath string, then edits array. Do not pass edits as a JSON string.",
+        }
+        return schema
 
     @classmethod
-    def merge_calls(cls, session: Session, calls: list[ParsedToolCall]) -> PreparedToolCall | None:
-        if len(calls) < 2:
-            return None
-        filepath = calls[0].args[0]
-        edits = []
-        intentions = []
-        for call in calls:
-            try:
-                start, end = _parse_line_range(str(call.args[1]), str(call.args[2]))
-            except ToolCallArgError:
-                return None
-            fingerprint = str(call.args[3])
-            if not fingerprint:
-                return None
-            edits.append(
-                ReplaceRangeEdit(start=start, end=end, fingerprint=fingerprint, before_context=str(call.args[4]), after_context=str(call.args[5]), content=str(call.args[6]))
-            )
-            if call.intention:
-                intentions.append(call.intention)
-        tool = cls._from_edits(session, filepath=filepath, edits=edits)
-        call = ParsedToolCall(name=cls.name(), intention="; ".join(intentions), args=list(calls[0].args))
-        return PreparedToolCall(call=call, tool=tool)
+    def cli_args(cls, args: list[str]) -> list[str]:
+        if len(args) == 2:
+            edits = _json_list(args[1])
+            if edits:
+                return [cls.cli_token(args[0]), str(len(edits)) + " edits"]
+        return [cls.cli_token(arg) for arg in args]
 
     @classmethod
     def make(cls, session: Session, args: list[JsonValue]) -> Self:
-        if len(args) == 2:
-            ranges = _json_list(args[1])
-            if not ranges:
-                raise ToolCallArgError("ranges cannot be empty")
-            return cls._from_edits(session, filepath=str(args[0]), edits=[cls._edit_from_args(_json_list(item)) for item in ranges])
-        if len(args) != 7:
-            raise ToolCallArgError("requires exactly 7 args or batch args: filepath, ranges")
-        return cls._from_edits(session, filepath=str(args[0]), edits=[cls._edit_from_args(args[1:])])
+        if len(args) != 2:
+            raise ToolCallArgError("requires args: filepath, edits")
+        edits = _json_list(args[1])
+        if not edits:
+            raise ToolCallArgError("edits cannot be empty")
+        return cls(filepath=session.resolve_path(str(args[0])), edits=[cls._edit_from_json(item) for item in edits], cwd=session.cwd)
 
     @staticmethod
-    def _edit_from_args(args: list[JsonValue]) -> ReplaceRangeEdit:
-        if len(args) != 6:
-            raise ToolCallArgError("range requires exactly 6 args: start, end, fingerprint, before_context, after_context, content")
-        start, end = _parse_line_range(str(args[0]), str(args[1]))
-        fingerprint = str(args[2])
-        if not fingerprint and (start != 0 or end != 0):
-            raise ToolCallArgError("fingerprint cannot be empty")
-        return ReplaceRangeEdit(start=start, end=end, fingerprint=fingerprint, before_context=str(args[3]), after_context=str(args[4]), content=str(args[5]))
-
-    @classmethod
-    def _from_edits(cls, session: Session, *, filepath: str, edits: list[ReplaceRangeEdit]) -> Self:
-        first = edits[0]
-        return cls(
-            filepath=session.resolve_path(filepath),
-            start=first.start,
-            end=first.end,
-            fingerprint=first.fingerprint,
-            before_context=first.before_context,
-            after_context=first.after_context,
-            content=first.content,
-            edits=edits,
-            cwd=session.cwd,
-            range_fingerprints=session.state.range_fingerprints,
-        )
+    def _edit_from_json(value: JsonValue) -> EditEdit:
+        item = _json_dict(value)
+        if not item:
+            raise ToolCallArgError("each edit must be an object")
+        op = str(item.get("op") or "").strip()
+        if op not in {"replace", "delete", "insert_before", "insert_after", "replace_all"}:
+            raise ToolCallArgError("edit op must be replace, delete, insert_before, insert_after, or replace_all")
+        start = str(item.get("start") or "").strip()
+        end = str(item.get("end") or "").strip()
+        content = str(item.get("content") or "")
+        old = str(item.get("old") or "")
+        new = str(item.get("new") or "")
+        if op == "replace_all":
+            if "old" not in item or "new" not in item:
+                raise ToolCallArgError("replace_all requires old and new")
+            if not old:
+                raise ToolCallArgError("replace_all old cannot be empty")
+            if start or end:
+                raise ToolCallArgError("replace_all does not use anchors")
+            return EditEdit(op=op, start="", end="", content="", old=old, new=new)
+        if not start:
+            raise ToolCallArgError("edit start anchor is required")
+        if op in {"replace", "delete"} and not end:
+            raise ToolCallArgError("replace/delete edits require end anchor")
+        if op in {"insert_before", "insert_after"} and end:
+            raise ToolCallArgError("insert edits use start anchor only")
+        if op in {"replace", "insert_before", "insert_after"} and "content" not in item:
+            raise ToolCallArgError("edit content is required")
+        return EditEdit(op=op, start=start, end=end, content=content)
 
     def preview(self) -> str:
-        label = self._label()
+        label = f"Edit({self.filepath}, {len(self.edits)} edits)"
         try:
             original, new_content, _ = self._preview()
         except (OSError, ToolCallError) as error:
             return label + "\n# preview unavailable: " + str(error)
-        warning = self._preview_warning()
-        diff = _make_unified_diff(original, new_content, self.filepath) or label
-        return (warning + "\n" if warning else "") + diff
-
-    def _preview_warning(self) -> str:
-        if len(self.edits) != 1:
-            return ""
-        if self.start == 0 and self.end == 0 and not os.path.exists(self.filepath):
-            return ""
-        if self.end == 0:
-            return "# warning: broad range replacement; prefer smaller semantic ranges"
-        if self.end - self.start > 20:
-            return "# warning: broad range replacement; prefer smaller semantic ranges"
-        return ""
+        return _make_unified_diff(original, new_content, self.filepath) or label
 
     def preview_error(self) -> str:
         try:
@@ -2334,114 +2794,97 @@ def preview_error(self) -> str:
         return ""
 
     def call(self) -> str:
-        created = not os.path.exists(self.filepath)
         original, new_content, replacements = self._preview()
         if new_content == original:
-            raise ToolCallError("range replacement produced no changes")
+            raise ToolCallError("edits produced no changes")
         with open(self.filepath, "w", encoding="utf-8") as f:
             f.write(new_content)
-
         relpath = os.path.relpath(self.filepath, self.cwd)
-        if len(replacements) == 1:
-            resolved, _ = replacements[0]
-            lines = [
-                "<ReplaceRangeToolResult>",
-                f"* path: {relpath}",
-                f"* range: {resolved.start}:{resolved.end}",
-                f"* fingerprint: {resolved.fingerprint}",
-            ]
-            if created:
-                lines.append("* created: true")
-            if resolved.relocated_from:
-                old_start, old_end = resolved.relocated_from
-                lines.append(f"* relocated_from: {old_start}:{old_end}")
-            lines.append("</ReplaceRangeToolResult>")
-            return "\n".join(lines)
-
         lines = [
-            "<ReplaceRangeToolResult>",
+            "<EditToolResult>",
             f"* path: {relpath}",
-            f"* replacements: {len(replacements)}",
+            f"* edits: {len(replacements)}",
         ]
-        for index, (resolved, _) in enumerate(replacements, start=1):
-            lines.append(f"* range[{index}]: {resolved.start}:{resolved.end}")
-            lines.append(f"* fingerprint[{index}]: {resolved.fingerprint}")
-            if resolved.relocated_from:
-                old_start, old_end = resolved.relocated_from
-                lines.append(f"* relocated_from[{index}]: {old_start}:{old_end}")
-        lines.append("</ReplaceRangeToolResult>")
+        for index, (start, end, _) in enumerate(replacements, start=1):
+            if start < 0:
+                lines.append(f"* replace_all[{index}]: {end} replacements")
+            else:
+                lines.append(f"* range[{index}]: {start}:{end}")
+        lines.append("</EditToolResult>")
         return "\n".join(lines)
 
-    def _preview(self) -> tuple[str, str, list[tuple[RangeFingerprintStore.Resolved, list[str]]]]:
-        file_missing = False
+    def _preview(self) -> tuple[str, str, list[tuple[int, int, list[str]]]]:
         try:
             with open(self.filepath, "r", encoding="utf-8") as f:
                 original = f.read()
         except FileNotFoundError:
-            file_missing = True
-            original = ""
+            raise ToolCallError("file does not exist; use CreateFile for new files")
+        if any(edit.op == "replace_all" for edit in self.edits):
+            if any(edit.op != "replace_all" for edit in self.edits):
+                raise ToolCallError("replace_all cannot be mixed with anchored edits")
+            new_content = original
+            replacements = []
+            for edit in self.edits:
+                count = new_content.count(edit.old)
+                if count == 0:
+                    raise ToolCallError("replace_all old text not found")
+                new_content = new_content.replace(edit.old, edit.new)
+                replacements.append((-1, count, []))
+            return original, new_content, replacements
+
         lines = original.splitlines(keepends=True)
         replacements = []
         for edit in self.edits:
-            if file_missing:
-                if len(self.edits) != 1 or edit.start != 0 or edit.end != 0 or edit.fingerprint or edit.before_context or edit.after_context:
-                    raise ToolCallError('file does not exist; use ReplaceRange(filepath, "0", "0", "", "", "", content) to create')
-                resolved = RangeFingerprintStore.Resolved(start=0, end=0, fingerprint=_range_fingerprint(""))
+            start = self._resolve_anchor(lines, edit.start)
+            if edit.op in {"replace", "delete"}:
+                end = self._resolve_anchor(lines, edit.end)
+                if end < start:
+                    raise ToolCallError("edit end anchor must be at or after start anchor")
+                slice_start, slice_end = start, end + 1
             else:
-                resolved = self.range_fingerprints.resolve(
-                    lines,
-                    filepath=self.filepath,
-                    start=edit.start,
-                    end=edit.end,
-                    fingerprint=edit.fingerprint,
-                )
-            replacement = self._replacement_lines(edit.content, has_following_line=resolved.end < len(lines))
-            self._validate_boundary_context(lines, resolved, edit, replacement)
-            replacements.append((resolved, replacement))
-        self._reject_overlapping_ranges(replacements)
+                slice_start = start if edit.op == "insert_before" else start + 1
+                slice_end = slice_start
+            if edit.op == "delete":
+                replacement = []
+            else:
+                replacement = edit.content.splitlines(keepends=True)
+                if edit.content and slice_end < len(lines) and not edit.content.endswith("\n"):
+                    replacement[-1] += "\n"
+            replacements.append((slice_start, slice_end, replacement))
+        previous: tuple[int, int] | None = None
+        for start, end, _ in sorted(replacements, key=lambda item: item[0]):
+            if previous is not None and (start < previous[1] or (start == previous[0] and end == previous[1])):
+                raise ToolCallError(f"edits overlap or share an insertion point: {previous[0]}:{previous[1]} and {start}:{end}")
+            previous = (start, end)
         new_lines = list(lines)
-        for resolved, replacement in sorted(replacements, key=lambda item: item[0].start, reverse=True):
-            new_lines[resolved.start : resolved.end] = replacement
+        for start, end, replacement in sorted(replacements, key=lambda item: item[0], reverse=True):
+            new_lines[start:end] = replacement
         return original, "".join(new_lines), replacements
 
-    def _label(self) -> str:
-        if len(self.edits) <= 1:
-            return f"ReplaceRange({self.filepath}, {self.start}, {self.end}, {self.fingerprint})"
-        return f"ReplaceRange({self.filepath}, {len(self.edits)} ranges)"
-
-    @staticmethod
-    def _reject_overlapping_ranges(replacements: list[tuple[RangeFingerprintStore.Resolved, list[str]]]) -> None:
-        previous: RangeFingerprintStore.Resolved | None = None
-        for resolved, _ in sorted(replacements, key=lambda item: item[0].start):
-            if previous is not None and resolved.start < previous.end:
-                raise ToolCallError(f"range replacements overlap: {previous.start}:{previous.end} and {resolved.start}:{resolved.end}")
-            previous = resolved
-
-    @staticmethod
-    def _validate_boundary_context(lines: list[str], resolved: RangeFingerprintStore.Resolved, edit: ReplaceRangeEdit, replacement: list[str]) -> None:
-        before_context = "" if resolved.start == 0 else lines[resolved.start - 1]
-        after_context = "" if resolved.end >= len(lines) else lines[resolved.end]
-        if edit.before_context != before_context:
-            raise ToolCallError("before_context mismatch; Read the target range with one line before and retry")
-        if edit.after_context != after_context:
-            raise ToolCallError("after_context mismatch; Read the target range with one line after and retry")
-        if before_context and replacement and replacement[0] == before_context:
-            raise ToolCallError("content includes before_context; expand start or remove the boundary line from content")
-        if after_context and replacement and replacement[-1] == after_context:
-            raise ToolCallError("content includes after_context; expand end or remove the boundary line from content")
-
     @staticmethod
-    def _replacement_lines(content: str, *, has_following_line: bool) -> list[str]:
-        lines = content.splitlines(keepends=True)
-        if content and has_following_line and not content.endswith("\n"):
-            lines[-1] += "\n"
-        return lines
+    def _resolve_anchor(lines: list[str], anchor: str) -> int:
+        anchor = anchor.split("|", 1)[0].strip()
+        match = re.fullmatch(r"(\d+):([0-9a-fA-F]{6})", anchor)
+        if match is None:
+            raise ToolCallError('invalid anchor; use "line:hash" copied from Search, Read, or InspectCode mode=inspect output')
+        index = int(match.group(1))
+        if index >= len(lines):
+            raise ToolCallError("anchor line is out of range; Read the target range again")
+        expected = match.group(2).lower()
+        current = _line_hash(lines[index])
+        if current != expected:
+            raise ToolCallError(f"stale anchor {anchor}; current hash is {current}; Read the target range again")
+        return index
 
 
 @dataclass
 class BashTool(Tool):
+    NAME: ClassVar[str] = "Bash"
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Run one explicit shell command via bash -lc in cwd; not for search, listing, or file edits when dedicated tools exist.",
+        "Run one shell command via bash -lc in cwd.",
+        "Use for tests, builds, scripts, or custom shell pipelines.",
+        "Prefer Search for anchored search results; use Bash rg/grep for custom filters.",
+        "Pass exactly one command string. Returns exit_code, stdout, and stderr.",
     )
     SIGNATURE: ClassVar[str] = "Bash(command) -> BashToolResult<exit_code, stdout, stderr>"
     EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["python3 -m py_compile nanocode.py"]', 'Example args: ["make test"]')
@@ -2451,18 +2894,14 @@ class BashTool(Tool):
     bash_path: str = ""
     cwd: str = ""
     timeout: int = 60
+    live_output: ToolOutputCallback | None = None
 
     @classmethod
     def cli_args(cls, args: list[str]) -> list[str]:
         if not args:
             return []
-        return [cls._cli_command_arg(args[0])]
-
-    @staticmethod
-    def _cli_command_arg(value: str) -> str:
-        if "\n" in value:
-            return Tool.cli_content_summary(value)
-        return _shorten(" ".join(value.split()), 120)
+        command = str(args[0])
+        return [Tool.cli_content_summary(command) if "\n" in command else _shorten(" ".join(command.split()), 120)]
 
     @classmethod
     def make(cls, session: Session, args: list[str]) -> Self:
@@ -2476,9 +2915,6 @@ def preview(self) -> str:
         return f'Bash("{self.command}")'
 
     def call(self) -> str:
-        return self.call_live()
-
-    def call_live(self, sink: Callable[[str], None] | None = None) -> str:
         stdout_parts: list[str] = []
         stderr_parts: list[str] = []
         selector = selectors.DefaultSelector()
@@ -2504,13 +2940,13 @@ def call_live(self, sink: Callable[[str], None] | None = None) -> str:
                         timed_out = True
                         self._kill_process_group(proc)
                         proc.wait()
-                        self._drain_selector(selector, stdout_parts, stderr_parts, sink)
+                        self._drain_selector(selector, stdout_parts, stderr_parts, self.live_output)
                         break
                     events = selector.select(min(0.2, remaining))
                     if not events:
                         continue
                     for key, _ in events:
-                        self._read_stream_chunk(selector, key, stdout_parts, stderr_parts, sink)
+                        self._read_stream_chunk(selector, key, stdout_parts, stderr_parts, self.live_output)
                 if proc.returncode is None:
                     proc.wait()
             except KeyboardInterrupt:
@@ -2524,6 +2960,8 @@ def call_live(self, sink: Callable[[str], None] | None = None) -> str:
                     proc.wait()
                 raise
             finally:
+                if self.live_output is not None:
+                    self.live_output("", "")
                 selector.close()
 
             stdout_text = "".join(stdout_parts)
@@ -2564,10 +3002,10 @@ def _drain_selector(
         selector: selectors.BaseSelector,
         stdout_parts: list[str],
         stderr_parts: list[str],
-        sink: Callable[[str], None] | None,
+        live_output: ToolOutputCallback | None = None,
     ) -> None:
         for key in list(selector.get_map().values()):
-            while cls._read_stream_chunk(selector, key, stdout_parts, stderr_parts, sink):
+            while cls._read_stream_chunk(selector, key, stdout_parts, stderr_parts, live_output):
                 pass
 
     @staticmethod
@@ -2576,7 +3014,7 @@ def _read_stream_chunk(
         key: selectors.SelectorKey,
         stdout_parts: list[str],
         stderr_parts: list[str],
-        sink: Callable[[str], None] | None,
+        live_output: ToolOutputCallback | None = None,
     ) -> bool:
         try:
             data = os.read(key.fileobj.fileno(), 4096)
@@ -2593,12 +3031,16 @@ def _read_stream_chunk(
                 pass
             return False
         text = data.decode("utf-8", errors="replace")
+        stream = "stdout" if key.data == "stdout" else "stderr"
         if key.data == "stdout":
             stdout_parts.append(text)
         else:
             stderr_parts.append(text)
-        if sink is not None:
-            sink(text)
+        if live_output is not None:
+            try:
+                live_output(stream, text)
+            except Exception:
+                pass
         return True
 
 
@@ -2607,9 +3049,12 @@ def _read_stream_chunk(
 
 @dataclass
 class GitTool(Tool):
+    NAME: ClassVar[str] = "Git"
     DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Run git without a shell for repository state, history, status, diff, and changed files.",
+        "Run git directly without a shell.",
+        "Use for status, diff, log, show, blame, staging, and commits.",
         "Pass each git argument separately; optional first arg cwd=path changes repository directory.",
+        "Returns exit_code, stdout, and stderr. Mutating git commands require confirmation.",
     )
     SIGNATURE: ClassVar[str] = "Git([cwd=path,] git_arg...) -> GitToolResult<exit_code, stdout, stderr>"
     EXAMPLE: ClassVar[tuple[str, ...]] = (
@@ -2667,24 +3112,21 @@ def call(self) -> str:
             return _format_process_result("GitToolResult", -1, error.stdout or "", (error.stderr or "") + "timeout")
 
 
-class PlanModeGitTool(GitTool):
-    NAME: ClassVar[str] = "Git"
-    DESCRIPTION: ClassVar[tuple[str, ...]] = (
-        "Run readonly git commands only: status, diff, log, show, rev-parse, ls-files, grep, blame.",
-        "Pass each git argument separately; optional first arg cwd=path changes repository directory.",
-    )
-
-
 @dataclass
 class ToolResultTool(Tool):
     NAME: ClassVar[str] = "Recall"
     EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY
-    DESCRIPTION: ClassVar[tuple[str, ...]] = ("Recall stored tool results by tr.* key; pass optional 0-based line ranges to read exact slices from the stored full log.",)
-    SIGNATURE: ClassVar[str] = "Recall(key...[, range_token...]) -> RecallToolResult<content>"
+    DESCRIPTION: ClassVar[tuple[str, ...]] = (
+        "Retrieve stored tool results by tr.N key.",
+        "Use when output was truncated, forgotten, or no longer visible.",
+        "Optional 0-based ranges read exact slices from the stored full log.",
+        "Returns result metadata plus content.",
+    )
+    SIGNATURE: ClassVar[str] = "Recall(key[, key...][, range...]) -> RecallToolResult<content>"
     EXAMPLE: ClassVar[tuple[str, ...]] = (
         'Example args: ["tr.1"]',
-        'Batch keys: ["tr.1", "tr.2"]',
-        'Full-log slice: ["tr.1", "0,120"]',
+        'Example args: ["tr.1", "tr.2"]',
+        'Example args: ["tr.1", "0,120"]',
     )
     REQUIRES_CONFIRMATION: ClassVar[bool | None] = False
 
@@ -2696,7 +3138,7 @@ class ToolResultTool(Tool):
     @classmethod
     def make(cls, session: Session, args: list[str]) -> Self:
         keys = [arg for arg in args if not re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg)]
-        ranges = [ReadTool._parse_line_range_token(arg) for arg in args if re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg)]
+        ranges = [_parse_line_range_token(arg) for arg in args if re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg)]
         return cls(keys=keys, results=session.state.tool_result_store, cwd=session.cwd, ranges=ranges)
 
     def preview(self) -> str:
@@ -2742,323 +3184,205 @@ def _content(self, item: ToolResultItem) -> str:
 
 
 TOOL_REGISTRY: dict[str, ToolClass] = {
-    ReadTool.name(): ReadTool,
-    LineCountTool.name(): LineCountTool,
-    ListDirTool.name(): ListDirTool,
-    SearchTool.name(): SearchTool,
-    CreateFileTool.name(): CreateFileTool,
-    EditTool.name(): EditTool,
-    ReplaceRangeTool.name(): ReplaceRangeTool,
-    BashTool.name(): BashTool,
-    GitTool.name(): GitTool,
-    ToolResultTool.name(): ToolResultTool,
+    ReadTool.NAME: ReadTool,
+    LineCountTool.NAME: LineCountTool,
+    ListTool.NAME: ListTool,
+    InspectCodeTool.NAME: InspectCodeTool,
+    SearchTool.NAME: SearchTool,
+    CreateFileTool.NAME: CreateFileTool,
+    EditTool.NAME: EditTool,
+    BashTool.NAME: BashTool,
+    GitTool.NAME: GitTool,
+    ToolResultTool.NAME: ToolResultTool,
+}
+
+
+def _canonical_tool_name(name: str | None) -> str:
+    if not name:
+        return ""
+    return next((tool_name for tool_name in TOOL_REGISTRY if tool_name.lower() == name.lower()), name)
+
+
+TOOL_STRING_SCHEMA: Json = {"type": "string"}
+TOOL_NULLABLE_STRING_SCHEMA: Json = {"type": ["string", "null"]}
+TOOL_ITEMS_SCHEMA: Json = {"type": "array", "items": TOOL_JSON_VALUE_SCHEMA}
+TOOL_STRING_LIST_SCHEMA: Json = {"type": "array", "items": {"type": "string"}}
+TOOL_PLAN_FOLLOWUP_STATUS_SCHEMA: Json = {
+    "type": ["string", "null"],
+    "enum": [*ALL_PLAN_FOLLOWUP_STATUSES],
+}
+TOOL_PLAN_FOLLOWUP_SCHEMA: Json = _tool_object_schema(
+    {
+        "status": TOOL_PLAN_FOLLOWUP_STATUS_SCHEMA,
+        "reason": {
+            **TOOL_NULLABLE_STRING_SCHEMA,
+            "description": "Short reason or evidence for this status. Required when status is not unknown.",
+        },
+    },
+    [],
+)
+TOOL_PLAN_ITEMS_SCHEMA: Json = {
+    "type": "array",
+    "items": _tool_object_schema(
+        {
+            "op": {"type": ["string", "null"], "enum": ["add", "update", "remove"]},
+            "id": TOOL_NULLABLE_STRING_SCHEMA,
+            "text": TOOL_NULLABLE_STRING_SCHEMA,
+            "status": {"type": ["string", "null"], "enum": [*ALL_PLAN_STATUSES]},
+            "context": TOOL_NULLABLE_STRING_SCHEMA,
+            "followup_action": {
+                **TOOL_PLAN_FOLLOWUP_SCHEMA,
+                "description": "Follow-on non-check work caused by this step. Use needed until the action is added/done, none only with reason.",
+            },
+            "followup_check": {
+                **TOOL_PLAN_FOLLOWUP_SCHEMA,
+                "description": "Follow-on validation caused by this step. Use needed until checked, done with evidence, none only with reason.",
+            },
+        },
+        [],
+    ),
+}
+TOOL_LEAD_ITEMS_SCHEMA: Json = {
+    "type": "array",
+    "items": _tool_object_schema(
+        {
+            "id": TOOL_NULLABLE_STRING_SCHEMA,
+            "text": TOOL_NULLABLE_STRING_SCHEMA,
+            "status": {"type": ["string", "null"], "enum": [*ALL_LEAD_STATUSES]},
+            "source": TOOL_STRING_LIST_SCHEMA,
+            "context": TOOL_NULLABLE_STRING_SCHEMA,
+        },
+        [],
+    ),
 }
-PLAN_MODE_TOOLS: tuple[ToolClass, ...] = (ReadTool, LineCountTool, ListDirTool, SearchTool, PlanModeGitTool, ToolResultTool)
 
 
+STATE_TOOL_PARAMS: dict[str, tuple[str, Json, list[str]]] = {
+    "goal": (
+        "Set or complete the active task goal. Use message_for_complete for the final user message.",
+        {
+            "text": TOOL_STRING_SCHEMA,
+            "complete": {"type": "boolean"},
+            "message_for_complete": TOOL_NULLABLE_STRING_SCHEMA,
+        },
+        ["text", "complete", "message_for_complete"],
+    ),
+    "plan": ("Set or patch the shortest necessary plan for tracked work.", {"mode": TOOL_NULLABLE_STRING_SCHEMA, "items": TOOL_PLAN_ITEMS_SCHEMA}, ["items"]),
+    "lead": ("Record investigation leads and their status.", {"items": TOOL_LEAD_ITEMS_SCHEMA}, ["items"]),
+    "known": ("Record confirmed Facts that affect the current task.", {"items": TOOL_ITEMS_SCHEMA}, ["items"]),
+    "user_rule": (
+        "Save an explicit future behavior rule from the user.",
+        {"text": TOOL_STRING_SCHEMA, "message": TOOL_STRING_SCHEMA},
+        ["text", "message"],
+    ),
+    "forget": (
+        "Remove visible tool result keys from active context; keys remain recallable.",
+        {"source": TOOL_STRING_LIST_SCHEMA, "reason": TOOL_STRING_SCHEMA},
+        ["source", "reason"],
+    ),
+    "verify": (
+        "Record a concrete check result or blocker.",
+        {
+            "method": TOOL_NULLABLE_STRING_SCHEMA,
+            "status": {"type": "string", "enum": ["passed", "failed", "blocked"]},
+            "blocker": {"type": ["string", "null"], "enum": ["user", "environment", "tool", "unknown"]},
+            "context": TOOL_NULLABLE_STRING_SCHEMA,
+        },
+        ["status", "context"],
+    ),
+    "keep": (
+        "Keep visible raw tool result keys in context during observe.",
+        {"source": TOOL_STRING_LIST_SCHEMA, "reason": TOOL_STRING_SCHEMA},
+        ["source", "reason"],
+    ),
+}
+PROTOCOL_ACTION_TYPES = frozenset((*STATE_TOOL_PARAMS, "tool"))
+
+
+def _canonical_protocol_action_type(name: str | None) -> str:
+    if not name:
+        return ""
+    return next((action_type for action_type in PROTOCOL_ACTION_TYPES if action_type.lower() == name.lower()), name)
+
+
+def _state_tool_schema(name: str) -> Json:
+    description, properties, required = STATE_TOOL_PARAMS[name]
+    return _function_tool_schema(name, description, _tool_object_schema(properties, required))
+
+
+COMPACT_TOOL_SCHEMA = _function_tool_schema(
+    "compact",
+    "Return a compact continuation summary and retained facts.",
+    _tool_object_schema(
+        {
+            "summary": TOOL_STRING_SCHEMA,
+            "known": TOOL_ITEMS_SCHEMA,
+        },
+        ["summary", "known"],
+    ),
+)
+
 ############################
 # Agent Prompt
 ############################
 
-AGENT_SYSTEM_PROMPT = """You are nanocode, a coding agent.
-
-OUTPUT
-- Return JSON action frames only: no prose, no native/function tool calls.
-- Separate multiple actions with __END_ACTION__.
-- Valid action types: chat, start, goal, plan, hypothesis, known, stable_knowledge, progress, user_rule, tool, verify, forget.
-- Tool names such as Read, Search, Edit, Git, and Recall belong in tool.name, never in action type.
-- Tool actions require name, intention, and args.
-- Use the latest user language for user-facing text; keep it plain, concise, and direct.
-
-PRIORITY AND STATE
-- Priority: Latest User Request > User Rules > Current Goal > Plan/Known/Stable Knowledge > Conversation History.
-- Latest User Request overrides stale Goal, but Task Code decides whether to start a new task.
-- Task Code: new = align latest request with start; working = continue current Goal; verifying = run/record verification; done = wait for next request.
-- If Task Code is working or verifying, do not output start or rewrite Goal.
-- Never repeat a previous completion as the answer.
-- User Rules are mandatory long-term behavior rules; add them only when the user explicitly asks to remember future behavior.
-
-MEMORY AND TOOL RESULTS
-- Known = settled current-task facts that still matter after visible tool results disappear.
-- Hypotheses = investigation directions with status { __hypothesis_status_text__ }.
-- Stable Knowledge = rare reusable codebase facts: stack, structure, workflow, convention, gotcha.
-- Do not store intentions, TODOs, guesses, user requests, routine observations, or duplicate facts in Known.
-- Tool Result Index, Kept Tool Results, Unreduced Tool Results, and Latest Tool Results are support context; do not restate raw results.
-- OBSERVE keeps useful raw results and forgets noise. ACT must not keep results.
-- In ACT, use forget only when a visible result is already irrelevant; first preserve any needed conclusion in Plan, Known, Hypotheses, or Verify. Forget preserves logs and Recall.
-
-DECISION ORDER
-Choose the main next action and include tightly related state updates in the same turn.
-1. chat: casual chat or direct non-coding answers.
-2. user_rule: only explicit future-behavior memory requests.
-3. start: only when Task Code is new; set goal, work_mode normal|investigate, and a short plan.
-4. plan/known/hypothesis: only when direction, target, hypothesis status, or verification path changes. If a frontier tool/verify/goal is already known, include it in the same turn instead of stopping on state updates.
-5. tool: execute the current action frontier. Frontier = useful next actions with known args and no dependency between them. Batch broad related searches/reads/recalls/checks; serialize only when later args depend on earlier results.
-6. verify: after edits or explicit check/test/build requests, use the smallest relevant check; if the exact check already passed in recent results, record passed.
-7. goal: complete only when the goal is done, all Plan items are done/blocked with result context, and verification passed or is blocked by the user.
-
-PLANNING
-- Use plans only for real tasks; usually 2-5 concrete outcome steps.
-- Update Plan only when status, text, context, or ordering changes.
-- Pair Plan/Known/Hypothesis updates with the next frontier action whenever its arguments are known.
-- Use patch for small Plan changes; use replace only when restructuring.
-- At most one item may be doing.
-- Done context must cite result context; blocked context must name the concrete blocker.
-- Add a verify step only for edits, explicit checks, or correctness-sensitive changes.
-- If Plan is complete and verification passed/blocked, finish by default. To continue tools, first reopen Plan with a todo/doing item explaining why completion is insufficient.
-
-INVESTIGATION
-- Use work_mode=investigate for competing explanations, root-cause reasoning, or branch elimination.
-- Track plausible directions separately; each should imply a concrete check.
-- Mark hypotheses ruled_out when result context eliminates them, confirmed before root-cause completion.
-- Prefer useful readonly tool batches over intermediate state-only turns.
-
-EDITING AND DISCOVERY
-- Use Search/ListDir/LineCount when target file/path/symbol/range is unknown.
-- Read only known paths/ranges or search-narrowed targets; read small ranges around likely matches.
-- Stop discovery when exact target and next edit/check are clear; do not repeat equivalent searches.
-- Edit incrementally: one small coherent change per edit action.
-- New file: create a minimal skeleton first; grow large content with focused ReplaceRange chunks.
-- Existing file: inspect exact target before editing. Never rewrite a large file in one action.
-- Use Edit for one tiny exact literal block that appears once.
-- Use ReplaceRange after Read for ranges, repeated text, insertions, and structural edits; use ReplaceRange(filepath, ranges) for several known independent ranges in one file.
-
-VERIFICATION
-- Verification strength: none for simple answers, light for read/static confirmation, tool for code changes or requested checks, user for visual/manual confirmation.
-- Verify action requires kind, method, criteria, status passed|failed|blocked, context, and blocker when blocked.
-- Passed context must cite concrete recent tool result context. Blocked verification must set blocker and context.
-- If verification fails, record failed and repair before completion.
-- A build/test after a failed edit in the same tool batch does not verify that edit; repair or confirm the edit first.
-- Do not use pending verification status.
-- Complete with verify blocked only when blocker=user; otherwise continue, repair, or ask.
-
-TOOLS
-- Prefer dedicated tools over Bash. Bash is for explicit shell commands or when no dedicated tool exists.
-- Git is for status, diff, history, and changed files.
-- Recall fetches stored result keys; batch distinct keys and recall each needed key at most once.
-- Every tool intention must state the question being answered or concrete outcome needed.
-
-ACTIONS:
-
-{"type":"chat","text":"<reply>"}
-
-{"type":"start","goal":"<current task goal>","work_mode":"normal|investigate","plan":[{"id":"p1","text":"<step>","status":"todo|doing|done|blocked","context":null}]}
-
-{"type":"goal","text":"<current task goal>","complete":true|false,"message_for_complete":null|"<final user message>"}
-
-{"type":"plan","items":[{"id":"p1","text":"<step>","status":"todo|doing|done|blocked","context":null|"<short result context>"}]}
-
-{"type":"plan","mode":"patch","items":[{"id":"p1","status":"todo|doing|done|blocked","context":null|"<short result context>"}]}
-
-{"type":"hypothesis","items":[{"id":"h1","text":"<possible root-cause direction>","status":"{ __hypothesis_statuses__ }","source":["tr.1"],"context":null|"<short result context>"}]}
-
-{"type":"known","items":["<new durable current-task fact>"]}
-{"type":"known","items":[{"source":["tr.1"],"text":"<durable current-task fact supported by a tool result>"}]}
-
-{"type":"stable_knowledge","items":[{"category":"stack|structure|workflow|convention|gotcha","text":"<rare reusable codebase fact>"}]}
-
-{"type":"progress","text":"<short progress update>"}
-
-{"type":"user_rule","text":"<long-term user behavior rule>","message":"<short acknowledgement>"}
-
-{"type":"forget","source":["tr.1"],"reason":"<why this visible tool result no longer matters>"}
-
-{"type":"tool","name":"{ __tool_names__ }","intention":"<question or concrete outcome>","args":["<arg>"]}
-
-{"type":"verify","kind":"syntax_check|change_syntax_check|lint|test|build|change_check|other|kind+kind","method":null|"<short target label>","criteria":["<explicit pass/block criterion>"],"status":"passed|failed|blocked","blocker":null|"user|environment|tool|unknown","context":null|"<tool result context or blocker>"}
-
-TOOL SPECS:
-{ __tools__ }
-"""
-AGENT_PLAN_SYSTEM_PROMPT = """You are nanocode in PLAN MODE.
-
-You are a planning agent, not an implementation agent.
-
-OUTPUT PROTOCOL
-- Return JSON action frames only.
-- No prose outside JSON.
-- No native/function tool calls.
-- Separate multiple actions with __END_ACTION__.
-- Allowed action types: start, goal, plan, hypothesis, known, stable_knowledge, progress, tool, verify.
-- Tool names such as Read, Search, Git, Recall, LineCount, and ListDir belong in tool.name, never in action type.
-- Every action must be a single valid JSON object.
-- Do not invent fields when a listed action shape already fits.
-
-MODE BOUNDARIES
-- Produce an implementation plan for the latest user request.
-- Do not implement, change files, run tests, install packages, run shell commands, or mutate repository state.
-- Do not propose non-readonly discovery.
-- Do not turn the plan into code unless the user explicitly asked only for a design/code sketch outside the repository.
-- If the user asks for implementation while you are in PLAN MODE, plan the implementation; do not perform it.
-
-LANGUAGE
-- Use the latest user language for all user-facing text, including progress and the final proposed plan.
-- Preserve code, identifiers, filenames, command names, config keys, API names, and quoted text exactly.
-- If the user mixes languages, follow the dominant language of the latest request.
-
-READONLY DISCOVERY
-- Allowed tools: Read, LineCount, ListDir, Search, Recall.
-- Git is allowed only for readonly inspection: status, diff, log, show, rev-parse, ls-files, grep, blame.
-- Use only the readonly tools listed in TOOL SPECS. Do not request any other tools.
-- Use the smallest useful discovery batch.
-- Prefer targeted Search/Read over broad surveys.
-- Prefer reading the owning file and nearby tests over unrelated code.
-- Stop discovery as soon as the files, ownership boundaries, approach, risks, and verification path are clear enough.
-- Call more readonly tools only when the final proposal would otherwise rely on guesswork.
-
-PLANNING DOCTRINE
-Design before action:
-- First clarify what problem is being solved, what must not change, and what success looks like.
-- Separate the user's goal from the possible implementation mechanism.
-- Prefer a correct direction over a fast but structurally wrong shortcut.
-- Think several steps ahead, but only propose the smallest useful step now.
-
-Fit the existing system:
-- Fit the existing architecture before proposing new abstractions.
-- Identify current ownership boundaries: modules, layers, public APIs, state owners, side-effect owners, and test owners.
-- Respect existing naming, style, dependency direction, error handling, and data flow.
-- Do not introduce a new architectural style when a local change fits the current one.
-
-Start from concerns:
-- Identify relevant functional concerns.
-- Identify relevant non-functional concerns when they may affect design: performance, consistency, availability, latency, scalability, compatibility, maintainability, security, debuggability, and migration cost.
-- State tradeoffs only when they affect the proposed implementation.
-- Scale the depth of design analysis to the risk and scope of the request.
-
-Keep it simple:
-- Prefer the simplest design that preserves correctness and future flexibility.
-- Avoid speculative generality.
-- Add an abstraction only when it removes real duplication, stabilizes a boundary, hides unavoidable complexity, or enables a known extension.
-- Avoid thin pass-through interfaces that add coupling without adding capability.
-- Avoid special-case fixes unless the request is itself special-case behavior.
-- If two designs are viable, prefer the one with fewer moving parts, clearer ownership, and easier verification.
-
-Module and layer judgment:
-- Decompose top-down for broad changes: subsystem -> module -> file -> symbol.
-- For local changes, start at the owning symbol and expand only as needed.
-- Keep modules focused on one topic.
-- Keep high-cohesion logic together and low-coupling boundaries explicit.
-- Prefer dependency flow from higher-level orchestration toward lower-level capabilities.
-- Avoid new cycles; if a cycle is unavoidable, call it out as a risk or propose a smaller split.
-- Push unavoidable complexity downward behind a stable boundary when doing so simplifies callers.
-- Do not leak internal failure handling, retries, fallback, or compatibility mechanics into unrelated callers.
-
-Interfaces and contracts:
-- For any public or shared interface, identify the contract before proposing changes.
-- Check whether the interface should be orthogonal to nearby APIs, whether it overlaps existing behavior, and whether important cases are missing.
-- Prefer interfaces that make the common case simple.
-- Note idempotency, undefined behavior, validation, error cases, compatibility, and call ordering when relevant.
-- Prefer explicit names and explicit state transitions over ambiguous combined operations.
-- Preserve backward compatibility unless the user explicitly asks for a breaking change.
-- If compatibility may break, propose versioning, migration, adapter behavior, or rollback.
-
-Data, state, and side effects:
-- Identify what data is read, written, derived, cached, emitted, or persisted.
-- Keep data model changes minimal and direct.
-- Separate calculation from IO when it makes the logic easier to test or reason about.
-- Separate data and behavior when behavior should apply to many entities or batches.
-- Separate strategy/policy from core model when business rules may vary while the model should stay stable.
-- Identify side effects such as filesystem writes, network calls, database writes, cache invalidation, events, logging, metrics, and user-visible output.
-
-Time, concurrency, and sequencing:
-- When behavior spans multiple steps, processes, workers, requests, events, or retries, describe the sequence.
-- Identify the driver: user action, request, IO event, queue consumer, cron/timer, test runner, or background worker.
-- Call out ordering assumptions, races, idempotency requirements, retry behavior, and compensation paths when relevant.
-- For event/signal based designs, avoid circular signal chains and unclear ownership.
-
-Closed-loop reliability:
-- Prefer designs where each module contains its own routine failure handling.
-- Prevent errors, retries, fallback, and cleanup responsibilities from leaking across unrelated boundaries.
-- Include observability/debuggability when useful: logs, metrics, traces, error messages, assertions, or inspection points.
-- Include rollback or migration concerns when a change affects public APIs, persisted data, configuration, deployment, or shared behavior.
-- Use redundancy/fallback only when it addresses a real failure mode; keep the added complexity local.
-
-Verification:
-- Scale verification with risk.
-- For local changes, propose narrow tests or checks near the touched code.
-- For shared contracts, propose broader regression tests.
-- For data, migration, compatibility, or concurrency risks, propose targeted edge-case tests.
-- Include manual verification only when automated verification is unavailable or insufficient.
-- Verification steps must be executable by a coding agent, but you must not run them.
-
-DISCOVERY STRATEGY
-1. For a new Task Code, start with one concise planning goal and 2-4 discovery steps.
-2. Search for owners before reading large files.
-3. Prefer support from code, tests, docs, and recent relevant Git history.
-4. After tool results, use Latest Tool Results, Unreduced Tool Results, and Kept Tool Results; use known for settled current-task facts and stable_knowledge only for rare reusable codebase facts.
-5. Use stable_knowledge sparingly for broadly true technical facts that are not repository-specific.
-6. Update plan status as discovery progresses.
-7. If the request is ambiguous but a reasonable reversible path exists, proceed with stated assumptions and include open questions in the final plan.
-8. Complete with goal.complete=true only when the final proposal is ready.
-
-ACTION SEMANTICS
-- start: initialize the planning goal and discovery plan for a new Task Code.
-- plan: update discovery or planning item status.
-- known: record durable repository findings from discovery. Do not include guesses.
-- stable_knowledge: record stable external/technical knowledge. Use sparingly.
-- progress: brief user-facing status update in the latest user language.
-- tool: request one readonly discovery tool call.
-- verify: record only concrete verification status from readonly discovery; put planned checks in the final proposed plan.
-- goal: complete the planning task with the final proposed plan.
-
-FINAL MESSAGE CONTRACT
-- The final action must be type="goal" with complete=true.
-- message_for_complete must contain exactly one <proposed_plan>...</proposed_plan> block.
-- Do not include text before or after the <proposed_plan> block inside message_for_complete.
-- The proposed plan must be concrete and executable by a coding agent.
-- The proposed plan must not include implementation output, generated patches, command execution results, or claims that tests were run.
-
-The <proposed_plan> block should include these sections, in this order:
-1. Goal
-2. Current understanding / durable findings
-3. Design rationale
-4. Touched files and symbols
-5. Ordered implementation steps
-6. Verification plan
-7. Risks, tradeoffs, rollback, and open questions
-
-FINAL PLAN QUALITY BAR
-Before completing, ensure the plan answers:
-- What is the smallest correct change?
-- Which module owns the change?
-- What public contracts or data contracts are affected?
-- What state, side effects, or sequencing matter?
-- What failure modes should stay closed-loop within the owning module?
-- What compatibility or migration concern exists, if any?
-- How should the coding agent verify the change?
-- What uncertainty remains?
-
-CORE ACTION SHAPES
-{"type":"start","goal":"<planning goal>","work_mode":"normal|investigate","plan":[{"id":"p1","text":"<discovery step>","status":"todo|doing|done|blocked","context":null}]}
-{"type":"plan","mode":"patch","items":[{"id":"p1","status":"todo|doing|done|blocked","context":"<result context or reason>"}]}
-{"type":"hypothesis","items":[{"id":"h1","text":"<possible direction>","status":"{ __hypothesis_statuses__ }","source":["tr.1"],"context":"<result context or reason>"}]}
-{"type":"known","items":[{"source":["tr.1"],"text":"<durable fact from discovery>"}]}
-{"type":"stable_knowledge","items":["<stable technical fact relevant to the plan>"]}
-{"type":"progress","message":"<brief user-facing progress update>"}
-{"type":"tool","name":"{ __tool_names__ }","intention":"<question being answered>","args":["<arg>"]}
-{"type":"verify","kind":"other","method":"<check label>","criteria":["<what should pass>"],"status":"blocked","blocker":"user|environment|tool|unknown","context":"<why verification cannot run in plan mode>"}
-{"type":"goal","text":"<planning goal>","complete":true,"message_for_complete":"<proposed_plan>...</proposed_plan>"}
-
-TOOL SPECS:
-{ __tools__ }
+# Prompt design:
+# - Keep the system prompt short and stable; put tool-specific rules in tool descriptions.
+# - Order the user prompt from stable context to volatile context to preserve provider prefix cache hits.
+# - Keep the latest request, blocking feedback, and output guide near the end because they change most and steer the next output.
+# - Keep section names stable; change prompt shape only when the workflow meaning changes.
+AGENT_SYSTEM_PROMPT = """You are nanocode, a terminal coding agent.
+
+Use assistant text for chat/final answers; use function tools for state/repo work.
+Use tool schemas for exact names, capabilities, and arguments.
+WHEN THE NEXT USEFUL ACTION IS CLEAR, TAKE IT NOW.
+
+Priority: latest user request > blocking feedback > user rules > active state > conversation.
+Never repeat an old completion. Do not rewrite Goal unless the user changed the task.
+
+Workflow:
+- Chat: answer directly; do not create task state.
+- One-shot: use only needed tools, then answer and stop; do not create task state just to report.
+- Tracked task: for edits/debugging/checks/multi-step work, set Goal, keep the shortest necessary correct Plan, act on the current step, record Checks after edits or requested checks, finish with goal.complete=true.
+
+Current step:
+- Choose the smallest useful action from latest request, feedback, visible results, and Plan.
+- Batch clear tool calls in one response.
+- Tool calls run in order. If one fails, later tool calls are skipped.
+- Use ordered tools for edit-then-check when the check is clear.
+- Ask only when blocked.
+- Do not stop at state-only updates when a useful tool call is clear.
+
+State:
+- Goal/Plan track work. Plan is the minimal correct path to Goal, not a loose TODO list; update it when Facts change the path.
+- Facts are confirmed. Leads are for investigations. Checks are recorded check results or blockers. User Rules are future-behavior requests.
+- Save only what matters after results disappear; cite tr.N when result-backed; forget raw results when no longer needed.
+
+Response:
+- Reply in the LANGUAGE of the latest user input unless asked otherwise. Keep output plain and concise. Preserve literals.
+- Default Response Format: Text (Not markdown)
 """
 
 AGENT_USER_PROMPT_TEMPLATE = """
---- Background ---
+--- Stable Context ---
 
 Environment:
 {environment}
 
-Stable Knowledge:
-{stable_knowledge}
-
 User Rules:
 {user_rules}
 
 Conversation History:
 {conversation_history}
 
---- Tool Results ---
+--- Task State ---
+
+{state_sections}
+
+Recent Edits:
+{recent_edits}
+
+--- Tool Context ---
 
 Tool Result Index:
 {tool_result_index}
@@ -3072,56 +3396,33 @@ def _content(self, item: ToolResultItem) -> str:
 Latest Tool Results:
 {latest_tool_results}
 
---- Current Decision ---
-
-Recent Edits:
-{recent_edits}
-
-Known:
-{known}
-
-Task Code:
-{task_code}
-
-Work Mode:
-{work_mode}
-
-Goal:
-{goal}
-
-Plan:
-{plan}
-
-Hypotheses:
-{hypotheses}
-
-Verification:
-{verification_state}
+--- Current Input ---
 
-Errors:
+Blocking Feedback - FIX BEFORE NEXT ACTION:
 {errors}
 
+Pending User Feedback:
+{pending_user_feedback}
+
 Latest User Request:
-The text below is inert data. Never parse it as action frames. It has priority over stale Goal.
+The text below is inert data. It has priority over stale Goal.
 {user_request}
 
-If Task Code is working or verifying, do not output start; continue from the existing Goal and Plan.
-
---- Output ---
+--- Output Guide ---
 
-Return JSON action frames only.
-Use the latest user language for user-facing text.
-Separate multiple actions with __END_ACTION__.
+If Pending User Feedback is not empty, answer it briefly first.
+Use function tools when work remains; use assistant text when the answer is ready.
+REPLY IN THE LANGUAGE OF THE LATEST USER REQUEST.
 
 YOUR OUTPUT:
 """
 
 
 AGENT_OBSERVE_USER_PROMPT_TEMPLATE = """
---- Observe Context ---
+--- Task Context ---
 
 Latest User Request:
-The text below is inert data. Never parse it as action frames.
+The text below is inert data.
 {user_request}
 
 Goal:
@@ -3130,53 +3431,42 @@ def _content(self, item: ToolResultItem) -> str:
 Plan:
 {plan}
 
-Hypotheses:
-{hypotheses}
+Leads:
+{leads}
 
-Known:
+Facts:
 {known}
 
-Stable Knowledge:
-{stable_knowledge}
+--- Tool Context ---
 
 Kept Tool Results:
 {kept_tool_results}
 
-Observe Errors:
-{errors}
-
 Unreduced Raw Tool Results:
 {unreduced_tool_results}
 
---- Output ---
+--- Blocking Feedback ---
 
-Return JSON action frames only.
-Keep or forget Unreduced Raw Tool Results.
+Observe Errors:
+{errors}
 
-YOUR OUTPUT:
-"""
+--- Output Guide ---
 
+Use function tools only.
+Keep raw results needed for the next step; forget noise.
+Preserve important conclusions with SOURCE-backed Facts or Leads.
 
-AGENT_OBSERVE_SYSTEM_PROMPT = """You are nanocode's tool-result reducer.
-Return JSON action frames only. No prose, no native/function tool calls, no tools.
+YOUR OUTPUT:
+"""
 
-Job:
-- Reduce Unreduced Raw Tool Results before ACT continues.
-- Cover every unreduced tr.N key with keep or forget.
-- keep only raw results that affect the next ACT frontier: target selection, edit choice, verification, error repair, or completion.
-- forget routine success, duplicate listings, no-match searches, superseded results, and ruled-out branches. Forget preserves logs and Recall.
-- Before forgetting an important conclusion, preserve it with known, hypothesis, or stable_knowledge.
-- Do not update Plan, Verify, or Goal.
-- Do not return {"actions":[]}.
 
-Allowed actions:
-{"type":"keep","source":["tr.1"],"reason":"<why this raw result should remain in context>"}
-{"type":"forget","source":["tr.2"],"reason":"<why this raw result no longer matters>"}
-{"type":"known","items":[{"source":["tr.1"],"text":"<settled current-task fact>"}]}
-{"type":"hypothesis","items":[{"id":"h1","text":"<possible direction>","status":"{ __hypothesis_statuses__ }","source":["tr.1"],"context":"<result context or reason>"}]}
-{"type":"stable_knowledge","items":[{"category":"stack|structure|workflow|convention|gotcha","text":"<rare reusable codebase fact>"}]}
+AGENT_OBSERVE_SYSTEM_PROMPT = """You are nanocode's context reducer.
+Use function tools only. No prose.
 
-Separate multiple actions with __END_ACTION__.
+Reduce raw tool results before ACT continues.
+Keep only what affects the next step.
+Forget noise; omitted results are compacted.
+Preserve durable conclusions as source-backed Facts or Leads.
 """
 
 
@@ -3187,8 +3477,9 @@ def _content(self, item: ToolResultItem) -> str:
 
 COMPACTOR_PROMPT = """You are nanocode's conversation-history compactor.
 
-Compress conversation history and Known facts so the coding agent can continue later.
+Compress conversation history and Facts so the coding agent can continue later.
 Do not solve the task or add unsupported facts.
+Use the compact function tool only.
 
 Preserve continuity-critical facts:
 - user requests and changes
@@ -3197,9 +3488,9 @@ def _content(self, item: ToolResultItem) -> str:
 - plan/status
 - files, paths, symbols, and APIs touched
 - commands run and outcomes
-- known facts and context keys needed later
+- facts and context keys needed later
 - unresolved blockers and open questions
-- verification context
+- checks context
 
 Omit noise:
 - raw logs
@@ -3209,17 +3500,14 @@ def _content(self, item: ToolResultItem) -> str:
 - context values unless needed for continuity
 
 Write the shortest complete continuation summary.
-Compress Known to concise durable facts.
-
-Output strict JSON only: {"summary": "<summary>", "known": [{"text": "<stable fact>", "source": ["tr.1"]}]}
-Known may use strings only when no source exists.
+Compress Facts to concise durable facts.
 """
 
 
 COMPACT_USER_PROMPT_TEMPLATE = """
------------ Known_To_Compact Begin ------------
+----------- Facts_To_Compact Begin ------------
 {known}
---------- Known_To_Compact End ----------------
+--------- Facts_To_Compact End ----------------
 
 ----------- Conversation_To_Compact Begin ------
 {conversation}
@@ -3227,134 +3515,15 @@ def _content(self, item: ToolResultItem) -> str:
 """
 
 
-class PromptBuilder:
-    def __init__(
-        self,
-        session: Session,
-        *,
-        system_prompt_template: str = AGENT_SYSTEM_PROMPT,
-        user_prompt_template: str = AGENT_USER_PROMPT_TEMPLATE,
-        blackboard: Blackboard | None = None,
-        runtime: AgentRuntime | None = None,
-        tool_context: ToolResultContext | None = None,
-    ):
-        self.session = session
-        self.system_prompt_template = system_prompt_template
-        self.user_prompt_template = user_prompt_template
-        self.blackboard = blackboard or Blackboard()
-        self.runtime = runtime or AgentRuntime()
-        self.tool_context = tool_context or ToolResultContext()
-
-    def system_prompt(self, template: str | None = None, *, tools: Iterable[ToolClass] | None = None) -> str:
-        tool_classes = tuple(TOOL_REGISTRY.values() if tools is None else tools)
-        return (
-            (template or self.system_prompt_template)
-            .replace("{ __tools__ }", self._format_tools(tool_classes))
-            .replace("{ __tool_names__ }", "|".join(tool.name() for tool in tool_classes))
-            .replace("{ __hypothesis_statuses__ }", HYPOTHESIS_STATUS_SCHEMA)
-            .replace("{ __hypothesis_status_text__ }", HYPOTHESIS_STATUS_TEXT)
-            .strip()
-        )
-
-    def user_prompt(
-        self,
-        *,
-        tool_result_index: str,
-        unreduced_tool_results: str,
-        latest_tool_results: str,
-        errors: str,
-    ) -> str:
-        current = self.blackboard
-        conversation = self.session.state.conversation
-        return self.user_prompt_template.format(
-            environment="\n".join(["- system: " + self.session.system, "- arch: " + self.session.arch, "- cwd: " + self.session.cwd]),
-            conversation_history="\n\n".join(item.format() for item in conversation) if conversation else "(empty)",
-            user_rules=self.session.state.user_rules.format(),
-            known="\n".join(KnownItem.format_item(item) for item in current.known) if current.known else "(empty)",
-            kept_tool_results="\n\n".join(self.tool_context.kept_results) or "(empty)",
-            stable_knowledge=self._format_stable_knowledge(),
-            tool_result_index=tool_result_index or "(empty)",
-            unreduced_tool_results=unreduced_tool_results or "(empty)",
-            latest_tool_results=latest_tool_results or "(empty)",
-            task_code=self.blackboard.task_code,
-            work_mode=self.blackboard.work_mode,
-            goal=current.goal or "(empty)",
-            plan="\n".join(item.format() for item in current.plan) if current.plan else "(empty)",
-            hypotheses="\n".join(item.format() for item in current.hypotheses) if current.hypotheses else "(empty)",
-            verification_state=current.verification.format(),
-            errors=errors or "(empty)",
-            recent_edits="\n".join(self.runtime.recent_edits) if self.runtime.recent_edits else "(empty)",
-            user_request=self._format_user_request(),
-        ).strip()
-
-    def observe_user_prompt(self, unreduced_tool_results: str, errors: str) -> str:
-        current = self.blackboard
-        return AGENT_OBSERVE_USER_PROMPT_TEMPLATE.format(
-            user_rules=self.session.state.user_rules.format(),
-            goal=current.goal or "(empty)",
-            plan="\n".join(item.format() for item in current.plan) if current.plan else "(empty)",
-            hypotheses="\n".join(item.format() for item in current.hypotheses) if current.hypotheses else "(empty)",
-            known="\n".join(KnownItem.format_item(item) for item in current.known) if current.known else "(empty)",
-            stable_knowledge=self._format_stable_knowledge(),
-            kept_tool_results="\n\n".join(self.tool_context.kept_results) or "(empty)",
-            errors=errors or "(empty)",
-            unreduced_tool_results=unreduced_tool_results or "(empty)",
-            user_request=self._format_user_request(),
-        ).strip()
-
-    def _format_user_request(self) -> str:
-        user_request = self.blackboard.user_input or "(empty)"
-        fence = "`" * max(3, max((len(match.group(0)) for match in re.finditer(r"`{3,}", user_request)), default=0) + 1)
-        return fence + "text\n" + user_request + "\n" + fence
-
-    def _format_tools(self, tools: Iterable[ToolClass]) -> str:
-        lines = []
-        for tool in tools:
-            lines.append("- " + tool.SIGNATURE)
-            for item in tool.DESCRIPTION:
-                lines.append("  - " + item)
-            for item in tool.EXAMPLE:
-                lines.append("  - " + item)
-        return "\n".join(lines)
-
-    def _format_stable_knowledge(self) -> str:
-        knowledge = self.blackboard.stable_knowledge
-        if not any(knowledge.values()):
-            return "(empty)"
-        lines = []
-        for category in STABLE_KNOWLEDGE_CATEGORIES:
-            items = [item for item in knowledge.get(category, []) if item]
-            if not items:
-                continue
-            lines.append(category + ":")
-            lines.extend("- " + item for item in items)
-            lines.append("")
-        return "\n".join(lines).rstrip()
-
-    def format_archived_tool_result_index(self, visible_result_keys: set[str] | None = None, *, limit: int = 0) -> list[str]:
-        if not self.session.state.tool_result_store:
-            return []
-        hidden_keys = visible_result_keys or set()
-        lines = []
-        for key, item in self.session.state.tool_result_store.items():
-            if key in hidden_keys:
-                continue
-            lines.append(item.format(result_key=key))
-        return lines[-limit:] if limit > 0 else lines
-
-
-############################
-# LLM Request (ModelClient)
-############################
+############################
+# LLM Request (ModelClient)
+############################
 
 
 HTTP_USER_AGENT = "nanocode/" + __version__
 
 
 class ModelClient:
-    ACTION_FRAME_END: ClassVar[str] = "__END_ACTION__"
-    ACTION_FRAME_END_SPLIT_PATTERN: ClassVar[re.Pattern[str]] = re.compile(r"\**_*\s*END[\s_-]*ACTION\s*_*\**", re.IGNORECASE)
-
     def __init__(self, session: Session):
         self.session = session
         self._timeout_reason = "request model timeout"
@@ -3368,7 +3537,9 @@ def request(
         user_prompt: str,
         *,
         activity: str = "agent",
-        parse_actions: bool = True,
+        on_stream_action: Callable[[Json], bool] | None = None,
+        tool_schemas: list[Json] | None = None,
+        required_tool: str | None = None,
     ) -> Json:
         config = self.session.config.provider
         if not config.url:
@@ -3383,123 +3554,546 @@ def request(
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": user_prompt},
         ]
-        payload: Json = {
-            "model": model,
-            "messages": messages,
-        }
-        if config.temperature is not None:
-            payload["temperature"] = config.temperature
         stream = config.stream is not False
-        if stream:
-            payload["stream"] = True
-            payload["stream_options"] = {"include_usage": True}
-        timeout, first_token_timeout = self._request_timeouts(config, activity=activity)
-        if config.reasoning is not False and config.reasoning_payload == "reasoning":
-            payload["reasoning"] = {"effort": config.reasoning_effort or "medium"}
-        if config.reasoning is not False and config.reasoning_payload == "reasoning_effort":
-            payload["reasoning_effort"] = config.reasoning_effort or "medium"
-        self._write_debug_prompt(activity=activity, messages=messages)
-        url = config.url.rstrip("/")
-
-        request = urllib.request.Request(
-            url=url if url.endswith("/chat/completions") else url + "/chat/completions",
-            data=json.dumps(payload).encode("utf-8"),
-            headers={
-                "Authorization": "Bearer " + config.key,
-                "Content-Type": "application/json",
-                "User-Agent": HTTP_USER_AGENT,
-            },
+        timeout = config.timeout if config.timeout is not None else 180
+        first_token_timeout = config.first_token_timeout if config.first_token_timeout is not None else timeout
+        api = config.resolved_api()
+        params = (
+            self._responses_params(
+                config,
+                model=model,
+                system_prompt=system_prompt,
+                user_prompt=user_prompt,
+                stream=stream,
+                tool_schemas=tool_schemas,
+                required_tool=required_tool,
+            )
+            if api == "responses"
+            else self._chat_completion_params(config, model=model, messages=messages, stream=stream, tool_schemas=tool_schemas, required_tool=required_tool)
         )
+        DebugTrace.prompt(self.session, activity=activity, messages=messages)
+        DebugTrace.model_request(self.session, activity=activity, api=api, model=model, stream=stream, params=params, tool_schemas=tool_schemas)
+        client = OpenAI(api_key=config.key, base_url=config.base_url(), timeout=timeout, max_retries=0, default_headers={"User-Agent": HTTP_USER_AGENT})
         request_elapsed = 0.0
         try:
-            self.session.state.current_model_call_started_at = time.monotonic()
-            self.session.state.current_model_call_label = model
-            self.session.state.current_model_call_reasoning_label = config.reasoning_effort if config.reasoning else "off"
-            self.session.state.current_model_call_activity = activity
-            self.session.state.current_model_call_has_content = False
-            self.session.state.current_model_call_streaming_chars = 0
-            request_deadline = self.session.state.current_model_call_started_at + max(0, timeout)
-            previous_handler = signal.getsignal(signal.SIGALRM)
-            signal.signal(signal.SIGALRM, self._timeout_handler)
-            self._timeout_reason = "request model timeout"
-            signal.setitimer(signal.ITIMER_REAL, max(0, timeout))
-            try:
-                with urllib.request.urlopen(request, timeout=timeout) as response:
-                    if stream:
-                        content, usage = self._read_streaming_content(
-                            response,
+            with ModelRetryShortcut(self.session):
+                self.session.state.current_model_call_started_at = time.monotonic()
+                self.session.state.current_model_call_label = model
+                self.session.state.current_model_call_reasoning_label = config.reasoning
+                self.session.state.current_model_call_activity = activity
+                self.session.state.current_model_call_has_content = False
+                self.session.state.current_model_call_streaming_chars = 0
+                request_deadline = self.session.state.current_model_call_started_at + max(0, timeout)
+                previous_handler = signal.getsignal(signal.SIGALRM)
+                signal.signal(signal.SIGALRM, self._timeout_handler)
+                self._timeout_reason = "request model timeout"
+                signal.setitimer(signal.ITIMER_REAL, max(0, timeout))
+                try:
+                    if api == "chat" and stream and tool_schemas:
+                        response, usage = self._read_chat_tool_stream(
+                            client,
+                            params,
+                            timeout=timeout,
+                            request_deadline=request_deadline,
+                            first_token_timeout=first_token_timeout,
+                            activity=activity,
+                            on_stream_action=on_stream_action,
+                        )
+                        result = {"usage": usage, **response}
+                        content = ""
+                    elif api == "responses" and stream and tool_schemas:
+                        response, usage = self._read_responses_tool_stream(
+                            client,
+                            params,
+                            timeout=timeout,
                             request_deadline=request_deadline,
                             first_token_timeout=first_token_timeout,
+                            activity=activity,
+                            on_stream_action=on_stream_action,
                         )
-                        result: Json = {"usage": usage}
+                        result = {"usage": usage, **response}
+                        content = ""
                     else:
-                        body = response.read().decode("utf-8")
-            finally:
-                signal.setitimer(signal.ITIMER_REAL, 0)
-                signal.signal(signal.SIGALRM, previous_handler)
-                if self.session.state.current_model_call_started_at > 0:
-                    request_elapsed = max(0.0, time.monotonic() - self.session.state.current_model_call_started_at)
-                    if request_elapsed > 0 and self.session.state.current_model_call_streaming_chars > 0:
-                        self.session.state.last_model_call_rate = self._estimate_stream_rate(request_elapsed)
-                self.session.state.current_model_call_started_at = 0.0
-                self.session.state.current_model_call_label = ""
-                self.session.state.current_model_call_reasoning_label = ""
-                self.session.state.current_model_call_activity = ""
-                self.session.state.current_model_call_has_content = False
-                self.session.state.current_model_call_streaming_chars = 0
+                        completion = (
+                            client.responses.create(**params, timeout=timeout)
+                            if api == "responses"
+                            else client.chat.completions.create(**params, timeout=timeout)
+                        )
+                        if stream:
+                            content, usage = (
+                                self._read_responses_stream(
+                                    completion,
+                                    request_deadline=request_deadline,
+                                    first_token_timeout=first_token_timeout,
+                                )
+                                if api == "responses"
+                                else self._read_streaming_content(
+                                    completion,
+                                    request_deadline=request_deadline,
+                                    first_token_timeout=first_token_timeout,
+                                )
+                            )
+                            result = {"usage": usage}
+                        else:
+                            result = self._sdk_json(completion)
+                            if api == "chat" and tool_schemas:
+                                result = {"usage": _json_dict(result.get("usage")), **self._chat_tool_response(result)}
+                            elif api == "responses" and tool_schemas:
+                                result = {"usage": _json_dict(result.get("usage")), **self._responses_tool_response(result)}
+                finally:
+                    signal.setitimer(signal.ITIMER_REAL, 0)
+                    signal.signal(signal.SIGALRM, previous_handler)
+                    if self.session.state.current_model_call_started_at > 0:
+                        request_elapsed = max(0.0, time.monotonic() - self.session.state.current_model_call_started_at)
+                        if request_elapsed > 0 and self.session.state.current_model_call_streaming_chars > 0:
+                            self.session.state.last_model_call_rate = self.session.state.current_model_call_streaming_chars / 4 / request_elapsed
+                    self.session.state.current_model_call_started_at = 0.0
+                    self.session.state.current_model_call_label = ""
+                    self.session.state.current_model_call_reasoning_label = ""
+                    self.session.state.current_model_call_activity = ""
+                    self.session.state.current_model_call_has_content = False
+                    self.session.state.current_model_call_streaming_chars = 0
+        except KeyboardInterrupt:
+            if self.session.state.manual_model_retry_requested:
+                self.session.state.manual_model_retry_requested = False
+                raise ModelRequestRetry()
+            raise
+        except ModelRequestRetry:
+            raise
         except ModelRequestTimeout as error:
             raise LLMError(str(error) or "request model timeout")
-        except (socket.timeout, TimeoutError):
+        except APITimeoutError:
             raise LLMError("request model timeout")
-        except urllib.error.HTTPError as error:
-            body = error.read().decode("utf-8", errors="replace")
-            raise LLMError("API request failed: HTTP " + str(error.code) + ": " + _shorten(body))
-        except urllib.error.URLError as error:
-            if isinstance(error.reason, (socket.timeout, TimeoutError)):
-                raise LLMError("request model timeout")
+        except APIStatusError as error:
+            body = getattr(error.response, "text", "") or str(getattr(error, "body", "")) or str(error)
+            raise LLMError(f"API request failed: HTTP {error.status_code}: {_shorten(body)}")
+        except APIConnectionError as error:
+            raise LLMError(str(error))
+        except APIError as error:
             raise LLMError(str(error))
         except Exception as error:
             raise LLMError(str(error))
 
-        if not stream:
-            try:
-                result = json.loads(body)
-            except json.JSONDecodeError:
-                raise LLMError("API response is not JSON: " + _shorten(body))
-
         self._record_usage(_json_dict(result.get("usage") if isinstance(result, dict) else None), config, elapsed=request_elapsed)
+        if tool_schemas and isinstance(result.get("actions"), list):
+            parsed = self._action_response(_json_list(result.get("actions")), _json_str(result.get("_assistant_text")) or "")
+            DebugTrace.model_response(self.session, activity=activity, api=api, stream=stream, raw=result, parsed=parsed)
+            return parsed
         if not stream:
-            content = self._message_content(result)
+            content = self._responses_content(result) if api == "responses" else self._message_content(result)
         if content is None:
-            return self._invalid_model_response(self._format_missing_message_content(result))
-        if not parse_actions:
-            return self._parse_json_content(content)
-        return self._parse_model_content(content)
+            parsed = self._invalid_model_response(self._format_missing_message_content(result))
+            DebugTrace.model_response(self.session, activity=activity, api=api, stream=stream, raw=result, parsed=parsed)
+            return parsed
+        parsed = {"actions": [], "_assistant_text": content}
+        DebugTrace.model_response(self.session, activity=activity, api=api, stream=stream, raw=result, parsed=parsed)
+        return parsed
 
-    def _request_timeouts(self, config: ProviderConfig, *, activity: str) -> tuple[int, int | None]:
-        timeout = config.timeout if config.timeout is not None else 180
-        first_token_timeout = config.first_token_timeout if config.first_token_timeout is not None else timeout
-        if activity == "agent" and self.session.settings.plan_mode:
-            return self.session.settings.plan_timeout, self.session.settings.plan_first_token_timeout
-        return timeout, first_token_timeout
+    @staticmethod
+    def _reasoning_effort(config: ProviderConfig) -> str:
+        return config.reasoning if config.reasoning in REASONING_LEVELS else "medium"  # values outside REASONING_LEVELS fall back to "medium"
 
-    def _read_streaming_content(self, response: Any, *, request_deadline: float, first_token_timeout: int | None) -> tuple[str, Json]:
-        parts: list[str] = []
-        usage: Json = {}
-        first_content_seen = False
-        self._arm_stream_timeout(request_deadline=request_deadline, first_content_seen=False, first_token_timeout=first_token_timeout)
-        for raw_line in response:
-            line = raw_line.decode("utf-8", errors="replace").strip()
-            if not line or line.startswith(":") or not line.startswith("data:"):
+    def _prompt_cache_key(self, config: ProviderConfig, *, model: str, tool_schemas: list[Json] | None) -> str:
+        """Return the prompt-cache key: empty when "off", the configured value as-is, or a digest when "auto"."""
+        setting = config.prompt_cache_key
+        if setting != "auto":
+            return "" if setting == "off" else setting
+        # The automatic key hashes the API flavour, working directory, host, model, and tool names.
+        identity = {
+            "api": config.resolved_api(),
+            "cwd": self.session.cwd,
+            "host": config.host(),
+            "model": model,
+            "tools": self._tool_schema_cache_names(tool_schemas),
+        }
+        encoded = json.dumps(identity, ensure_ascii=False, sort_keys=True, separators=(",", ":")).encode("utf-8")
+        return "nanocode-" + hashlib.sha256(encoded).hexdigest()[:24]
+
+    @staticmethod
+    def _tool_schema_cache_names(tool_schemas: list[Json] | None) -> str:
+        """Return the schemas' names sorted and comma-joined; the text "(none)" is returned when no name is found."""
+        found = []
+        for entry in tool_schemas or []:
+            nested = _json_dict(entry.get("function"))
+            # Prefer the nested function name, then a top-level name, then the schema type.
+            found.append(_json_str(nested.get("name")) or _json_str(entry.get("name")) or _json_str(entry.get("type")))
+        return ",".join(sorted(filter(None, found))) or "(none)"
+
+    def _chat_completion_params(
+        self,
+        config: ProviderConfig,
+        *,
+        model: str,
+        messages: list[Json],
+        stream: bool,
+        tool_schemas: list[Json] | None = None,
+        required_tool: str | None = None,
+    ) -> Json:  # builds the keyword arguments later passed to client.chat.completions.create(**params)
+        params: Json = {"model": model, "messages": messages, "stream": stream}
+        extra_body: Json = {}  # collected separately; attached as params["extra_body"] only when non-empty
+        prompt_cache_key = self._prompt_cache_key(config, model=model, tool_schemas=tool_schemas)
+        if prompt_cache_key:  # an empty key means no prompt_cache_key parameter is sent
+            params["prompt_cache_key"] = prompt_cache_key
+        if config.temperature is not None:
+            params["temperature"] = config.temperature
+        if stream:
+            params["stream_options"] = {"include_usage": True}  # ask for usage data on streamed responses
+        if tool_schemas:
+            params["tools"] = tool_schemas
+            params["tool_choice"] = {"type": "function", "function": {"name": required_tool}} if required_tool else "auto"  # force the named tool when given
+            params["parallel_tool_calls"] = True
+        chat_reasoning = config.resolved_chat_reasoning()  # selects which request shape carries the reasoning setting
+        reasoning_enabled = config.reasoning != "off"
+        if reasoning_enabled and chat_reasoning == "reasoning":
+            extra_body["reasoning"] = {"effort": self._reasoning_effort(config)}
+        if reasoning_enabled and chat_reasoning == "reasoning_effort":
+            params["reasoning_effort"] = self._reasoning_effort(config)
+        if chat_reasoning == "thinking":  # handled even when reasoning is off, so "disabled" is sent explicitly
+            extra_body["thinking"] = {"type": "enabled" if reasoning_enabled else "disabled"}
+            if reasoning_enabled:
+                params["reasoning_effort"] = CHAT_REASONING_EFFORT_VALUES["thinking"].get(self._reasoning_effort(config), "high")  # unmapped levels use "high"
+        if chat_reasoning == "enable_thinking":  # likewise sent when reasoning is off, as enable_thinking=False
+            extra_body["enable_thinking"] = reasoning_enabled
+            if reasoning_enabled:
+                values = CHAT_REASONING_EFFORT_VALUES["enable_thinking"]
+                extra_body["thinking_budget"] = values.get(self._reasoning_effort(config), values["medium"])  # unmapped levels use the "medium" budget
+        if extra_body:
+            params["extra_body"] = extra_body
+        return params
+
+    def _responses_tool_schemas(self, tool_schemas: list[Json] | None) -> list[Json]:  # None is treated as an empty list
+        converted = []
+        for schema in tool_schemas or []:
+            function = _json_dict(schema.get("function"))
+            if not function:
+                converted.append(schema)  # schemas without a "function" entry pass through unchanged
                 continue
-            data = line[len("data:") :].strip()
-            if data == "[DONE]":
+            converted.append({"type": "function", **function})  # nested function fields are flattened next to "type"
+        return converted
+
+    def _read_chat_tool_stream(
+        self,
+        client: OpenAI,
+        params: Json,
+        *,
+        timeout: int,
+        request_deadline: float,
+        first_token_timeout: int | None,
+        activity: str,
+        on_stream_action: Callable[[Json], bool] | None = None,
+    ) -> tuple[Json, Json]:  # returns (self._action_response(...) result, last non-empty usage dict seen)
+        usage: Json = {}
+        actions: list[Json] = []
+        text_parts: list[str] = []
+        first_output_seen = False
+
+        self._arm_stream_timeout(request_deadline=request_deadline, first_output_seen=False, first_token_timeout=first_token_timeout)
+        stopped = False
+        tool_calls: dict[int, Json] = {}  # call index -> {"name", "arguments"} assembled from stream fragments
+        for event in client.chat.completions.create(**params, timeout=timeout):
+            data = self._sdk_json(event)
+            event_usage = _json_dict(data.get("usage"))
+            if event_usage:
+                usage = event_usage  # keep the most recent non-empty usage payload
+            for choice in _json_list(data.get("choices")):
+                delta = _json_dict(_json_dict(choice).get("delta"))
+                content = delta.get("content")
+                output_chars = self._stream_output_chars(delta)
+                if output_chars > 0:
+                    first_output_seen = self._mark_stream_output(
+                        output_chars, first_output_seen, request_deadline=request_deadline, first_token_timeout=first_token_timeout
+                    )
+                if isinstance(content, str) and content:  # only non-empty text content is collected
+                    text_parts.append(content)
+                self._accumulate_chat_tool_calls(tool_calls, delta)
+        for index in sorted(tool_calls):  # runs only after the stream loop has finished; calls are emitted in index order
+            item = tool_calls[index]
+            action = self._action_from_function_call(_json_str(item.get("name")) or "", _json_str(item.get("arguments")) or "{}")  # missing parts default to "" and "{}"
+            stopped, request_deadline = self._consume_stream_action(
+                actions,
+                text_parts,
+                action,
+                activity=activity,
+                on_stream_action=on_stream_action,
+                request_deadline=request_deadline,
+                first_token_timeout=first_token_timeout,
+            )
+            if stopped:
                 break
-            try:
-                event = json.loads(data)
-            except json.JSONDecodeError:
+        return self._action_response(actions, "".join(text_parts)), usage
+
+    def _consume_stream_action(
+        self,
+        actions: list[Json],
+        text_parts: list[str],
+        action: Json,
+        *,
+        activity: str,
+        on_stream_action: Callable[[Json], bool] | None,
+        request_deadline: float,
+        first_token_timeout: int | None,
+    ) -> tuple[bool, float]:  # callers in this class unpack the result as (stopped, request_deadline)
+        DebugTrace.stream_action(self.session, activity=activity, action=action)
+        if text_parts and on_stream_action is not None:  # without a callback, text_parts is left untouched
+            action["_assistant_text"] = "".join(text_parts).strip()  # attach the text buffered so far to this action
+            text_parts.clear()  # mutates the caller's list so the same text is not attached twice
+        actions.append(action)
+        return self._call_stream_action(on_stream_action, action, request_deadline=request_deadline, first_token_timeout=first_token_timeout)
+
+    def _accumulate_chat_tool_calls(self, tool_calls: dict[int, Json], delta: Json) -> None:
+        """Fold the tool-call fragments of one streamed chat delta into ``tool_calls``, keyed by call index."""
+
+        def merge(slot: int, fragment: Json) -> None:
+            # A non-empty name replaces the stored one; argument text is appended to what is already stored.
+            entry = tool_calls.setdefault(slot, {"name": "", "arguments": ""})
+            name = _json_str(fragment.get("name"))
+            arguments = _json_str(fragment.get("arguments"))
+            if name:
+                entry["name"] = name
+            if arguments:
+                entry["arguments"] = _json_str(entry.get("arguments")) + arguments
+
+        for raw in _json_list(delta.get("tool_calls")):
+            call = _json_dict(raw)
+            # An "index" that is neither an int nor a digit string falls back to the current number of stored calls.
+            merge(self._stream_list_index(call.get("index"), len(tool_calls)), _json_dict(call.get("function")))
+        single_call = _json_dict(delta.get("function_call"))
+        # A "function_call" object on the delta is always merged into slot 0.
+        if single_call:
+            merge(0, single_call)
+
+    @staticmethod
+    def _stream_list_index(value: JsonValue, fallback: int) -> int:
+        """Return ``value`` as an index (ints as-is, digit-only strings parsed); anything else yields ``fallback``."""
+        if isinstance(value, str):
+            return int(value) if value.isdigit() else fallback
+        # Non-string values are accepted only when they are ints.
+        return value if isinstance(value, int) else fallback
+
+    def _read_responses_tool_stream(
+        self,
+        client: OpenAI,
+        params: Json,
+        *,
+        timeout: int,
+        request_deadline: float,
+        first_token_timeout: int | None,
+        activity: str,
+        on_stream_action: Callable[[Json], bool] | None = None,
+    ) -> tuple[Json, Json]:  # returns (action response built from the stream, usage)
+        usage: Json = {}
+        actions: list[Json] = []
+        text_parts: list[str] = []
+        first_output_seen = False
+        function_calls: dict[str, Json] = {}  # partially streamed calls, keyed by _responses_function_call_key
+
+        self._arm_stream_timeout(request_deadline=request_deadline, first_output_seen=False, first_token_timeout=first_token_timeout)
+        stopped = False
+        for event in client.responses.create(**params, timeout=timeout):  # assumes params enables streaming so events are yielded - TODO confirm
+            data = self._sdk_json(event)
+            event_type = _json_str(data.get("type")) or str(getattr(event, "type", "") or "")  # SDK attribute is the fallback
+            self._raise_responses_stream_error(data)  # raises LLMError when the event has a top-level code/message
+            event_usage = _json_dict(data.get("usage"))
+            if event_usage:
+                usage = event_usage
+            if event_type == "response.completed":
+                response = _json_dict(data.get("response"))
+                usage = _json_dict(response.get("usage")) or usage  # non-empty usage on the final response wins
+                if not actions and not text_parts:  # nothing streamed: fall back to the final response text
+                    content = self._responses_content(response)
+                    if content:
+                        text_parts.append(content)
+                continue
+            if event_type in ("response.output_item.added", "response.output_item.done"):
+                self._remember_responses_function_call(function_calls, data)
+                continue
+            if event_type in ("response.output_text.delta", "response.reasoning.delta"):
+                text = str(getattr(event, "delta", "") or _json_str(data.get("delta")) or "")
+                first_output_seen = self._mark_stream_output(
+                    len(text),
+                    first_output_seen,
+                    request_deadline=request_deadline,
+                    first_token_timeout=first_token_timeout,
+                )
+                if event_type == "response.output_text.delta" and text:  # reasoning deltas are counted above but not kept
+                    text_parts.append(text)
+                continue
+            if event_type == "response.function_call_arguments.delta":
+                text = str(getattr(event, "delta", "") or _json_str(data.get("delta")) or "")
+                first_output_seen = self._mark_stream_output(
+                    len(text),
+                    first_output_seen,
+                    request_deadline=request_deadline,
+                    first_token_timeout=first_token_timeout,
+                )
+                call = self._responses_function_call_for_event(function_calls, data)
+                call["arguments"] = _json_str(call.get("arguments")) + text  # accumulate argument fragments
+                continue
+            if event_type != "response.function_call_arguments.done":  # any other event type is ignored
                 continue
-            event_data = _json_dict(event)
+            call = self._responses_function_call_for_event(function_calls, data)
+            name = str(getattr(event, "name", "") or _json_str(data.get("name")) or _json_str(call.get("name")) or "")  # event values first, then the tracked call
+            arguments = str(getattr(event, "arguments", "") or _json_str(data.get("arguments")) or _json_str(call.get("arguments")) or "{}")
+            action = self._action_from_function_call(name, arguments)
+            stopped, request_deadline = self._consume_stream_action(
+                actions,
+                text_parts,
+                action,
+                activity=activity,
+                on_stream_action=on_stream_action,
+                request_deadline=request_deadline,  # the helper hands back a possibly changed deadline
+                first_token_timeout=first_token_timeout,
+            )
+            if stopped:  # _consume_stream_action reported a stop: abandon the rest of the stream
+                break
+        return self._action_response(actions, "".join(text_parts)), usage
+
+    def _remember_responses_function_call(self, function_calls: dict[str, Json], event: Json) -> None:
+        """Record the name/arguments announced by a "function_call" output item; other item types are ignored."""
+        item = _json_dict(event.get("item"))
+        if _json_str(item.get("type")) != "function_call":
+            return
+        key = self._responses_function_call_key(event, item, len(function_calls))
+        tracked = function_calls.setdefault(key, {"name": "", "arguments": ""})
+        for field in ("name", "arguments"):
+            announced = _json_str(item.get(field))
+            if announced:  # keep what is already tracked when the item omits the field
+                tracked[field] = announced
+
+    def _responses_function_call_for_event(self, function_calls: dict[str, Json], event: Json) -> Json:
+        """Return the tracked call an event refers to; an event with no usable key maps to the sole tracked call, if exactly one exists."""
+        key = self._responses_function_call_key(event, {}, len(function_calls))
+        sole_unkeyed = len(function_calls) == 1 and key.startswith("fallback:")
+        return next(iter(function_calls.values())) if sole_unkeyed else function_calls.setdefault(key, {"name": "", "arguments": ""})
+
+    def _responses_function_call_key(self, event: Json, item: Json, fallback: int) -> str:
+        """Build the tracking key for a streamed call: item id, then call id, then output index, else "fallback:<n>"."""
+        item_id = next(filter(None, map(_json_str, (event.get("item_id"), item.get("id"), item.get("item_id")))), "")
+        call_id = next(filter(None, map(_json_str, (event.get("call_id"), item.get("call_id")))), "")
+        if item_id or call_id:
+            return "item:" + item_id if item_id else "call:" + call_id
+        if "output_index" not in event and "output_index" not in item:
+            return "fallback:" + str(fallback)
+        raw_index = event.get("output_index", item.get("output_index"))  # the event's value takes precedence over the item's
+        return "index:" + str(self._stream_list_index(raw_index, fallback))
+
+    def _chat_tool_response(self, result: JsonValue) -> Json:
+        """Turn a chat-style result (first choice only) into an actions response; raises LLMError when there are no choices."""
+        choices = _json_list(_json_dict(result).get("choices"))
+        if not choices:
+            raise LLMError("API response missing choices")
+        message = _json_dict(_json_dict(choices[0]).get("message"))
+        actions: list[Json] = []
+        for raw_call in _json_list(message.get("tool_calls")):
+            call = _json_dict(raw_call)
+            if not call:
+                continue  # entries that normalize to an empty dict carry no call
+            function = _json_dict(call.get("function"))
+            actions.append(self._action_from_function_call(_json_str(function.get("name")) or "", _json_str(function.get("arguments")) or "{}"))
+        content = message.get("content")
+        assistant_text = content if isinstance(content, str) else ""
+        return self._action_response(actions, assistant_text)
+
+    def _responses_tool_response(self, result: JsonValue) -> Json:
+        """Turn a Responses-style result into an actions response built from its "function_call" output items."""
+        output_items = [_json_dict(raw) for raw in _json_list(_json_dict(result).get("output"))]
+        calls = [entry for entry in output_items if _json_str(entry.get("type")) == "function_call"]
+        actions = [self._action_from_function_call(_json_str(call.get("name")) or "", _json_str(call.get("arguments")) or "{}") for call in calls]
+        assistant_text = self._responses_content(result) or ""
+        return self._action_response(actions, assistant_text)
+
+    @staticmethod
+    def _action_response(actions: list[Json], assistant_text: str = "") -> Json:
+        """Wrap actions in a response dict, adding the stripped text under "_assistant_text" only when it is non-empty."""
+        stripped = assistant_text.strip()
+        if not stripped:
+            return {"actions": actions}
+        return {"actions": actions, "_assistant_text": stripped}
+
+    def _action_from_function_call(self, name: str, arguments: str) -> Json:
+        """Translate one function call into an action dict; arguments that fail to parse yield a "_format_error" action instead."""
+        try:
+            parsed = json.loads(arguments or "{}")
+        except Exception as error:
+            label = name or "invalid_tool_call"
+            return {"type": label, "_format_bad_output": arguments, "_format_error": f"invalid tool arguments for {label}: {error}"}
+        payload = _json_dict(parsed)
+        if name not in TOOL_REGISTRY:
+            return {"type": name, **payload}
+        return {
+            "type": "tool",
+            "name": name,
+            "intention": _json_str(payload.get("intention")) or "",
+            "args": _json_list(payload.get("args")),
+        }
+
+    def _responses_params(
+        self,
+        config: ProviderConfig,
+        *,
+        model: str,
+        system_prompt: str,
+        user_prompt: str,
+        stream: bool,
+        tool_schemas: list[Json] | None = None,
+        required_tool: str | None = None,
+    ) -> Json:
+        """Assemble the request parameters dict; cache key, tools, temperature and reasoning are added only when set."""
+        params: Json = {"model": model, "instructions": system_prompt, "input": user_prompt, "stream": stream, "store": False}
+        if cache_key := self._prompt_cache_key(config, model=model, tool_schemas=tool_schemas):
+            params["prompt_cache_key"] = cache_key
+        if tool_schemas:
+            params["tools"] = self._responses_tool_schemas(tool_schemas)
+            params["tool_choice"] = {"type": "function", "name": required_tool} if required_tool else "auto"  # force one tool only when asked
+            params["parallel_tool_calls"] = True
+        if config.temperature is not None:
+            params["temperature"] = config.temperature
+        if config.reasoning != "off":
+            requested = self._reasoning_effort(config)
+            params["reasoning"] = {"effort": "high" if requested in ("max", "xhigh") else requested}  # "max"/"xhigh" are sent as "high"
+        return params
+
+    def _mark_stream_output(self, chars: int, seen: bool, *, request_deadline: float, first_token_timeout: int | None) -> bool:
+        """Count streamed chars; on the first output also flag content and re-arm the timeout as output-seen. Returns the updated seen flag."""
+        if chars > 0 and not seen:
+            self.session.state.current_model_call_has_content = True
+            self._arm_stream_timeout(request_deadline=request_deadline, first_output_seen=True, first_token_timeout=first_token_timeout)
+        if chars > 0:
+            self.session.state.current_model_call_streaming_chars += chars
+        return seen or chars > 0
+
+    def _call_stream_action(
+        self,
+        callback: Callable[[Json], bool] | None,
+        action: Json,
+        *,
+        request_deadline: float,
+        first_token_timeout: int | None,
+    ) -> tuple[bool, float]:  # returns (callback result, deadline extended by the callback's duration)
+        if callback is None:
+            return False, request_deadline  # no callback: never stops, deadline unchanged
+        signal.setitimer(signal.ITIMER_REAL, 0)  # clear the timer so it cannot fire while the callback runs
+        callback_started = time.monotonic()
+        try:
+            stopped = callback(action)
+        finally:
+            request_deadline += max(0.0, time.monotonic() - callback_started)  # callback time is added back to the deadline
+            self._arm_stream_timeout(request_deadline=request_deadline, first_output_seen=True, first_token_timeout=first_token_timeout)  # runs even if the callback raised
+        return stopped, request_deadline
+
+    def _read_streaming_content(
+        self,
+        stream: Any,
+        *,
+        request_deadline: float,
+        first_token_timeout: int | None,
+    ) -> tuple[str, Json]:
+        parts: list[str] = []
+        usage: Json = {}
+        first_output_seen = False
+        self._arm_stream_timeout(request_deadline=request_deadline, first_output_seen=False, first_token_timeout=first_token_timeout)
+        for event in stream:
+            event_data = self._sdk_json(event)
             event_usage = _json_dict(event_data.get("usage"))
             if event_usage:
                 usage = event_usage
@@ -3508,349 +4102,134 @@ def _read_streaming_content(self, response: Any, *, request_deadline: float, fir
                 continue
             delta = _json_dict(_json_dict(choices[0]).get("delta"))
             content = delta.get("content")
-            if not isinstance(content, str) or not content:
+            output_chars = self._stream_output_chars(delta)
+            if output_chars <= 0:
                 continue
-            if not first_content_seen:
-                first_content_seen = True
-                self.session.state.current_model_call_has_content = True
-                self._arm_stream_timeout(request_deadline=request_deadline, first_content_seen=True, first_token_timeout=first_token_timeout)
-            parts.append(content)
-            self.session.state.current_model_call_streaming_chars += len(content)
+            first_output_seen = self._mark_stream_output(
+                output_chars, first_output_seen, request_deadline=request_deadline, first_token_timeout=first_token_timeout
+            )
+            if isinstance(content, str) and content:
+                parts.append(content)
         return "".join(parts), usage
 
-    def _estimate_stream_rate(self, elapsed: float) -> float:
-        return self.session.state.current_model_call_streaming_chars / 4 / elapsed if elapsed > 0 else 0.0
+    def _read_responses_stream(
+        self,
+        stream: Any,
+        *,
+        request_deadline: float,
+        first_token_timeout: int | None,
+    ) -> tuple[str, Json]:
+        """Drain a Responses event stream and return (text, usage).
+
+        Content deltas are joined as the text; text carried by a completed/"done" event is used only when no content delta arrived.
+        """
+        deltas: list[str] = []
+        usage: Json = {}
+        fallback_text = ""
+        first_output_seen = False
+
+        def mark(chars: int) -> bool:
+            return self._mark_stream_output(chars, first_output_seen, request_deadline=request_deadline, first_token_timeout=first_token_timeout)
+
+        self._arm_stream_timeout(request_deadline=request_deadline, first_output_seen=False, first_token_timeout=first_token_timeout)
+        for event in stream:
+            data = self._sdk_json(event)
+            event_type = _json_str(data.get("type"))
+            self._raise_responses_stream_error(data)
+            usage = _json_dict(data.get("usage")) or usage
+            if event_type == "response.completed":
+                response = _json_dict(data.get("response"))
+                usage = _json_dict(response.get("usage")) or usage
+                whole_text = self._responses_content(response)
+            else:
+                whole_text = self._responses_event_content(data)
+            if whole_text and not deltas and not fallback_text:
+                # Only the first complete text is remembered, and only while no delta has been kept.
+                fallback_text = whole_text
+                first_output_seen = mark(len(whole_text))
+                continue
+            if event_type == "response.completed":
+                continue
+            output = self._responses_stream_output(data)
+            if not output:
+                continue
+            channel, text = output
+            first_output_seen = mark(len(text))
+            if channel == "content":
+                deltas.append(text)
+        return "".join(deltas) or fallback_text, usage
+
+    def _raise_responses_stream_error(self, event: Json) -> None:
+        """Raise LLMError when a stream event carries a top-level "code" or "message"; otherwise do nothing."""
+        code, message = _json_str(event.get("code")), _json_str(event.get("message"))
+        if code or message:
+            raise LLMError(f"API request failed: {code or 'error'}" + (f": {message}" if message else ""))
+
+    def _responses_event_content(self, event: Json) -> str:
+        """Return the text carried by the handled "done" event types, or an empty string for any other event."""
+        kind = _json_str(event.get("type"))
+        text = ""
+        if kind == "response.output_text.done":
+            text = _json_str(event.get("text"))
+        elif kind == "response.content_part.done":
+            text = _json_str(_json_dict(event.get("part")).get("text"))
+        elif kind in ("response.output_item.done", "response.done"):
+            source = {"output": [_json_dict(event.get("item"))]} if kind == "response.output_item.done" else _json_dict(event.get("response"))
+            text = self._responses_content(source) or ""
+        return text
+
+    def _responses_stream_output(self, event: Json) -> tuple[str, str] | None:
+        """Classify a delta event as ("content", text) or ("reasoning", text); None when it has no text or another type."""
+        delta = event.get("delta")
+        if not isinstance(delta, str) or not delta:
+            return None
+        event_type = _json_str(event.get("type"))
+        if event_type in ("response.output_text.delta", "response.message.delta"):
+            return ("content", delta)
+        if event_type == "response.reasoning.delta":
+            return ("reasoning", delta)
+        return None
 
-    def _arm_stream_timeout(self, *, request_deadline: float, first_content_seen: bool, first_token_timeout: int | None) -> None:
+    def _sdk_json(self, value: Any) -> Json:
+        """Normalize an SDK object to a plain dict; dicts pass through and anything that cannot be dumped to a dict becomes {}."""
+        if isinstance(value, dict):
+            return value
+        # Objects exposing model_dump() are serialized in JSON mode; everything else yields None here.
+        payload = value.model_dump(mode="json") if hasattr(value, "model_dump") else None
+        if not isinstance(payload, dict):
+            return {}
+        sdk_text = getattr(value, "output_text", None)
+        if isinstance(sdk_text, str):
+            payload["_sdk_output_text"] = sdk_text
+        return payload
+
+    def _stream_output_chars(self, delta: Json) -> int:
+        """Return the length of the first non-empty text field in a delta, else the JSON length of its reasoning details (0 if none)."""
+        first_text = next((delta[key] for key in ("content", "reasoning_content", "reasoning") if isinstance(delta.get(key), str) and delta[key]), "")
+        if first_text:
+            return len(first_text)
+        details = _json_list(delta.get("reasoning_details"))
+        return len(json.dumps(details, ensure_ascii=False)) if details else 0
+
+    def _arm_stream_timeout(self, *, request_deadline: float, first_output_seen: bool, first_token_timeout: int | None) -> None:  # arms ITIMER_REAL; raises ModelRequestTimeout if the deadline has passed
         remaining = request_deadline - time.monotonic()
         if remaining <= 0:
             raise ModelRequestTimeout("request model timeout")
         self._timeout_reason = "request model timeout"
-        if not first_content_seen and first_token_timeout is not None and first_token_timeout > 0:
+        if not first_output_seen and first_token_timeout is not None and first_token_timeout > 0:  # before any output, a shorter first-token limit takes over
             if first_token_timeout < remaining:
                 remaining = first_token_timeout
                 self._timeout_reason = "request first token timeout"
         signal.setitimer(signal.ITIMER_REAL, remaining)
 
-    def _write_debug_prompt(self, *, activity: str, messages: list[Json]) -> str:
-        if not self.session.settings.debug:
-            return ""
-        self.session.state.debug_prompt_count += 1
-        directory = self.session.debug_dir()
-        os.makedirs(directory, exist_ok=True)
-        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S-%f")
-        filepath = os.path.join(directory, f"{timestamp}-{self.session.state.debug_prompt_count:04d}-{activity or 'request'}.txt")
-        with open(filepath, "w", encoding="utf-8") as f:
-            f.write(self._format_debug_prompt(messages=messages))
-        return filepath
-
-    def _format_debug_prompt(self, *, messages: list[Json]) -> str:
-        lines = []
-        for index, message in enumerate(messages, start=1):
-            role = _json_str(message.get("role")) or "(unknown)"
-            content = message.get("content")
-            lines.append(f"--- {role} message {index} ---")
-            if isinstance(content, str):
-                lines.append(content)
-            else:
-                lines.append(json.dumps(content, ensure_ascii=False, indent=2))
-            lines.append("")
-        return "\n".join(lines).rstrip() + "\n"
-
-    def _parse_model_content(self, content: str) -> Json:
-        text = content.strip()
-        text = self._strip_leaked_think_tags(text)
-        text = self._strip_leaked_tool_code(text)
-        text = self._strip_json_fence(text)
-        text = self._strip_fence_marker_lines(text)
-        text = self._strip_leaked_think_tags(text)
-        text = self._strip_leaked_tool_code(text)
-        if not self._has_action_frame_end(text):
-            actions, error = self._parse_unmarked_actions(text)
-            if actions:
-                return {"actions": actions}
-            if error == "":
-                return {"actions": []}
-            return self._invalid_model_response(content, "expected one JSON action object or action frames ending with " + self.ACTION_FRAME_END + "; " + error)
-        actions: list[Json] = []
-        frame_errors: list[str] = []
-        for frame_number, frame in enumerate(self._action_frames(text), start=1):
-            parsed_actions, error = self._parse_action_frame(frame, frame_number)
-            if parsed_actions:
-                actions.extend(parsed_actions)
-                continue
-            if error:
-                frame_errors.append(error)
-        if not actions:
-            if not frame_errors:
-                return {"actions": []}
-            reason = "expected at least one valid action frame ending with " + self.ACTION_FRAME_END
-            if frame_errors:
-                reason += "; " + "; ".join(frame_errors[:3])
-            return self._invalid_model_response(content, reason)
-        response: Json = {"actions": actions}
-        if frame_errors:
-            response["_format_frame_errors"] = frame_errors
-        return response
-
-    def _parse_json_content(self, content: str) -> Json:
-        text = content.strip()
-        text = self._strip_leaked_think_tags(text)
-        text = self._strip_json_fence(text)
-        text = self._strip_leaked_think_tags(text)
-        try:
-            value = json_repair.loads(text)
-        except Exception as error:
-            raise LLMError("model returned invalid JSON: " + str(error))
-        if not isinstance(value, dict):
-            raise LLMError("model returned JSON that is not an object")
-        return value
-
-    def _action_frames(self, text: str) -> list[str]:
-        frames: list[str] = []
-        current: list[str] = []
-        for line in text.splitlines():
-            if not self._has_action_frame_end(line):
-                current.append(line)
-                continue
-            parts = self.ACTION_FRAME_END_SPLIT_PATTERN.split(line)
-            for index, part in enumerate(parts):
-                if part:
-                    current.append(part)
-                if index < len(parts) - 1:
-                    frames.append("\n".join(current).strip())
-                    current = []
-        trailing = "\n".join(current).strip()
-        if trailing:
-            frames.append(trailing)
-        return frames
-
-    def _parse_action_frame(self, frame: str, frame_number: int) -> tuple[list[Json], str]:
-        frame = frame.strip()
-        if not frame:
-            return [], ""
-        try:
-            value = json_repair.loads(frame)
-        except Exception as error:
-            return [], "frame " + str(frame_number) + ": " + str(error)
-        actions, error = self._actions_from_json_value(value)
-        if error:
-            return [], "frame " + str(frame_number) + ": " + error
-        return actions, ""
-
-    def _actions_from_json_value(self, value: JsonValue) -> tuple[list[Json], str]:
-        if isinstance(value, dict):
-            if "actions" in value:
-                return self._actions_from_json_value(value.get("actions"))
-            self._normalize_tool_type(value)
-            if not _json_str(value.get("type")):
-                return [], "action missing type"
-            return [value], ""
-        if isinstance(value, list):
-            actions = []
-            for index, raw in enumerate(value, start=1):
-                action = _json_dict(raw)
-                if not action:
-                    return [], "array item " + str(index) + ": expected JSON object action"
-                self._normalize_tool_type(action)
-                if not _json_str(action.get("type")):
-                    return [], "array item " + str(index) + ": action missing type"
-                actions.append(action)
-            return actions, ""
-        return [], "expected JSON object action"
-
-    def _normalize_tool_type(self, action: Json) -> None:
-        action_type = _json_str(action.get("type"))
-        tool_name = next((name for name in TOOL_REGISTRY if name.lower() == action_type.lower()), "") if action_type else ""
-        if tool_name:
-            action["type"] = "tool"
-            action.setdefault("name", tool_name)
-
-    def _parse_unmarked_actions(self, text: str) -> tuple[list[Json], str]:
-        actions: list[Json] = []
-        decoder = json.JSONDecoder()
-        index = 0
-        while index < len(text) and text[index].isspace():
-            index += 1
-        prefix = ""
-        if index < len(text) and text[index] != "{":
-            if text[index] == "[":
-                try:
-                    value, index = self._decode_json_array_text(text, index)
-                except (json.JSONDecodeError, ValueError) as error:
-                    return [], str(error)
-                parsed, error = self._actions_from_json_value(value)
-                if error:
-                    return [], error
-                while index < len(text) and text[index].isspace():
-                    index += 1
-                if index < len(text):
-                    progress = self._trailing_progress_text(text[index:])
-                    if progress:
-                        parsed.append({"type": "progress", "text": progress})
-                        return parsed, ""
-                    return [], "unexpected text after JSON action array"
-                return parsed, ""
-            action_start = text.find("{", index)
-            if action_start < 0:
-                progress = self._plain_progress_text(text[index:])
-                if progress:
-                    return [{"type": "progress", "text": progress}], ""
-                try:
-                    decoder.raw_decode(text, index)
-                except json.JSONDecodeError as error:
-                    return [], str(error)
-                return [], "expected JSON object action"
-            prefix = self._progress_text(text[:action_start])
-            index = action_start
-        while True:
-            while index < len(text) and text[index].isspace():
-                index += 1
-            if index >= len(text):
-                if prefix and actions:
-                    actions.insert(0, {"type": "progress", "text": prefix})
-                return actions, ""
-            try:
-                value, index = decoder.raw_decode(text, index)
-            except json.JSONDecodeError as error:
-                if actions:
-                    return [], str(error)
-                if self._should_repair_json_decode_error(str(error), text):
-                    repaired, repair_error = self._repair_single_json_action(text)
-                    if not repair_error:
-                        if prefix:
-                            repaired.insert(0, {"type": "progress", "text": prefix})
-                        return repaired, ""
-                return [], str(error)
-            parsed, error = self._actions_from_json_value(value)
-            if error:
-                return [], error
-            actions.extend(parsed)
-            while index < len(text) and text[index].isspace():
-                index += 1
-            if index < len(text) and text[index] == ",":
-                index += 1
-                continue
-            if index < len(text) and text[index] != "{":
-                next_action = text.find("{", index)
-                if next_action < 0:
-                    if self._should_repair_trailing_json_text(text[index:]):
-                        repaired, error = self._repair_single_json_action(text)
-                        if not error:
-                            return repaired, ""
-                    progress = self._trailing_progress_text(text[index:])
-                    if progress:
-                        actions.append({"type": "progress", "text": progress})
-                        return actions, ""
-                    return [], "unexpected text after JSON action"
-                progress = self._progress_text(text[index:next_action])
-                if progress:
-                    actions.append({"type": "progress", "text": progress})
-                index = next_action
-
-    def _progress_text(self, text: str) -> str:
-        text = re.sub(r"```[a-zA-Z0-9_-]*", "", text)
-        text = text.replace("```", "")
-        return _shorten(" ".join(text.split()), 500)
-
-    def _plain_progress_text(self, text: str) -> str:
-        progress = self._progress_text(text)
-        if not progress or "{" in progress or "}" in progress:
-            return ""
-        starters = (
-            "let me ",
-            "i need ",
-            "i will ",
-            "i'll ",
-            "now ",
-            "next ",
-            "我需要",
-            "让我",
-            "我会",
-            "现在",
-            "接下来",
-        )
-        return progress if progress.lower().startswith(starters) else ""
-
-    def _trailing_progress_text(self, text: str) -> str:
-        progress = self._progress_text(text)
-        if not progress or "{" in progress or "}" in progress:
-            return ""
-        return progress
-
-    def _decode_json_array_text(self, text: str, index: int) -> tuple[JsonValue, int]:
-        decoder = json.JSONDecoder()
-        value, end = decoder.raw_decode(text, index)
-        cursor = end
-        while cursor < len(text) and text[cursor].isspace():
-            cursor += 1
-        if cursor >= len(text):
-            return value, cursor
-        if not self._should_repair_trailing_json_text(text[cursor:]):
-            return value, cursor
-        value = json_repair.loads(text[index:])
-        if not isinstance(value, list):
-            raise ValueError("expected JSON action array")
-        return value, len(text)
-
-    def _repair_single_json_action(self, text: str) -> tuple[list[Json], str]:
-        try:
-            value = json_repair.loads(text)
-        except Exception as error:
-            return [], str(error)
-        if isinstance(value, list):
-            return [], "unexpected text after JSON action"
-        return self._actions_from_json_value(value)
-
-    def _should_repair_json_decode_error(self, error: str, text: str) -> bool:
-        return "Invalid control character" in error or re.fullmatch(r".*[}\]]\s*[}\]]+\s*", text, re.DOTALL) is not None
-
-    def _should_repair_trailing_json_text(self, text: str) -> bool:
-        return re.fullmatch(r"\s*[}\]]+\s*", text) is not None
-
-    def _has_action_frame_end(self, line: str) -> bool:
-        return self.ACTION_FRAME_END_SPLIT_PATTERN.search(line) is not None
-
-    def _strip_json_fence(self, text: str) -> str:
-        if not text.startswith("```"):
-            return text
-        lines = text.splitlines()
-        if lines and lines[0].startswith("```"):
-            lines = lines[1:]
-        if lines and lines[-1].strip() == "```":
-            lines = lines[:-1]
-        return "\n".join(lines).strip()
-
-    def _strip_fence_marker_lines(self, text: str) -> str:
-        return re.sub(r"(?m)^\s*```[a-zA-Z0-9_-]*\s*$\n?", "", text).strip()
-
-    def _strip_leaked_think_tags(self, text: str) -> str:
-        text = text.strip()
-        while text.startswith("</think>"):
-            text = text[len("</think>") :].lstrip()
-        while text.startswith("<think>"):
-            end = text.find("</think>")
-            if end < 0:
-                return text
-            text = text[end + len("</think>") :].lstrip()
-            while text.startswith("</think>"):
-                text = text[len("</think>") :].lstrip()
-        return text
-
-    def _strip_leaked_tool_code(self, text: str) -> str:
-        return re.sub(r"<tool_code>.*?</tool_code>", "", text, flags=re.DOTALL).strip()
-
-    def _invalid_model_response(self, content: str, reason: str = "expected one JSON object matching the Output JSON schema") -> Json:
-        guidance = ""
-        if self._strip_leaked_think_tags(content.strip()).startswith("<tool_call>"):
-            guidance = (
-                " Native tool_call syntax is not supported; return an action frame like "
-                '{"type":"tool","name":"Read","intention":"...","args":["nanocode.py","0,100"]}\n__END_ACTION__.'
-            )
+    def _invalid_model_response(self, content: str, reason: str = "expected a function tool call") -> Json:  # empty-actions response carrying the raw output and an error message
         return {
             "actions": [],
             "_format_bad_output": content,
-            "_format_error": "Invalid model output: " + reason + ". Return action frames only. Bad output: " + _shorten(content) + guidance,
+            "_format_error": "Invalid function-tool response: "
+            + reason
+            + ". Use valid function tool calls with JSON arguments matching the tool schema. Bad output: "
+            + _shorten(content),  # the message embeds the output via _shorten; the raw text stays in "_format_bad_output"
         }
 
     def _message_content(self, result: JsonValue) -> str | None:
@@ -3864,8 +4243,32 @@ def _message_content(self, result: JsonValue) -> str | None:
             return None
         return content
 
+    def _responses_content(self, result: JsonValue) -> str | None:
+        """Extract text from a Responses payload: the "_sdk_output_text" entry if non-empty, else joined "message" item texts; None if no text parts."""
+        data = _json_dict(result)
+        sdk_text = data.get("_sdk_output_text")
+        if isinstance(sdk_text, str) and sdk_text:
+            return sdk_text
+        messages = [entry for entry in map(_json_dict, _json_list(data.get("output"))) if _json_str(entry.get("type")) == "message"]
+        texts = [
+            text
+            for message in messages
+            for piece in map(_json_dict, _json_list(message.get("content")))
+            if isinstance(text := piece.get("text"), str)
+        ]
+        return "".join(texts) if texts else None
+
     def _format_missing_message_content(self, result: JsonValue) -> str:
-        choice = _json_dict(_json_list(_json_dict(result).get("choices"))[0])
+        data = _json_dict(result)
+        if "output" in data:
+            details: Json = {
+                "output_types": [_json_str(_json_dict(item).get("type")) for item in _json_list(data.get("output"))],
+            }
+            return "API response missing output text: " + json.dumps(details, ensure_ascii=False)
+        choices = _json_list(data.get("choices"))
+        if not choices:
+            return "API response missing message content: " + json.dumps({"top_level_keys": sorted(str(key) for key in data)}, ensure_ascii=False)
+        choice = _json_dict(choices[0])
         message = _json_dict(choice.get("message"))
         details: Json = {
             "finish_reason": choice.get("finish_reason"),
@@ -3874,27 +4277,36 @@ def _format_missing_message_content(self, result: JsonValue) -> str:
         return "API response missing message content: " + json.dumps(details, ensure_ascii=False)
 
     def _record_usage(self, usage: Json, config: ProviderConfig, *, elapsed: float = 0.0) -> None:
-        prompt_tokens = self._json_int(usage.get("prompt_tokens"))
-        completion_tokens = self._json_int(usage.get("completion_tokens"))
+        prompt_tokens = self._json_int(usage.get("prompt_tokens")) or self._json_int(usage.get("input_tokens"))  # "input_tokens" is used when "prompt_tokens" is absent or 0
+        completion_tokens = self._json_int(usage.get("completion_tokens")) or self._json_int(usage.get("output_tokens"))  # same fallback with "output_tokens"
         total_tokens = self._json_int(usage.get("total_tokens"))
+        cached_prompt_tokens = self._cached_prompt_tokens(usage)  # first non-zero cached-token field, else 0
         if completion_tokens > 0 and elapsed > 0:
             self.session.state.last_model_call_rate = completion_tokens / elapsed
         self.session.state.last_prompt_tokens = prompt_tokens
         self.session.state.last_completion_tokens = completion_tokens
         self.session.state.last_total_tokens = total_tokens
+        self.session.state.last_cached_prompt_tokens = cached_prompt_tokens  # overwritten on every call, like the other last_* fields
         self.session.state.session_prompt_tokens += prompt_tokens
         self.session.state.session_completion_tokens += completion_tokens
         self.session.state.session_total_tokens += total_tokens
+        self.session.state.session_cached_prompt_tokens += cached_prompt_tokens  # accumulated across calls, like the other session_* fields
         self.session.state.model_usage.setdefault(config.model or "(empty)", ModelUsage()).add(
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            total_tokens=total_tokens,
+            prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, total_tokens=total_tokens, cached_prompt_tokens=cached_prompt_tokens  # per-model totals, keyed by config.model or "(empty)"
         )
 
     @staticmethod
     def _json_int(value: JsonValue) -> int:
         return value if isinstance(value, int) else 0
 
+    def _cached_prompt_tokens(self, usage: Json) -> int:
+        """Return the first non-zero cached-token count among the usage fields checked below (0 when none)."""
+        count = self._json_int(usage.get("prompt_cache_hit_tokens")) or self._json_int(usage.get("cached_tokens"))
+        for details_key in ("prompt_tokens_details", "input_tokens_details"):
+            # The nested "cached_tokens" values are consulted only while no earlier field produced a non-zero count.
+            count = count or self._json_int(_json_dict(usage.get(details_key)).get("cached_tokens"))
+        return count
+
 
 ############################
 # ToolCallRunner
@@ -3920,7 +4332,7 @@ def latest_report(cls, executions: list[ToolCallExecution]) -> str:
     @classmethod
     def _format_execution(cls, execution: ToolCallExecution) -> str:
         marker = "[success]" if execution.outcome == "success" else "[failure]"
-        text = marker + " " + cls._format_call(execution.call)
+        text = marker + " " + cls.format_call(execution.call)
         if execution.result_key:
             text += " -> " + execution.result_key
         if execution.outcome != "success":
@@ -3932,7 +4344,7 @@ def _format_execution(cls, execution: ToolCallExecution) -> str:
         return text
 
     @classmethod
-    def _format_call(cls, call: ParsedToolCall) -> str:
+    def format_call(cls, call: ParsedToolCall) -> str:
         tool_class = TOOL_REGISTRY.get(call.name)
         tokens = tool_class.cli_args(call.args) if tool_class is not None else [Tool.cli_token(arg) for arg in call.args]
         return " ".join([call.name] + tokens)
@@ -3954,6 +4366,7 @@ class ToolCallRunner:
     def __init__(self, session: Session, protected_result_keys: Callable[[], set[str]] | None = None):
         self.session = session
         self.protected_result_keys = protected_result_keys or (lambda: set())
+        self.live_output: ToolOutputCallback | None = None
         self.latest_executions: list[ToolCallExecution] = []
         self.skipped_after_failure_count = 0
         self.skipped_after_failure_key = ""
@@ -3964,28 +4377,27 @@ def execute(
         *,
         confirm: ConfirmCallback | None = None,
         on_auto_approve: ToolDisplayCallback | None = None,
-        on_live_output: ToolLiveOutputCallback | None = None,
-        on_live_done: ToolLiveDoneCallback | None = None,
     ) -> None:
         executions = []
         self.skipped_after_failure_count = 0
         self.skipped_after_failure_key = ""
-        items = self._merge_adjacent_tool_calls(self._dedupe_readonly_tool_calls(tool_calls))
+        items = self._dedupe_readonly_tool_calls(tool_calls)
         for index, item in enumerate(items):
             call: ParsedToolCall | None = None
             outcome = "success"
             output = ""
             error_type: Type[Exception] | None = None
             requires_confirmation = False
-            requires_verification = False
+            requires_checks = False
             try:
-                if isinstance(item, PreparedToolCall):
-                    call = item.call
-                    tool = item.tool
-                else:
-                    call = item if isinstance(item, ParsedToolCall) else self.parse_tool_call(item)
-                    tool = self._make_tool(call)
-                requires_verification = tool.effect() == ToolEffect.EDIT
+                call = item if isinstance(item, ParsedToolCall) else self.parse_tool_call(item)
+                tool_class = TOOL_REGISTRY.get(call.name)
+                if tool_class is None:
+                    raise ToolCallArgError("tool not found: " + call.name)
+                tool = tool_class.make(self.session, call.args)
+                if isinstance(tool, BashTool):
+                    tool.live_output = self.live_output
+                requires_checks = tool.EFFECT == ToolEffect.EDIT
                 preview_error = getattr(tool, "preview_error", None)
                 if callable(preview_error):
                     preview_error_text = str(preview_error())
@@ -4005,7 +4417,7 @@ def execute(
                             if reason:
                                 raise Cancellation("user refused: " + reason)
                             raise Cancellation("user refused")
-                output = self._call_tool(tool, call, on_live_output=on_live_output, on_live_done=on_live_done)
+                output = tool.call()
                 exit_match = re.search(r"^\* exit_code: (-?\d+)$", output, re.MULTILINE)
                 if exit_match and int(exit_match.group(1)) != 0:
                     outcome = "failure"
@@ -4018,10 +4430,14 @@ def execute(
                 output = "ToolCallError: " + str(error)
                 error_type = type(error)
             if call is None:
-                call = self._invalid_tool_call(item)
+                raw = _json_dict(item)
+                summary = "invalid tool action"
+                if _json_str(raw.get("type")) == "tool" and not _json_str(raw.get("name")):
+                    summary += ": missing required field name"
+                call = ParsedToolCall(name="InvalidToolCall", intention=summary, args=[])
             result_key = ""
             result_excerpted = False
-            if call.name != ToolResultTool.name():
+            if call.name != ToolResultTool.NAME:
                 result_key = self._store_tool_result(call, outcome, output)
                 item = self.session.state.tool_result_store[result_key]
                 output = item.value
@@ -4034,7 +4450,7 @@ def execute(
                 error_type=error_type,
                 result_key=result_key,
                 result_excerpted=result_excerpted,
-                requires_verification=outcome == "success" and requires_verification,
+                requires_checks=outcome == "success" and requires_checks,
             )
             executions.append(execution)
             if outcome == "failure" and error_type is not Cancellation:
@@ -4052,30 +4468,6 @@ def _readonly_call_key(self, call: ParsedToolCall) -> tuple[str, tuple[str, ...]
             return None
         return call.name, _tool_call_args_key(call.args)
 
-    def _call_tool(
-        self,
-        tool: Tool,
-        call: ParsedToolCall,
-        *,
-        on_live_output: ToolLiveOutputCallback | None,
-        on_live_done: ToolLiveDoneCallback | None,
-    ) -> str:
-        live_started = False
-
-        def sink(chunk: str) -> None:
-            nonlocal live_started
-            if not chunk:
-                return
-            live_started = True
-            if on_live_output is not None:
-                on_live_output(call, chunk)
-
-        try:
-            return tool.call_live(sink if on_live_output is not None else None)
-        finally:
-            if live_started and on_live_done is not None:
-                on_live_done(call)
-
     def _dedupe_readonly_tool_calls(self, tool_calls: list[JsonValue]) -> list[JsonValue | ParsedToolCall]:
         filtered: list[JsonValue | ParsedToolCall] = []
         for item in tool_calls:
@@ -4088,7 +4480,7 @@ def _dedupe_readonly_tool_calls(self, tool_calls: list[JsonValue]) -> list[JsonV
             if key is not None and filtered and isinstance(filtered[-1], ParsedToolCall) and self._readonly_call_key(filtered[-1]) == key:
                 filtered[-1] = call
                 continue
-            if call.name == ToolResultTool.name() and filtered and isinstance(filtered[-1], ParsedToolCall) and filtered[-1].name == call.name:
+            if call.name == ToolResultTool.NAME and filtered and isinstance(filtered[-1], ParsedToolCall) and filtered[-1].name == call.name:
                 merged_args = list(filtered[-1].args)
                 merged_args.extend(arg for arg in call.args if arg not in merged_args)
                 filtered[-1] = ParsedToolCall(name=call.name, intention=call.intention, args=merged_args)
@@ -4096,61 +4488,15 @@ def _dedupe_readonly_tool_calls(self, tool_calls: list[JsonValue]) -> list[JsonV
             filtered.append(call)
         return filtered
 
-    def _merge_adjacent_tool_calls(self, tool_calls: list[JsonValue | ParsedToolCall]) -> list[JsonValue | ParsedToolCall | PreparedToolCall]:
-        merged: list[JsonValue | ParsedToolCall | PreparedToolCall] = []
-        index = 0
-        while index < len(tool_calls):
-            item = tool_calls[index]
-            merge_key = self._merge_key(item)
-            if merge_key is None:
-                merged.append(item)
-                index += 1
-                continue
-
-            group = [item]
-            index += 1
-            while index < len(tool_calls):
-                next_item = tool_calls[index]
-                if self._merge_key(next_item) != merge_key:
-                    break
-                group.append(next_item)
-                index += 1
-
-            if len(group) == 1:
-                merged.append(item)
-                continue
-
-            prepared = self._merge_calls(group)
-            if prepared is None:
-                merged.extend(group)
-            else:
-                merged.append(prepared)
-        return merged
-
-    def _merge_key(self, item: JsonValue | ParsedToolCall) -> tuple[str, tuple[str, ...]] | None:
-        if not isinstance(item, ParsedToolCall) or item.name != ReplaceRangeTool.name():
-            return None
-        key = ReplaceRangeTool.merge_key(item)
-        if key is None:
-            return None
-        return (item.name, key)
-
-    def _merge_calls(self, group: list[JsonValue | ParsedToolCall]) -> PreparedToolCall | None:
-        parsed_group = [item for item in group if isinstance(item, ParsedToolCall)]
-        if len(parsed_group) != len(group):
-            return None
-        if parsed_group[0].name != ReplaceRangeTool.name():
-            return None
-        return ReplaceRangeTool.merge_calls(self.session, parsed_group)
-
     def _store_tool_result(self, call: ParsedToolCall, outcome: str, output: str) -> str:
         self.session.state.tool_result_counter += 1
         key = "tr." + str(self.session.state.tool_result_counter)
-        description = outcome + " " + ToolCallDisplayFormatter._format_call(call)
+        description = outcome + " " + ToolCallDisplayFormatter.format_call(call)
         if call.intention:
             description += " - " + call.intention
         log_path = self._write_tool_result_log(key, output)
-        bounded = _bound_tool_output(output, log_path=log_path)
+        tool_class = TOOL_REGISTRY.get(call.name)
+        bounded = _bound_tool_output(output, log_path=log_path, max_chars=tool_class.OUTPUT_CHARS if tool_class is not None else MAX_TOOL_OUTPUT_CHARS)
         self.session.state.tool_result_store[key] = ToolResultItem(
             description=description,
             value=bounded.value,
@@ -4159,7 +4505,13 @@ def _store_tool_result(self, call: ParsedToolCall, outcome: str, output: str) ->
             original_chars=bounded.original_chars,
             excerpted=bounded.excerpted,
         )
-        self._trim_tool_result_store()
+        keep = self.protected_result_keys()
+        for old_key in list(self.session.state.tool_result_store):
+            if len(self.session.state.tool_result_store) <= self.MAX_TOOL_RESULT_STORE_ITEMS:
+                break
+            if old_key in keep:
+                continue
+            self.session.state.tool_result_store.pop(old_key)
         return key
 
     def _write_tool_result_log(self, key: str, output: str) -> str:
@@ -4178,39 +4530,14 @@ def _write_tool_result_log(self, key: str, output: str) -> str:
                 continue
         return ""
 
-    def _trim_tool_result_store(self) -> None:
-        keep = self.protected_result_keys()
-        for old_key in list(self.session.state.tool_result_store):
-            if len(self.session.state.tool_result_store) <= self.MAX_TOOL_RESULT_STORE_ITEMS:
-                return
-            if old_key in keep:
-                continue
-            self.session.state.tool_result_store.pop(old_key)
-
     def parse_tool_call(self, value: JsonValue) -> ParsedToolCall:
         item = _json_dict(value)
         name = _json_str(item.get("name"))
         if not name:
             raise ToolCallArgError('tool action missing required field: name. Use {"type":"tool","name":"Read","intention":"...","args":["path"]}.')
-        if name not in TOOL_REGISTRY and name == name.lower():
-            name = next((registered_name for registered_name in TOOL_REGISTRY if registered_name.lower() == name), name)
+        name = _canonical_tool_name(name)
         intention = _json_str(item.get("intention")) or ""
-        raw_args = _json_list(item.get("args"))
-        args: list[JsonValue] = list(raw_args) if name == ReplaceRangeTool.name() else [_json_str(arg) or "" for arg in raw_args]
-        return ParsedToolCall(name=name, intention=intention, args=args)
-
-    def _invalid_tool_call(self, value: JsonValue) -> ParsedToolCall:
-        item = _json_dict(value)
-        summary = "invalid tool action"
-        if _json_str(item.get("type")) == "tool" and not _json_str(item.get("name")):
-            summary += ": missing required field name"
-        return ParsedToolCall(name="InvalidToolCall", intention=summary, args=[])
-
-    def _make_tool(self, call: ParsedToolCall) -> Tool:
-        tool_class = TOOL_REGISTRY.get(call.name)
-        if tool_class is None:
-            raise ToolCallArgError("tool not found: " + call.name)
-        return tool_class.make(self.session, call.args)
+        return ParsedToolCall(name=name, intention=intention, args=list(_json_list(item.get("args"))))
 
 
 ############################
@@ -4218,18 +4545,14 @@ def _make_tool(self, call: ParsedToolCall) -> Tool:
 ############################
 
 
-STABLE_KNOWLEDGE_CATEGORIES: tuple[str, ...] = ("stack", "structure", "workflow", "convention", "gotcha")
-
-
 class AgentStateUpdater:
     DISPLAY_LIMIT: ClassVar[int] = 5
     COMPACT_DISPLAY_LIMIT: ClassVar[int] = 3
     MAX_KNOWN_ITEMS: ClassVar[int] = 500
-    MAX_STABLE_KNOWLEDGE_ITEMS_PER_CATEGORY: ClassVar[int] = 30
-    VERIFY_STATUS_ACTIONS: ClassVar[dict[str, VerificationStatus]] = {
-        "passed": VerificationStatus.DONE,
-        "failed": VerificationStatus.FAILED,
-        "blocked": VerificationStatus.BLOCKED,
+    CHECK_STATUS_ACTIONS: ClassVar[dict[str, CheckStatus]] = {
+        "passed": CheckStatus.PASSED,
+        "failed": CheckStatus.FAILED,
+        "blocked": CheckStatus.BLOCKED,
     }
 
     def __init__(
@@ -4244,44 +4567,47 @@ def __init__(
         self.changed = False
 
     def apply(self, response: Json) -> None:
-        actions = self._actions(response)
+        actions = [action for action in (_json_dict(item) for item in _json_list(response.get("actions"))) if action]
         before_goal = self.blackboard.goal
         before_plan = [item.format() for item in self.blackboard.plan]
-        before_hypotheses = [item.format() for item in self.blackboard.hypotheses]
+        before_leads = [item.format() for item in self.blackboard.leads]
         before_known = [KnownItem.format_item(item) for item in self.blackboard.known]
         before_user_rules = self.session.state.user_rules.format()
-        before_extra_state = self._before_extra_state()
+        before_checks = self.blackboard.checks.format()
         goal_changed = self._apply_goal(actions)
-        plan_replaced = self._apply_plan(actions)
+        plan_replaced = self._apply_plan(actions, replace_by_default=goal_changed)
         if goal_changed and not plan_replaced:
             self.blackboard.plan = []
-        self._apply_work_mode(actions)
-        self._apply_known(actions)
-        self._apply_hypotheses(actions)
-        self._apply_user_rules(actions)
-        self._apply_extra_state(actions, goal_changed=goal_changed, plan_replaced=plan_replaced)
+        for raw in self._action_items(actions, "known"):
+            item = KnownItem.from_json(raw)
+            if item is not None:
+                self._add_known_item(item.text, item.source)
+        for raw in self._action_items(actions, "lead"):
+            item = Lead.from_json(raw)
+            if item is not None:
+                self._add_lead(item)
+        user_rules_changed = False
+        for action in self._actions_of_type(actions, "user_rule"):
+            rule = (_json_str(action.get("text")) or "").strip()
+            user_rules_changed = self.session.state.user_rules.add(rule) or user_rules_changed
+        if user_rules_changed:
+            self.session.save_user_rules()
+        if goal_changed:
+            self.blackboard.checks_required = False
+        self._reset_stale_checks(actions, goal_changed=goal_changed, plan_replaced=plan_replaced)
+        self._apply_checks(actions)
         self._apply_task_code(actions)
-        self.latest_report = self._format_state_report(
-            before_goal,
-            before_plan,
-            before_hypotheses,
-            before_known,
-            before_user_rules,
-            before_extra_state,
-        )
+        self.latest_report = self._format_state_report(before_goal, before_plan, before_leads, before_known, before_user_rules, before_checks)
         self.changed = bool(self.latest_report)
 
-    def _actions(self, response: Json) -> list[Json]:
-        return [action for action in (_json_dict(item) for item in _json_list(response.get("actions"))) if action]
-
     def _format_state_report(
         self,
         before_goal: str,
         before_plan: list[str],
-        before_hypotheses: list[str],
+        before_leads: list[str],
         before_known: list[str],
         before_user_rules: str,
-        before_extra_state: str,
+        before_checks: str,
     ) -> str:
         current = self.blackboard
         lines = []
@@ -4291,83 +4617,74 @@ def _format_state_report(
         self.latest_compact_plan_rows = []
         if plan != before_plan:
             self.latest_compact_plan_rows = self._compact_changed_plan_rows(before_plan, plan)
-            self._append_state_section(lines, "  Plan", self._format_plan_rows())
-        hypotheses = [item.format() for item in current.hypotheses]
-        if hypotheses != before_hypotheses:
-            self._append_state_section(lines, "  Hypotheses", self._format_hypothesis_rows())
+
+            def render_plan_row(index: int, item: PlanItem) -> list[str]:
+                rows = ["    " + str(index) + ". [" + str(item.status) + "] " + self._compact(item.text)]
+                rows += ["       context: " + self._compact(item.context)] if item.context else []
+                rows += ["       followup_action: " + item.followup_action.format()] if item.followup_action.status != PlanFollowupStatus.UNKNOWN else []
+                rows += ["       followup_check: " + item.followup_check.format()] if item.followup_check.status != PlanFollowupStatus.UNKNOWN else []
+                return rows
+
+            self._append_state_section(lines, "  Plan", self._format_rows(current.plan, render_plan_row))
+        leads = [item.format() for item in current.leads]
+        if leads != before_leads:
+            self._append_state_section(lines, "  Leads", self._format_rows(current.leads, lambda index, item: f"    {index}. {self._compact(item.format())}"))
         known = [KnownItem.format_item(item) for item in current.known]
         if known != before_known:
-            self._append_state_section(lines, "  Known", self._format_known_rows())
+            self._append_state_section(
+                lines, "  Facts", self._format_rows(current.known, lambda index, item: f"    {index}. {self._compact(KnownItem.format_item(item))}")
+            )
         user_rules = self.session.state.user_rules.format()
         if user_rules != before_user_rules:
             self._append_state_section(lines, "  User_Rules    updated")
-        self._append_extra_state_report(lines, before_extra_state)
+        checks = self.blackboard.checks.format()
+        if checks != before_checks:
+            self._append_state_section(lines, "  Checks  " + self._format_checks())
         return "\n".join(lines)
 
-    def _format_plan_rows(self) -> list[str]:
-        items = self.blackboard.plan
-        if not items:
-            return ["    (empty)"]
-        offset = max(0, len(items) - self.DISPLAY_LIMIT)
-        rows = ["    ... " + str(offset) + " older"] if offset else []
-        for index, item in enumerate(items[offset:], start=offset + 1):
-            rows.append("    " + str(index) + ". [" + str(item.status) + "] " + self._compact(item.text))
-            if item.context:
-                rows.append("       context: " + self._compact(item.context))
-        return rows
-
-    def _format_known_rows(self) -> list[str]:
-        items = self.blackboard.known
+    def _format_rows(self, items: list[Any], render: Callable[[int, Any], str | list[str]]) -> list[str]:  # rows for the last DISPLAY_LIMIT items; render gets (1-based index, item)
         if not items:
             return ["    (empty)"]
         offset = max(0, len(items) - self.DISPLAY_LIMIT)
         rows = ["    ... " + str(offset) + " older"] if offset else []
         for index, item in enumerate(items[offset:], start=offset + 1):
-            rows.append("    " + str(index) + ". " + self._compact(KnownItem.format_item(item)))
-        return rows
-
-    def _format_hypothesis_rows(self) -> list[str]:
-        items = self.blackboard.hypotheses
-        if not items:
-            return ["    (empty)"]
-        offset = max(0, len(items) - self.DISPLAY_LIMIT)
-        rows = ["    ... " + str(offset) + " older"] if offset else []
-        for index, item in enumerate(items[offset:], start=offset + 1):
-            rows.append("    " + str(index) + ". " + self._compact(item.format()))
+            rendered = render(index, item)  # one row (str) or several rows (list[str]) per item
+            rows.extend(rendered if isinstance(rendered, list) else [rendered])  # a single string is wrapped so both shapes extend the same way
         return rows
 
     def compact_report(self) -> str:
-        sections = []
-        if "  Plan" in self.latest_report and self.blackboard.plan:
-            sections.append("Plan")
-        if "  Hypotheses" in self.latest_report and self.blackboard.hypotheses:
-            sections.append("Hypotheses")
-        if "  Known" in self.latest_report and self.blackboard.known:
-            sections.append("Known")
+        sections = [
+            (name, rows)
+            for name, changed, rows in (
+                ("Goal", "  Goal" in self.latest_report, ["  " + self._compact(self.blackboard.goal or "(empty)")]),
+                ("Plan", "  Plan" in self.latest_report and self.blackboard.plan, self.latest_compact_plan_rows or self._compact_plan_rows()),
+                (
+                    "Leads",
+                    "  Leads" in self.latest_report and self.blackboard.leads,
+                    self._compact_rows(self.blackboard.leads, lambda item: self._compact(item.format(), 100)),
+                ),
+                (
+                    "Facts",
+                    "  Facts" in self.latest_report and self.blackboard.known,
+                    self._compact_rows(self.blackboard.known, lambda item: self._compact(KnownItem.format_item(item), 100)),
+                ),
+                ("Checks", "  Checks" in self.latest_report, ["  " + self._format_checks()]),
+                ("User Rules", "  User_Rules" in self.latest_report, ["  updated"]),
+            )
+            if changed
+        ]
         if not sections:
             return ""
-        lines = [" + ".join(sections) + " Updated"]
+        lines = [" + ".join(name for name, _ in sections) + " Updated"]
         grouped = len(sections) > 1
-        if "Plan" in sections:
+        for name, rows in sections:
             if grouped:
-                lines.append("Plan")
-            lines.extend(self.latest_compact_plan_rows or self._compact_plan_rows())
-        if "Hypotheses" in sections:
-            if grouped:
-                lines.append("Hypotheses")
-            lines.extend(self._compact_hypothesis_rows())
-        if "Known" in sections:
-            if grouped:
-                lines.append("Known")
-            lines.extend(self._compact_known_rows())
+                lines.append(name)
+            lines.extend(rows)
         return "\n".join(lines)
 
     def _compact_plan_rows(self) -> list[str]:
-        items = self.blackboard.plan
-        offset = max(0, len(items) - self.COMPACT_DISPLAY_LIMIT)
-        rows = ["  ... " + str(offset) + " older"] if offset else []
-        rows.extend(self._compact_plan_row(index, item) for index, item in enumerate(items[offset:], start=offset + 1))
-        return rows
+        return self._compact_rows(self.blackboard.plan, lambda item: "[" + str(item.status) + "] " + self._compact(item.text, 90))
 
     def _compact_changed_plan_rows(self, before_plan: list[str], plan: list[str]) -> list[str]:
         if not before_plan:
@@ -4381,24 +4698,15 @@ def _compact_changed_plan_rows(self, before_plan: list[str], plan: list[str]) ->
             return self._compact_plan_rows()
         offset = max(0, len(indexes) - self.COMPACT_DISPLAY_LIMIT)
         rows = ["  ... " + str(offset) + " changed older"] if offset else []
-        rows.extend(self._compact_plan_row(index + 1, self.blackboard.plan[index]) for index in indexes[offset:])
-        return rows
-
-    def _compact_plan_row(self, index: int, item: PlanItem) -> str:
-        return "  " + str(index) + ". [" + str(item.status) + "] " + self._compact(item.text, 90)
-
-    def _compact_known_rows(self) -> list[str]:
-        items = self.blackboard.known
-        offset = max(0, len(items) - self.COMPACT_DISPLAY_LIMIT)
-        rows = ["  ... " + str(offset) + " older"] if offset else []
-        rows.extend("  " + str(index) + ". " + self._compact(KnownItem.format_item(item), 100) for index, item in enumerate(items[offset:], start=offset + 1))
+        for index in indexes[offset:]:
+            item = self.blackboard.plan[index]
+            rows.append("  " + str(index + 1) + ". [" + str(item.status) + "] " + self._compact(item.text, 90))
         return rows
 
-    def _compact_hypothesis_rows(self) -> list[str]:
-        items = self.blackboard.hypotheses
+    def _compact_rows(self, items: list[Any], render: Callable[[Any], str]) -> list[str]:  # numbered rows for the last COMPACT_DISPLAY_LIMIT items
         offset = max(0, len(items) - self.COMPACT_DISPLAY_LIMIT)
         rows = ["  ... " + str(offset) + " older"] if offset else []
-        rows.extend("  " + str(index) + ". " + self._compact(item.format(), 100) for index, item in enumerate(items[offset:], start=offset + 1))
+        rows.extend("  " + str(index) + ". " + render(item) for index, item in enumerate(items[offset:], start=offset + 1))  # numbering continues from the items' original 1-based positions
         return rows
 
     def _compact(self, text: str, limit: int = 140) -> str:
@@ -4407,46 +4715,34 @@ def _compact(self, text: str, limit: int = 140) -> str:
 
     def _apply_goal(self, actions: list[Json]) -> bool:
         changed = False
-        for action in actions:
-            action_type = _json_str(action.get("type"))
-            if action_type == "start":
-                update = _json_str(action.get("goal"))
-                if update:
-                    goal_changed = update != self.blackboard.goal
-                    changed = changed or goal_changed
-                    self.blackboard.goal = update
-                    self.blackboard.goal_reached = False
-            if action_type == "goal":
-                update = _json_str(action.get("text"))
-                complete = action.get("complete")
-                if update is not None:
-                    goal_changed = update != self.blackboard.goal
-                    changed = changed or (goal_changed and complete is not True)
-                    self.blackboard.goal = update
-                if isinstance(complete, bool):
-                    self.blackboard.goal_reached = complete
+        for action in self._actions_of_type(actions, "goal"):
+            update = _json_str(action.get("text"))
+            complete = action.get("complete")
+            if update is not None:
+                goal_changed = update != self.blackboard.goal
+                changed = changed or (goal_changed and complete is not True)
+                self.blackboard.goal = update
+            if isinstance(complete, bool):
+                self.blackboard.goal_reached = complete
         return changed
 
-    def _apply_plan(self, actions: list[Json]) -> bool:
+    def _apply_plan(self, actions: list[Json], *, replace_by_default: bool = False) -> bool:  # returns True only when the plan list was replaced wholesale
         replaced = False
-        for start in [action for action in actions if _json_str(action.get("type")) == "start"]:
-            items = [item for item in (self._plan_item_from_json(raw) for raw in _json_list(start.get("plan"))) if item]
-            if items:
-                self._normalize_doing_items(items)
-                self.blackboard.plan = items
-                replaced = True
-        for update in [action for action in actions if _json_str(action.get("type")) == "plan"]:
+        for update in self._actions_of_type(actions, "plan"):
             items = _json_list(update.get("items"))
-            if update.get("mode") != "patch":
-                if not items:
-                    continue
-                plan = [item for item in (self._plan_item_from_json(raw) for raw in items) if item]
-                self._normalize_doing_items(plan)
-                self.blackboard.plan = plan
-                replaced = True
+            mode = _json_str(update.get("mode"))
+            existing_ids = {item.id for item in self.blackboard.plan if item.id}  # non-empty ids currently in the plan, re-read for every action
+            targets_existing = bool(existing_ids) and any(_json_str(_json_dict(raw).get("id")) in existing_ids for raw in items)  # True when an incoming item carries an id already in the plan
+            if mode == "patch" or (not replace_by_default and mode != "replace" and targets_existing):  # patch on request, or when mode is not "replace", replace_by_default is off and a known id is referenced
+                if self._apply_plan_patches(self.blackboard.plan, items):
+                    self._normalize_doing_items(self.blackboard.plan)
                 continue
-            if self._apply_plan_patches(self.blackboard.plan, items):
-                self._normalize_doing_items(self.blackboard.plan)
+            if not items:  # a replace action with no items leaves the current plan untouched
+                continue
+            plan = [item for item in (self._plan_item_from_json(raw) for raw in items) if item]
+            self._normalize_doing_items(plan)
+            self.blackboard.plan = plan
+            replaced = True
         return replaced
 
     def _apply_plan_patches(self, plan: list[PlanItem], value: JsonValue) -> bool:
@@ -4465,13 +4761,21 @@ def _apply_plan_patches(self, plan: list[PlanItem], value: JsonValue) -> bool:
                 text = _json_str(patch.get("text")) if "text" in patch else None
                 status = _json_str(patch.get("status")) if "status" in patch else None
                 context = _json_str(patch.get("context")) if "context" in patch else existing.context
+                followup_action = (
+                    self._plan_followup(patch.get("followup_action"), existing.followup_action) if "followup_action" in patch else existing.followup_action
+                )
+                followup_check = (
+                    self._plan_followup(patch.get("followup_check"), existing.followup_check) if "followup_check" in patch else existing.followup_check
+                )
                 updated = (
                     text or existing.text,
                     PlanStatus(status) if status in ALL_PLAN_STATUSES else existing.status,
                     context or "",
+                    followup_action,
+                    followup_check,
                 )
-                changed = changed or (existing.text, existing.status, existing.context) != updated
-                existing.text, existing.status, existing.context = updated
+                changed = changed or (existing.text, existing.status, existing.context, existing.followup_action, existing.followup_check) != updated
+                existing.text, existing.status, existing.context, existing.followup_action, existing.followup_check = updated
                 continue
             plan_item = self._plan_item_from_json(patch)
             if plan_item is None:
@@ -4481,6 +4785,9 @@ def _apply_plan_patches(self, plan: list[PlanItem], value: JsonValue) -> bool:
         return changed
 
     def _plan_item_from_json(self, value: JsonValue) -> PlanItem | None:
+        if isinstance(value, str):
+            text = value.strip()
+            return PlanItem(text=text) if text else None
         item = _json_dict(value)
         text = _json_str(item.get("text"))
         if not text:
@@ -4493,8 +4800,24 @@ def _plan_item_from_json(self, value: JsonValue) -> PlanItem | None:
             status=PlanStatus(status),
             id=_json_str(item.get("id")) or "",
             context=_json_str(item.get("context")) or "",
+            followup_action=self._plan_followup(item.get("followup_action")),
+            followup_check=self._plan_followup(item.get("followup_check")),
         )
 
+    @staticmethod
+    def _plan_followup(value: JsonValue, default: PlanFollowup | None = None) -> PlanFollowup:
+        fallback = default or PlanFollowup()
+        item = _json_dict(value)
+        if not item:
+            return fallback
+        raw_status = _json_str(item.get("status"))
+        status = PlanFollowupStatus(raw_status) if raw_status in ALL_PLAN_FOLLOWUP_STATUSES else fallback.status
+        reason_value = _json_str(item.get("reason")) if "reason" in item else fallback.reason
+        reason = _shorten(" ".join((reason_value or "").split()), 240)
+        if status != fallback.status and "reason" not in item:
+            reason = ""
+        return PlanFollowup(status=status, reason=reason)
+
     @staticmethod
     def _normalize_doing_items(plan: list[PlanItem]) -> None:
         seen = False
@@ -4506,37 +4829,14 @@ def _normalize_doing_items(plan: list[PlanItem]) -> None:
             else:
                 seen = True
 
-    def _apply_known(self, actions: list[Json]) -> None:
-        for action in actions:
-            values = _json_list(action.get("items")) if _json_str(action.get("type")) == "known" else []
-            for raw in values:
-                item = KnownItem.from_json(raw)
-                if item is not None:
-                    self._add_known_item(item.text, item.source)
-
-    def _apply_hypotheses(self, actions: list[Json]) -> None:
-        for action in actions:
-            values = _json_list(action.get("items")) if _json_str(action.get("type")) == "hypothesis" else []
-            for raw in values:
-                item = Hypothesis.from_json(raw)
-                if item is not None:
-                    self._add_hypothesis(item)
-
-    def _apply_work_mode(self, actions: list[Json]) -> None:
-        for action in actions:
-            if _json_str(action.get("type")) != "start":
-                continue
-            mode = _json_str(action.get("work_mode")) or WorkMode.NORMAL
-            self.blackboard.work_mode = WorkMode(mode) if mode in ALL_WORK_MODES else WorkMode.NORMAL
-
-    def _add_hypothesis(self, item: Hypothesis) -> None:
-        for index, existing in enumerate(self.blackboard.hypotheses):
+    def _add_lead(self, item: Lead) -> None:
+        for index, existing in enumerate(self.blackboard.leads):
             same_id = item.id and item.id == existing.id
-            same_text = self._hypothesis_key(item.text) == self._hypothesis_key(existing.text)
+            same_text = self._lead_key(item.text) == self._lead_key(existing.text)
             if not same_id and not same_text:
                 continue
             source = tuple(dict.fromkeys((*existing.source, *item.source)))
-            self.blackboard.hypotheses[index] = Hypothesis(
+            self.blackboard.leads[index] = Lead(
                 text=item.text or existing.text,
                 status=item.status,
                 id=item.id or existing.id,
@@ -4544,209 +4844,101 @@ def _add_hypothesis(self, item: Hypothesis) -> None:
                 context=item.context or existing.context,
             )
             return
-        self.blackboard.hypotheses.append(item)
+        self.blackboard.leads.append(item)
 
-    def _hypothesis_key(self, text: str) -> str:
+    def _lead_key(self, text: str) -> str:
         return re.sub(r"\s+", " ", text).strip(" \t\r\n。.;；").lower()
 
-    def _apply_user_rules(self, actions: list[Json]) -> None:
-        changed = False
-        for action in actions:
-            if _json_str(action.get("type")) != "user_rule":
-                continue
-            rule = (_json_str(action.get("text")) or "").strip()
-            changed = self.session.state.user_rules.add(rule) or changed
-        if changed:
-            self.session.save_user_rules()
-
     def _add_known_item(self, fact: str, source: tuple[str, ...] = ()) -> None:
         fact = _shorten(" ".join(fact.split()))
+        fact_key = self._known_fact_key(fact)
         for index, existing in enumerate(self.blackboard.known):
-            if self._known_facts_overlap(existing, fact):
-                text = KnownItem.text_of(existing)
-                merged_source = tuple(dict.fromkeys((*KnownItem.source_of(existing), *source)))
-                if len(fact) > len(text):
-                    self.blackboard.known[index] = KnownItem(text=fact, source=merged_source)
-                elif merged_source != KnownItem.source_of(existing):
-                    self.blackboard.known[index] = KnownItem(text=text, source=merged_source)
-                return
+            existing_key = self._known_fact_key(existing)
+            if existing_key != fact_key and not (min(len(existing_key), len(fact_key)) >= 32 and (existing_key in fact_key or fact_key in existing_key)):
+                continue
+            text = KnownItem.text_of(existing)
+            merged_source = tuple(dict.fromkeys((*KnownItem.source_of(existing), *source)))
+            if len(fact) > len(text):
+                self.blackboard.known[index] = KnownItem(text=fact, source=merged_source)
+            elif merged_source != KnownItem.source_of(existing):
+                self.blackboard.known[index] = KnownItem(text=text, source=merged_source)
+            return
         self.blackboard.known.append(KnownItem(text=fact, source=source))
         del self.blackboard.known[: max(0, len(self.blackboard.known) - self.MAX_KNOWN_ITEMS)]
 
-    def _known_facts_overlap(self, left: KnownItem | str, right: KnownItem | str) -> bool:
-        left_key = self._known_fact_key(left)
-        right_key = self._known_fact_key(right)
-        if left_key == right_key:
-            return True
-        return min(len(left_key), len(right_key)) >= 32 and (left_key in right_key or right_key in left_key)
-
     def _known_fact_key(self, fact: KnownItem | str) -> str:
         return re.sub(r"\s+", " ", KnownItem.text_of(fact)).strip(" \t\r\n。.;；").lower()
 
-    def _before_extra_state(self) -> str:
-        return json.dumps(
-            {
-                "verification": self.blackboard.verification.format(),
-                "stable_knowledge": self.blackboard.stable_knowledge,
-            },
-            ensure_ascii=False,
-        )
-
-    def _apply_extra_state(self, actions: list[Json], *, goal_changed: bool, plan_replaced: bool) -> None:
-        self._apply_stable_knowledge(actions)
-        if goal_changed:
-            self.blackboard.verification_required = False
-        self._reset_stale_verification(actions, goal_changed=goal_changed, plan_replaced=plan_replaced)
-        self._apply_verification(actions)
-        self._bind_verification_goal()
-
     def _apply_task_code(self, actions: list[Json]) -> None:
         action_types = {_json_str(action.get("type")) for action in actions}
-        if self.blackboard.verification_required or self.blackboard.verification.status == VerificationStatus.REQUIRED:
-            self.blackboard.task_code = TaskCode.VERIFYING
+        if self.blackboard.checks_required or self.blackboard.checks.status == CheckStatus.REQUIRED:
+            self.blackboard.task_code = TaskCode.CHECKING
             return
         if "verify" in action_types:
             self.blackboard.task_code = TaskCode.WORKING
             return
-        if "start" in action_types:
-            self.blackboard.task_code = TaskCode.WORKING
-            return
-        if any(action_type in action_types for action_type in ("goal", "plan", "known", "stable_knowledge", "progress", "tool")) and not self.blackboard.goal_reached:
+        tracked_state = bool(self.blackboard.goal or self.blackboard.plan or self.blackboard.leads)
+        if (
+            "goal" in action_types or "plan" in action_types or "lead" in action_types or (tracked_state and "tool" in action_types)
+        ) and not self.blackboard.goal_reached:
             self.blackboard.task_code = TaskCode.WORKING
 
     def _append_state_section(self, lines: list[str], title: str, rows: list[str] | None = None) -> None:
-        if not lines:
-            lines.append("State Updated | VERIFY:" + self.blackboard.verification.status)
         lines.append(title)
         lines.extend(rows or [])
 
-    def _append_extra_state_report(self, lines: list[str], before_extra_state: str) -> None:
-        try:
-            before = _json_dict(json.loads(before_extra_state))
-        except json.JSONDecodeError:
-            before = {}
-        if self.blackboard.stable_knowledge != before.get("stable_knowledge", []):
-            self._append_state_section(lines, "  Stable_Knowledge", self._format_stable_knowledge_rows())
-        verification = self.blackboard.verification.format()
-        if verification == before.get("verification", ""):
-            return
-        self._append_state_section(lines, "  Verify  " + self._format_verification())
-
-    def _format_stable_knowledge_rows(self) -> list[str]:
-        knowledge = self.blackboard.stable_knowledge
-        if not any(knowledge.values()):
-            return ["    (empty)"]
-        rows = []
-        for category in STABLE_KNOWLEDGE_CATEGORIES:
-            items = knowledge.get(category, [])
-            if not items:
-                continue
-            rows.append("    " + category)
-            offset = max(0, len(items) - self.DISPLAY_LIMIT)
-            if offset:
-                rows.append("      ... " + str(offset) + " older")
-            for index, item in enumerate(items[offset:], start=offset + 1):
-                rows.append("      " + str(index) + ". " + self._compact(item))
-        return rows
-
-    def _apply_stable_knowledge(self, actions: list[Json]) -> None:
-        for action in actions:
-            values = _json_list(action.get("items")) if _json_str(action.get("type")) == "stable_knowledge" else []
-            for raw in values:
-                category, fact = self._stable_knowledge_item_from_json(raw)
-                if fact:
-                    self._add_stable_knowledge_item(category, fact)
+    @staticmethod
+    def _actions_of_type(actions: list[Json], action_type: str) -> Iterator[Json]:
+        return (action for action in actions if _json_str(action.get("type")) == action_type)
 
-    def _stable_knowledge_item_from_json(self, value: JsonValue) -> tuple[str, str]:
-        item = _json_dict(value)
-        if item:
-            category = _json_str(item.get("category")) or "gotcha"
-            fact = (_json_str(item.get("text")) or _json_str(item.get("fact")) or "").strip()
-        else:
-            category = "gotcha"
-            fact = (_json_str(value) or "").strip()
-        if category not in STABLE_KNOWLEDGE_CATEGORIES:
-            category = "gotcha"
-        return category, fact
-
-    def _add_stable_knowledge_item(self, category: str, fact: str) -> None:
-        knowledge = self.blackboard.stable_knowledge
-        items = knowledge.setdefault(category, [])
-        if fact in items:
-            return
-        items.append(fact)
-        del items[: max(0, len(items) - self.MAX_STABLE_KNOWLEDGE_ITEMS_PER_CATEGORY)]
+    def _action_items(self, actions: list[Json], action_type: str) -> Iterator[JsonValue]:
+        return (raw for action in self._actions_of_type(actions, action_type) for raw in _json_list(action.get("items")))
 
-    def _format_verification(self) -> str:
-        verification = self.blackboard.verification
-        parts = [verification.status]
+    def _format_checks(self) -> str:
+        checks = self.blackboard.checks
+        parts = [checks.status]
         parts.extend(
             part
             for part in (
-                verification.kind,
-                self._compact(verification.method) if verification.method else "",
-                "criteria: " + self._compact("; ".join(verification.criteria)) if verification.criteria else "",
-                "context: " + self._compact(verification.context) if verification.context else "",
-                "blocker: " + verification.blocker if verification.blocker else "",
+                self._compact(checks.method) if checks.method else "",
+                "context: " + self._compact(checks.context) if checks.context else "",
+                "blocker: " + checks.blocker if checks.blocker else "",
             )
             if part
         )
         return " | ".join(parts)
 
-    def _apply_verification(self, actions: list[Json]) -> None:
-        for data in [action for action in actions if _json_str(action.get("type")) == "verify"]:
-            kind = _json_str(data.get("kind"))
-            if kind is not None:
-                self.blackboard.verification.kind = kind if kind and all(part in VALID_VERIFICATION_KINDS for part in kind.split("+")) else ""
-            criteria = [item for item in ((_json_str(raw) or "").strip() for raw in _json_list(data.get("criteria"))) if item]
-            if "criteria" in data:
-                self.blackboard.verification.criteria = criteria
+    def _apply_checks(self, actions: list[Json]) -> None:
+        for data in self._actions_of_type(actions, "verify"):
             method = _json_str(data.get("method"))
             if method is not None:
-                if method != self.blackboard.verification.method:
-                    self.blackboard.verification.context = ""
-                self.blackboard.verification.method = method
-            status = self.VERIFY_STATUS_ACTIONS.get(_json_str(data.get("status")) or "")
+                if method != self.blackboard.checks.method:
+                    self.blackboard.checks.context = ""
+                self.blackboard.checks.method = method
+            status = self.CHECK_STATUS_ACTIONS.get(_json_str(data.get("status")) or "")
             if status is not None:
-                self.blackboard.verification.status = status
-                self.blackboard.verification_required = False
-                if status != VerificationStatus.BLOCKED:
-                    self.blackboard.verification.blocker = VerificationBlocker.NONE
+                self.blackboard.checks.status = status
+                self.blackboard.checks_required = False
+                if status != CheckStatus.BLOCKED:
+                    self.blackboard.checks.blocker = CheckBlocker.NONE
             blocker = _json_str(data.get("blocker"))
             if blocker is not None:
-                self.blackboard.verification.blocker = VerificationBlocker(blocker) if blocker in ALL_VERIFICATION_BLOCKERS else VerificationBlocker.NONE
+                self.blackboard.checks.blocker = CheckBlocker(blocker) if blocker in ALL_CHECK_BLOCKERS else CheckBlocker.NONE
             context = _json_str(data.get("context"))
             if context is not None:
-                self.blackboard.verification.context = context
+                self.blackboard.checks.context = context
 
-    def _reset_stale_verification(self, actions: list[Json], *, goal_changed: bool, plan_replaced: bool) -> None:
-        verification = self.blackboard.verification
+    def _reset_stale_checks(self, actions: list[Json], *, goal_changed: bool, plan_replaced: bool) -> None:
+        checks = self.blackboard.checks
         if goal_changed:
-            verification.reset()
-            return
-        if verification.goal and verification.goal != self.blackboard.goal:
-            verification.reset()
+            checks.reset()
             return
         if (
             plan_replaced
             and not any(_json_str(action.get("type")) == "verify" for action in actions)
-            and verification.status
-            in {
-                VerificationStatus.REQUIRED,
-                VerificationStatus.DONE,
-                VerificationStatus.FAILED,
-                VerificationStatus.BLOCKED,
-            }
+            and checks.status in {CheckStatus.REQUIRED, CheckStatus.PASSED, CheckStatus.FAILED, CheckStatus.BLOCKED}
         ):
-            verification.reset()
-
-    def _bind_verification_goal(self) -> None:
-        verification = self.blackboard.verification
-        if not verification.has_context():
-            verification.goal = ""
-            return
-        if self.blackboard.goal:
-            verification.goal = self.blackboard.goal
+            checks.reset()
 
 
 ############################
@@ -4786,8 +4978,14 @@ def _summarize(self, items: list[ConversationItem]) -> tuple[str, list[KnownItem
             known="\n".join(KnownItem.format_item(item) for item in self.blackboard.known) or "(empty)",
             conversation="\n\n".join(item.format() for item in items),
         ).strip()
-        kwargs = {"parse_actions": False} if isinstance(self.model_client, ModelClient) else {}
-        response = self.model_client.request(COMPACTOR_PROMPT.strip(), user_prompt, activity="compact", **kwargs)
+        response = self.model_client.request(
+            COMPACTOR_PROMPT.strip(), user_prompt, activity="compact", tool_schemas=[COMPACT_TOOL_SCHEMA], required_tool="compact"
+        )
+        if "actions" in response:
+            response = next(
+                (_json_dict(action) for action in _json_list(response.get("actions")) if _json_str(_json_dict(action).get("type")) == "compact"),
+                {},
+            )
         summary = _json_str(response.get("summary"))
         if not summary:
             raise LLMError("compact response missing summary")
@@ -4797,14 +4995,6 @@ def _summarize(self, items: list[ConversationItem]) -> tuple[str, list[KnownItem
         return summary, known[-self.MAX_COMPACTED_KNOWN_ITEMS :]
 
 
-############################
-# Verification
-############################
-
-
-VALID_VERIFICATION_KINDS: set[str] = {"syntax_check", "change_syntax_check", "lint", "test", "build", "change_check", "other"}
-
-
 ############################
 # Agent
 ############################
@@ -4814,21 +5004,21 @@ def _summarize(self, items: list[ConversationItem]) -> tuple[str, list[KnownItem
 class ResponseContext:
     response: Json
     actions: list[Json]
+    assistant_text: str
     goal_was_empty: bool
     plan_was_empty: bool
     plan_was_complete: bool
-    verification_was_settled: bool
+    checks_settled: bool
     goal_will_change: bool
-    chat_message: str | None
     tool_calls: list[JsonValue]
-    pending_verify_requested: bool
-    progress_messages: list[str]
+    pending_check_requested: bool
     user_rule_message: str | None
     completion_message: str
     has_goal_action: bool
     has_plan_action: bool
     has_fresh_plan_action: bool
     has_user_rule_action: bool
+    has_edit_tool_call: bool
     has_state_update_action: bool
     state_or_work_requested: bool
 
@@ -4843,58 +5033,38 @@ class Agent:
     MAX_AGENT_FEEDBACK_ERRORS: ClassVar[int] = 8
     MAX_AGENT_FEEDBACK_ERROR_LEN: ClassVar[int] = 220
     MODEL_TIMEOUT_RETRY_DELAYS: ClassVar[tuple[int, ...]] = (3, 10, 20, 30, 60, 120)
-    blackboard: Blackboard
-    ACT_ACTION_TYPES: ClassVar[set[str]] = {
-        "chat",
-        "start",
-        "goal",
-        "plan",
-        "hypothesis",
-        "known",
-        "stable_knowledge",
-        "progress",
-        "tool",
-        "verify",
-        "user_rule",
-        "forget",
-    }
-    PLAN_ACTION_TYPES: ClassVar[set[str]] = ACT_ACTION_TYPES - {"chat", "user_rule", "forget"}
-    OBSERVE_ACTION_TYPES: ClassVar[set[str]] = {"keep", "hypothesis", "known", "stable_knowledge", "forget"}
+    ACT_ACTION_TYPES: ClassVar[set[str]] = {"goal", "plan", "lead", "known", "tool", "verify", "user_rule", "forget"}
+    OBSERVE_ACTION_TYPES: ClassVar[set[str]] = {"keep", "lead", "known", "forget"}
     COMPLETED_PLAN_STATUSES: ClassVar[set[PlanStatus]] = {PlanStatus.DONE, PlanStatus.BLOCKED}
     MAX_COMPLETED_GOAL_TOOL_RESULTS: ClassVar[int] = 50
     RECENT_EDITS: ClassVar[int] = 20
-    # Reducer trigger, not a pre-observe truncation limit: unreduced raw must stay visible until OBSERVE can keep or forget it.
-    TOOL_RESULT_RAW_CHARS: ClassVar[int] = 72_000
-    # Raw results explicitly kept by OBSERVE are bounded separately from latest/unreduced raw.
-    KEPT_TOOL_RESULT_CHARS: ClassVar[int] = 96_000
-    # Compact recall/timeline entries shown in Tool Result Index; current-task timeline has priority over archived entries.
-    TOOL_RESULT_INDEX_ITEMS: ClassVar[int] = 40
-    # Trigger observe after this many unresolved raw result blocks accumulate; raw-size pressure can still trigger earlier.
-    OBSERVE_AFTER_PENDING_RESULT_COUNT: ClassVar[int] = 12
-    PLAN_MODE_GIT_READONLY: ClassVar[frozenset[str]] = GIT_READONLY_COMMANDS
     RULE_VISIBLE_RESULTS: ClassVar[str] = "use visible tool result keys only."
-    RULE_CLOSE_SOURCE: ClassVar[str] = "close the hypothesis before forgetting its source."
-    RULE_CHANGE_FAILED_TOOL: ClassVar[str] = "change args or switch tools; after edit failures prefer ReplaceRange after Read."
+    RULE_CLOSE_SOURCE: ClassVar[str] = "close or update state that depends on the result before forgetting its source."
+    RULE_CHANGE_FAILED_TOOL: ClassVar[str] = "change args or switch tools; after edit failures use a smaller batch and reread only stale ranges."
     RULE_GOAL_PLAN_FIRST: ClassVar[str] = "set goal and a short plan before mutating tools or verify."
-    RULE_VERIFY_DIRECTLY: ClassVar[str] = 'run verification tools, then report verify status="passed"|"failed"|"blocked".'
+    RULE_VERIFY_DIRECTLY: ClassVar[str] = 'run checks, then report verify status="passed"|"failed"|"blocked".'
     RULE_TOOL_SIGNATURE: ClassVar[str] = "use the tool signature exactly."
-    RULE_EDIT_SIGNATURE: ClassVar[str] = "use ReplaceRange for read ranges or repeated text, and use the exact tool signature."
+    RULE_EDIT_SIGNATURE: ClassVar[str] = "use Edit(filepath, edits) with visible line anchors; split oversized batches."
     RULE_COMPLETE_PLAN: ClassVar[str] = "mark every Plan item done or blocked with result context before completion."
-    RULE_BLOCKED_BY_USER: ClassVar[str] = "complete blocked verification only when blocker=user."
-    RULE_FINAL_ACTION: ClassVar[str] = "continue with a useful action or finish with goal.complete=true."
-    RULE_ACTION_FRAMES: ClassVar[str] = "return valid JSON action frames only."
+    RULE_PLAN_FOLLOWUP: ClassVar[str] = "set followup_action and followup_check as {status, reason}; resolve needed before completion."
+    RULE_BLOCKED_BY_USER: ClassVar[str] = "complete blocked Checks only when blocker=user."
+    RULE_FUNCTION_TOOLS: ClassVar[str] = "use the provided function tools."
+    RULE_VALID_TOOL_JSON: ClassVar[str] = "rebuild valid function arguments; for Edit, use one file/logical block and split oversized batches."
+    STALE_TOOL_FEEDBACK_MARKERS: ClassVar[tuple[str, ...]] = (
+        "invalid function/tool response",
+        "invalid function-tool response",
+        "tool call args invalid",
+        "edit failed:",
+        "repeated same failed tool call",
+        "tool call was cancelled",
+        "state update-only turn",
+    )
 
     def __init__(self, session: Session):
         self.session = session
-        self.blackboard = Blackboard()
-        self.runtime = AgentRuntime()
+        self.blackboard: Blackboard = Blackboard()
+        self.recent_edits: list[str] = []
         self.tool_context = ToolResultContext()
-        self.prompt_builder = PromptBuilder(
-            session,
-            blackboard=self.blackboard,
-            runtime=self.runtime,
-            tool_context=self.tool_context,
-        )
         self.model_client = ModelClient(session)
         self.tool_runner = ToolCallRunner(session, self._protected_tool_result_keys)
         self.state_updater = AgentStateUpdater(session, self.blackboard)
@@ -4904,22 +5074,102 @@ def __init__(self, session: Session):
         self.agent_feedback_errors: list[str] = []
         self.observe_feedback_errors: list[str] = []
         self.task_alignment_required = False
+        self.incomplete_task_context_at_turn_start = False
+        self.stream_stop_requested = False
         self.mode = AgentMode.ACT
 
+    def context_budget(self) -> ContextBudget:
+        return CONTEXT_BUDGETS[self.session.settings.context_budget]
+
+    def apply_context_budget(self) -> None:
+        budget = self.context_budget()
+        checkpoint = self.blackboard.memory_checkpoint_tool_result_counter
+        self.tool_context.bound_kept(max_chars=budget.kept_chars, max_block_chars=budget.kept_block_chars)
+        self.tool_context.prune_recent(max_index_items=budget.index_items, checkpoint=checkpoint)
+
     def build_user_prompt(self) -> str:
         tool_result_index, unreduced_tool_results, latest_tool_results = self._format_act_tool_result_context()
-        return self.prompt_builder.user_prompt(
-            tool_result_index=tool_result_index,
-            unreduced_tool_results=unreduced_tool_results,
-            latest_tool_results=latest_tool_results,
-            errors=self._format_agent_feedback(),
-        )
+        conversation = self.session.state.conversation
+        return AGENT_USER_PROMPT_TEMPLATE.format(
+            environment=self._format_environment(),
+            conversation_history="\n\n".join(item.format() for item in conversation) if conversation else "(empty)",
+            user_rules=self.session.state.user_rules.format(),
+            kept_tool_results="\n\n".join(self.tool_context.kept_results) or "(empty)",
+            tool_result_index=tool_result_index or "(empty)",
+            unreduced_tool_results=unreduced_tool_results or "(empty)",
+            latest_tool_results=latest_tool_results or "(empty)",
+            state_sections=self._format_state_sections(),
+            errors="\n".join("! " + error for error in self.agent_feedback_errors) or "(empty)",
+            recent_edits="\n".join(self.recent_edits) if self.recent_edits else "(empty)",
+            pending_user_feedback=self.session.state.pending_user_feedback or "(empty)",
+            user_request=self._format_user_request(),
+        ).strip()
+
+    def _format_state_sections(self) -> str:
+        current = self.blackboard
+        sections: list[str] = []
+
+        def add(name: str, value: str) -> None:
+            value = value.strip()
+            if value:
+                sections.append(name + ":\n" + value)
+
+        add("Goal", current.goal)
+        if current.known:
+            add("Facts", "\n".join(KnownItem.format_item(item) for item in current.known))
+        if current.leads:
+            add("Leads", "\n".join(item.format() for item in current.leads))
+        if current.plan:
+            add("Plan", "\n".join(item.format() for item in current.plan))
+            focus = next((item for item in current.plan if item.status == PlanStatus.DOING), None) or next(
+                (item for item in current.plan if item.status == PlanStatus.TODO),
+                None,
+            )
+            add("Current Focus", focus.format() if focus else "(empty)")
+        if current.checks.has_context() or current.checks_required:
+            add("Checks", current.checks.format() if current.checks.has_context() else "status: required")
+        return "\n\n".join(sections) if sections else "(empty)"
+
+    def _format_environment(self) -> str:
+        lines = [
+            "- system: " + self.session.system,
+            "- arch: " + self.session.arch,
+            "- cwd: " + self.session.cwd,
+        ]
+        shell_tools = [name for name in ("find", "rg", "python3", "perl", "sed", "awk", "xargs", "grep", "jq") if shutil.which(name)]
+        if shell_tools:
+            lines.append("- detected-available-shell-commands: " + ", ".join(shell_tools))
+        if _code_index_available(self.session):
+            language_breakdown = _code_index_language_breakdown(self.session)
+            if language_breakdown:
+                lines.append("- indexed-language-breakdown: " + language_breakdown)
+            lines.append(
+                "- inspect_code_hint: Use InspectCode for structural code navigation: mode=find for symbol candidates, mode=inspect for anchored symbol source, mode=outline for file outlines. Do not pass natural language. Use Search/Read for text, config, logs, commands, and exact ranges."
+            )
+        return "\n".join(lines)
 
     def build_observe_prompt(self) -> str:
-        return self.prompt_builder.observe_user_prompt(
-            self._format_observe_tool_result_context(),
-            self._format_observe_feedback(),
-        )
+        current = self.blackboard
+        unreduced = "\n\n".join(self._unreferenced_unreduced_blocks())
+        return AGENT_OBSERVE_USER_PROMPT_TEMPLATE.format(
+            user_rules=self.session.state.user_rules.format(),
+            goal=current.goal or "(empty)",
+            plan="\n".join(item.format() for item in current.plan) if current.plan else "(empty)",
+            leads="\n".join(item.format() for item in current.leads) if current.leads else "(empty)",
+            known="\n".join(KnownItem.format_item(item) for item in current.known) if current.known else "(empty)",
+            kept_tool_results="\n\n".join(self.tool_context.kept_results) or "(empty)",
+            errors="\n".join("- " + error for error in self.observe_feedback_errors) or "(empty)",
+            unreduced_tool_results=unreduced or "(empty)",
+            user_request=self._format_user_request(),
+        ).strip()
+
+    def _system_prompt(self, template: str | None = None) -> str:
+        return (template or AGENT_SYSTEM_PROMPT).strip()
+
+    def _format_user_request(self) -> str:
+        user_request = self.blackboard.user_input or "(empty)"
+        fence = "`" * max(3, max((len(match.group(0)) for match in re.finditer(r"`{3,}", user_request)), default=0) + 1)
+        return fence + "text\n" + user_request + "\n" + fence
 
     def request(
         self,
@@ -4928,11 +5178,24 @@ def request(
         *,
         activity: str = "agent",
         on_message: MessageCallback | None = None,
+        on_stream_action: Callable[[Json], bool] | None = None,
+        tool_schemas: list[Json] | None = None,
     ) -> Json:
-        for attempt in range(len(self.MODEL_TIMEOUT_RETRY_DELAYS) + 1):
+        attempt = 0
+        while attempt <= len(self.MODEL_TIMEOUT_RETRY_DELAYS):
             try:
                 self.session.state.turn_model_calls += 1
-                return self.model_client.request(system_prompt, user_prompt, activity=activity)
+                return self.model_client.request(
+                    system_prompt,
+                    user_prompt,
+                    activity=activity,
+                    on_stream_action=on_stream_action,
+                    tool_schemas=tool_schemas,
+                )
+            except ModelRequestRetry:
+                if on_message is not None and self.session.settings.debug:
+                    on_message("Retrying: manual model retry requested.")
+                continue
             except LLMError as error:
                 timeout_reason = str(error)
                 if timeout_reason not in ("request model timeout", "request first token timeout") or attempt >= len(self.MODEL_TIMEOUT_RETRY_DELAYS):
@@ -4940,15 +5203,8 @@ def request(
                 delay = self.MODEL_TIMEOUT_RETRY_DELAYS[attempt]
                 self._set_status_notice("err:first_token" if timeout_reason == "request first token timeout" else "err:timeout")
                 if on_message is not None and self.session.settings.debug:
-                    on_message(
-                        "Retrying: " + timeout_reason + "; retry "
-                        + str(attempt + 1)
-                        + "/"
-                        + str(len(self.MODEL_TIMEOUT_RETRY_DELAYS))
-                        + " in "
-                        + str(delay)
-                        + "s."
-                    )
+                    on_message(f"Retrying: {timeout_reason}; retry {attempt + 1}/{len(self.MODEL_TIMEOUT_RETRY_DELAYS)} in {delay}s.")
+                attempt += 1
                 time.sleep(delay)
         raise LLMError("request model timeout")
 
@@ -4978,36 +5234,54 @@ def run_loop(
                 if on_before_step is not None:
                     on_before_step(index, max_steps)
                 response = self.step(on_message=on_message)
+                DebugTrace.loop_event(self, "loop-step", index=index + 1, response=response)
                 format_error = _json_str(response.get("_format_error"))
                 if format_error:
                     consecutive_format_errors += 1
-                    self._set_status_notice("err:format")
-                    remember_error = self._remember_observe_error if self.mode == AgentMode.OBSERVE else self._remember_agent_error
-                    remember_error(
-                        self._format_gate_user_message("Error: model returned invalid output", format_error) + " Rule: " + self.RULE_ACTION_FRAMES
-                    )
                     if consecutive_format_errors >= self.MAX_CONSECUTIVE_FORMAT_ERRORS:
                         if on_format_error_limit is not None:
+                            self._remember_format_gate(format_error)
                             return on_format_error_limit(response, format_error)
-                        self._report_gate(
-                            on_message,
-                            "Stopped: model returned invalid output " + str(self.MAX_CONSECUTIVE_FORMAT_ERRORS) + " times in a row.",
-                            "Format_Gate: stopped after "
-                            + str(self.MAX_CONSECUTIVE_FORMAT_ERRORS)
-                            + " consecutive invalid model outputs. "
-                            + self._format_gate_debug_details(response, format_error),
-                        )
-                        raise LLMError(
-                            "model returned invalid output " + str(self.MAX_CONSECUTIVE_FORMAT_ERRORS) + " times in a row: " + _shorten(format_error, 300)
-                        )
-                    self._report_gate(
-                        on_message,
-                        self._format_gate_user_message("Retrying: model returned invalid output", format_error),
-                        "Format_Gate: retrying model response. " + self._format_gate_debug_details(response, format_error),
-                    )
+                    self._handle_format_gate(response, format_error, consecutive_format_errors, on_message)
                     continue
                 consecutive_format_errors = 0
                 result = on_step(response)
+                DebugTrace.loop_event(self, "loop-result", index=index + 1, response=response, result=result)
+                if result.done:
+                    return result.value
+            return on_step_limit()
+        except KeyboardInterrupt:
+            self.cancel_current_goal()
+            raise
+
+    def run_stream_loop(
+        self,
+        *,
+        max_steps: int,
+        on_message: MessageCallback | None = None,
+        confirm: ConfirmCallback | None = None,
+        on_auto_approve: ToolDisplayCallback | None = None,
+        on_step_limit: Callable[[], JsonValue],
+        on_before_step: Callable[[int, int], None] | None = None,
+    ) -> JsonValue:
+        consecutive_format_errors = 0
+        try:
+            for index in range(max_steps):
+                if on_before_step is not None:
+                    on_before_step(index, max_steps)
+                result, response, committed = self.stream_step(
+                    confirm=confirm,
+                    on_auto_approve=on_auto_approve,
+                    on_message=on_message,
+                )
+                DebugTrace.loop_event(self, "stream-loop-step", index=index + 1, response=response, result=result, committed=committed)
+                format_error = _json_str(response.get("_format_error"))
+                if format_error:
+                    consecutive_format_errors += 1
+                    self._handle_format_gate(response, format_error, consecutive_format_errors, on_message)
+                    continue
+                if not committed:
+                    consecutive_format_errors = 0
                 if result.done:
                     return result.value
             return on_step_limit()
@@ -5015,35 +5289,50 @@ def run_loop(
             self.cancel_current_goal()
             raise
 
+    def _remember_format_gate(self, format_error: str) -> None:
+        next_rule = self.RULE_FUNCTION_TOOLS if "invalid tool arguments" not in format_error else self.RULE_VALID_TOOL_JSON
+        sink = self._remember_agent_error if self.mode != AgentMode.OBSERVE else self._remember_observe_error  # feedback goes to the active mode's list
+        sink(self._format_gate_user_message("Error: invalid function/tool response", format_error) + " Next: " + next_rule)
+
+    def _handle_format_gate(self, response: Json, format_error: str, consecutive_errors: int, on_message: MessageCallback | None) -> None:
+        self._set_status_notice("err:format")
+        self._remember_format_gate(format_error)
+        limit = self.MAX_CONSECUTIVE_FORMAT_ERRORS
+        details = self._format_gate_debug_details(response, format_error)
+        if consecutive_errors < limit:
+            # Below the limit: report a retry and return so the calling loop can try again.
+            retry = self._format_gate_user_message("Retrying: invalid function/tool response", format_error)
+            self._report_gate(on_message, retry, "Format_Gate: retrying function/tool response. " + details)
+            return
+        self._report_gate(
+            on_message,
+            f"Stopped: invalid function/tool response {limit} times in a row.",
+            f"Format_Gate: stopped after {limit} consecutive invalid function/tool responses. " + details,
+        )
+        raise LLMError(f"invalid function/tool response {limit} times in a row: {_shorten(format_error, 300)}")
+
     def _finish_current_goal(self) -> None:
         self.blackboard.task_code = TaskCode.DONE
         self.blackboard.goal_reached = False
-        self.blackboard.verification_required = False
+        self.blackboard.checks_required = False
+        self.recent_edits = []
 
     def _format_act_tool_result_context(self) -> tuple[str, str, str]:
         checkpoint = self.blackboard.memory_checkpoint_tool_result_counter
-        timeline = self.tool_context.current_timeline_blocks()[-self.TOOL_RESULT_INDEX_ITEMS :]
+        budget = self.context_budget()
+        timeline = self.tool_context.current_timeline_blocks()[-budget.index_items :]
         unreduced = self.tool_context.unreduced_recent_blocks(checkpoint)
         latest = self.tool_context.latest_raw_blocks()
-        visible_keys = set(
-            ToolResultContext.blocks_by_key(timeline + unreduced + latest + self.tool_context.kept_results)
-        )
-        archived_limit = max(0, self.TOOL_RESULT_INDEX_ITEMS - len(timeline))
-        archived = self.prompt_builder.format_archived_tool_result_index(visible_keys, limit=archived_limit)
-        index = self._format_tool_result_index(archived, timeline)
-        return index, "\n\n".join(unreduced), "\n\n".join(latest)
-
-    def _format_observe_tool_result_context(self) -> str:
-        return "\n\n".join(self.tool_context.unreduced_blocks(self.blackboard.memory_checkpoint_tool_result_counter))
-
-    @staticmethod
-    def _format_tool_result_index(archived: list[str], timeline: list[str]) -> str:
+        visible_keys = set(ToolResultContext.blocks_by_key(timeline + unreduced + latest + self.tool_context.kept_results))
+        archived_limit = max(0, budget.index_items - len(timeline))
+        archived = [item.format(result_key=key) for key, item in self.session.state.tool_result_store.items() if key not in visible_keys]
+        archived = archived[-archived_limit:] if archived_limit > 0 else []  # no index slots left once the timeline fills the budget
         sections = []
         if archived:
             sections.append("Archived Recall Index:\n" + "\n".join(archived))
         if timeline:
             sections.append("Current Task Timeline:\n" + "\n".join(timeline))
-        return "\n\n".join(sections)
+        return "\n\n".join(sections), "\n\n".join(unreduced), "\n\n".join(latest)
 
     def _prune_tool_result_store(self) -> None:
         keep = self._protected_tool_result_keys()
@@ -5054,7 +5343,7 @@ def _prune_tool_result_store(self) -> None:
             self.session.state.tool_result_store.pop(key)
 
     def _protected_tool_result_keys(self) -> set[str]:
-        keys = self.blackboard.source_result_keys()
+        keys = self.blackboard.referenced_result_keys()
         keys.update(ToolResultContext.blocks_by_key(self.tool_context.kept_results))
         return keys
 
@@ -5074,27 +5363,15 @@ def _remember_agent_error(self, text: str) -> None:
     def _remember_observe_error(self, text: str) -> None:
         self._remember_feedback_error(self.observe_feedback_errors, text)
 
-    @staticmethod
-    def _feedback(level: str, text: str, rule: str = "") -> str:
-        return level + ": " + text + ((" Rule: " + rule) if rule else "")
-
     def _error(self, text: str, rule: str = "") -> str:
-        return self._feedback("Error", text, rule)
+        return "Error blocked: " + text + ((" Next: " + rule) if rule else "")
 
     def _warning(self, text: str, rule: str = "") -> str:
-        return self._feedback("Warning", text, rule)
+        return "Warning blocked: " + text + ((" Next: " + rule) if rule else "")
 
     def _warn_agent(self, text: str, rule: str = "") -> None:
         self._remember_agent_error(self._warning(text, rule))
 
-    def _warn_observe(self, text: str, rule: str = "") -> None:
-        self._remember_observe_error(self._warning(text, rule))
-
-    def _reject_agent(self, on_message: MessageCallback | None, feedback: str, retry: str, debug: str) -> bool:
-        self._remember_agent_error(feedback)
-        self._report_gate(on_message, retry, debug)
-        return True
-
     def _reject_result(
         self,
         remember_error: Callable[[str], None],
@@ -5103,33 +5380,21 @@ def _reject_result(
         retry: str,
         debug: str,
     ) -> AgentRunResult:
+        self.stream_stop_requested = True
         remember_error(feedback)
         self._report_gate(on_message, retry, debug)
         return AgentRunResult()
 
-    def _reject_completion(self, on_message: MessageCallback | None, feedback: str, retry: str, debug: str) -> AgentRunResult:
-        self.blackboard.goal_reached = False
-        return self._reject_result(self._remember_agent_error, on_message, feedback, retry, debug)
-
-    def _format_agent_feedback(self) -> str:
-        if not self.agent_feedback_errors:
-            return ""
-        return "\n".join("- " + error for error in self.agent_feedback_errors)
-
-    def _format_observe_feedback(self) -> str:
-        if not self.observe_feedback_errors:
-            return ""
-        return "\n".join("- " + error for error in self.observe_feedback_errors)
-
     def _report_gate(self, on_message: MessageCallback | None, message: str, debug_message: str) -> None:
+        is_retry = message.startswith(("Retrying:", "Continuing:"))
         if on_message is None:
             return
-        if message.startswith(("Retrying:", "Continuing:")) and self.session.state.status_notice_until <= time.monotonic():
+        if is_retry and self.session.state.status_notice_until <= time.monotonic():
             self._set_status_notice("err:gate")
         if self.session.settings.debug:
             on_message(debug_message)
             return
-        if not message.startswith(("Retrying:", "Continuing:")):
+        if not is_retry:
             on_message(message)
 
     def _format_gate_user_message(self, prefix: str, format_error: str) -> str:
@@ -5138,8 +5403,9 @@ def _format_gate_user_message(self, prefix: str, format_error: str) -> str:
             if marker in detail:
                 detail = detail.split(marker, 1)[0]
                 break
-        if detail.startswith("Invalid model output: "):
-            detail = detail[len("Invalid model output: ") :]
+        marker = "Invalid function-tool response: "
+        if detail.startswith(marker):
+            detail = detail[len(marker) :]
         return prefix + ": " + _shorten(detail, 180)
 
     def _format_gate_debug_details(self, response: Json, format_error: str) -> str:
@@ -5148,127 +5414,219 @@ def _format_gate_debug_details(self, response: Json, format_error: str) -> str:
             return _shorten(format_error, 180)
         return _shorten(format_error, 180) + "\nFull bad output:\n" + bad_output
 
-    def step(self, *, on_message: MessageCallback | None = None) -> Json:
+    def _step_prompts(self) -> tuple[str, str, str]:
         if self.mode == AgentMode.OBSERVE:
-            system_prompt = self.prompt_builder.system_prompt(AGENT_OBSERVE_SYSTEM_PROMPT, tools=())
+            system_prompt = self._system_prompt(AGENT_OBSERVE_SYSTEM_PROMPT)
             user_prompt = self.build_observe_prompt()
             activity = "observe"
         else:
-            system_prompt = self.prompt_builder.system_prompt(
-                AGENT_PLAN_SYSTEM_PROMPT if self.session.settings.plan_mode else None,
-                tools=PLAN_MODE_TOOLS if self.session.settings.plan_mode else None,
-            )
+            system_prompt = self._system_prompt()
             user_prompt = self.build_user_prompt()
             activity = "agent"
-        response = self.request(system_prompt, user_prompt, activity=activity, on_message=on_message)
-        if _json_str(response.get("_format_error")):
+        return system_prompt, user_prompt, activity
+
+    def _tool_schemas(self) -> list[Json]:
+        # OBSERVE exposes only its state actions; otherwise state actions (minus "tool") plus registered tools.
+        observing = self.mode == AgentMode.OBSERVE
+        allowed = self.OBSERVE_ACTION_TYPES if observing else self.ACT_ACTION_TYPES - {"tool"}
+        tool_classes: Iterable[ToolClass] = () if observing else tuple(TOOL_REGISTRY.values())
+        if not observing and not _code_index_available(self.session):
+            # InspectCodeTool is filtered out while no code index is available.
+            tool_classes = tuple(candidate for candidate in tool_classes if candidate is not InspectCodeTool)
+        schemas = [_state_tool_schema(name) for name in STATE_TOOL_PARAMS if name in allowed]
+        schemas.extend(candidate.tool_schema() for candidate in tool_classes)
+        return schemas
+
+    def step(self, *, on_message: MessageCallback | None = None) -> Json:
+        system_prompt, user_prompt, activity = self._step_prompts()
+        response = self.request(system_prompt, user_prompt, activity=activity, on_message=on_message, tool_schemas=self._tool_schemas())
+        if _json_str(response.get("_format_error")):
             return response
         invalid_response = self._validate_action_response(response)
         if invalid_response is not None:
             return invalid_response
         return response
 
+    def stream_step(
+        self,
+        *,
+        confirm: ConfirmCallback | None = None,
+        on_auto_approve: ToolDisplayCallback | None = None,
+        on_message: MessageCallback | None = None,
+    ) -> tuple[AgentRunResult, Json, bool]:
+        if not self._can_stream_tools():
+            response = self.step(on_message=on_message)
+            if _json_str(response.get("_format_error")):
+                return AgentRunResult(), response, False
+            return self.handle_response(response, confirm=confirm, on_auto_approve=on_auto_approve, on_message=on_message), response, False
+
+        committed = False
+        latest_result = AgentRunResult()
+        streamed_tool_batch_started = False
+
+        def on_stream_action(action: Json) -> bool:
+            nonlocal committed, latest_result, streamed_tool_batch_started
+            committed = True
+            self.stream_stop_requested = False
+            assistant_text = _json_str(action.pop("_assistant_text", None)) or ""
+            response = {"actions": [action]}
+            if assistant_text:
+                response["_assistant_text"] = assistant_text
+            is_tool = _json_str(action.get("type")) == "tool"
+            invalid_response = self._validate_action_response(response)
+            latest_result = (
+                self.handle_response(
+                    response,
+                    confirm=confirm,
+                    on_auto_approve=on_auto_approve,
+                    on_message=on_message,
+                    append_to_latest=is_tool and streamed_tool_batch_started,
+                )
+                if invalid_response is None
+                else self._reject_result(
+                    self._remember_agent_error,
+                    on_message,
+                    _json_str(invalid_response.get("_format_error")) or self._error("invalid streamed action."),
+                    "Retrying: invalid streamed action.",
+                    "Format_Gate: invalid streamed action.",
+                )
+            )
+            if is_tool:
+                streamed_tool_batch_started = True
+            if latest_result.done or self.stream_stop_requested:
+                return True
+            if is_tool and any(execution.outcome != "success" for execution in self.tool_runner.latest_executions):
+                return True
+            return self.mode == AgentMode.OBSERVE
+
+        system_prompt, user_prompt, activity = self._step_prompts()
+        response = self.request(
+            system_prompt,
+            user_prompt,
+            activity=activity,
+            on_message=on_message,
+            on_stream_action=on_stream_action,
+            tool_schemas=self._tool_schemas(),
+        )
+        if committed:
+            return latest_result, response, True
+        if _json_str(response.get("_format_error")):
+            return AgentRunResult(), response, False
+        invalid_response = self._validate_action_response(response)
+        if invalid_response is not None:
+            return AgentRunResult(), invalid_response, False
+        return self.handle_response(response, confirm=confirm, on_auto_approve=on_auto_approve, on_message=on_message), response, False
+
+    def _can_stream_tools(self) -> bool:
+        return self.mode == AgentMode.ACT and isinstance(self.model_client, ModelClient) and self.session.config.provider.stream is not False
+
     def apply_response(self, response: Json) -> list[str]:
         actions = self._response_actions(response)
-        if self._has_pending_verification(actions):
-            response = {**response, "actions": [action for action in actions if not self._is_pending_verify_action(action)]}
+        response = {**response, "actions": actions}
+        if any(self._is_pending_check_action(action) for action in actions):
+            response = {**response, "actions": [action for action in actions if not self._is_pending_check_action(action)]}
             actions = self._response_actions(response)
-        if self._start_changes_goal(actions):
+        if self._goal_changes_task(actions):
             self.tool_context.kept_results = []
             self.tool_context.compact_observed(self.tool_context.recent + self.tool_context.latest)
             self._mark_memory_checkpoint()
-            self.blackboard.hypotheses = []
+            self.blackboard.leads = []
         self.state_updater.apply(response)
         forgotten = self.tool_context.forget_results(ToolResultContext.forget_result_keys_from_actions(actions))
-        if self.mode != AgentMode.OBSERVE and self._has_memory_update_action(actions):
-            self._mark_memory_checkpoint()
         return forgotten
 
-    def _start_changes_goal(self, actions: list[Json]) -> bool:
+    def _goal_changes_task(self, actions: list[Json]) -> bool:
+        if not self.blackboard.goal:
+            return False
         return any(
-            _json_str(action.get("type")) == "start"
-            and bool(goal := _json_str(action.get("goal")))
+            _json_str(action.get("type")) == "goal"
+            and action.get("complete") is not True
+            and bool(goal := _json_str(action.get("text")))
             and goal != self.blackboard.goal
             for action in actions
         )
 
     def _mark_memory_checkpoint(self, counter: int = 0) -> None:
-        checkpoint = counter or self.tool_context.visible_counter() or self.session.state.tool_result_counter
+        checkpoint = counter or self.tool_context.max_counter(self.tool_context.recent + self.tool_context.latest) or self.session.state.tool_result_counter
         self.blackboard.memory_checkpoint_tool_result_counter = max(self.blackboard.memory_checkpoint_tool_result_counter, checkpoint)
 
-    def _has_memory_update_action(self, actions: list[Json]) -> bool:
-        for action in actions:
-            action_type = _json_str(action.get("type"))
-            if action_type == "keep" and _source_from_json(action):
-                return True
-            if action_type == "hypothesis" and _json_list(action.get("items")):
-                return True
-            if action_type == "known" and any(_memory_fact_from_json(raw) for raw in _json_list(action.get("items"))):
-                return True
-            if action_type == "stable_knowledge" and _json_list(action.get("items")):
-                return True
-        return False
-
     def execute_tool_calls(
         self,
         tool_calls: list[JsonValue],
         *,
         confirm: ConfirmCallback | None = None,
         on_auto_approve: ToolDisplayCallback | None = None,
-        on_live_output: ToolLiveOutputCallback | None = None,
-        on_live_done: ToolLiveDoneCallback | None = None,
+        append_to_latest: bool = False,
     ) -> str:
-        self.tool_runner.execute(
-            tool_calls,
-            confirm=confirm,
-            on_auto_approve=on_auto_approve,
-            on_live_output=on_live_output,
-            on_live_done=on_live_done,
-        )
+        self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve)
         self.tool_context.append_latest(
             self.tool_runner.latest_executions,
-            max_index_items=self.TOOL_RESULT_INDEX_ITEMS,
+            max_index_items=self.context_budget().index_items,
             checkpoint=self.blackboard.memory_checkpoint_tool_result_counter,
+            append=append_to_latest,
         )
         self.session.state.turn_tool_calls += len(self.tool_runner.latest_executions)
         self.session.state.session_tool_calls += len(self.tool_runner.latest_executions)
         for execution in self.tool_runner.latest_executions:
             self._after_tool_execution(execution)
-        self.runtime.consecutive_tool_turns += 1
         if self._should_observe_after_tools():
             self.mode = AgentMode.OBSERVE
         return "\n\n".join(self.tool_context.latest)
 
     def _should_observe_after_tools(self) -> bool:
-        pending = self.tool_context.unreduced_blocks(self.blackboard.memory_checkpoint_tool_result_counter)
+        pending = self._unreferenced_unreduced_blocks()
         if not pending:
             return False
+        budget = self.context_budget()
         # Tool failures stay visible to ACT as Latest Tool Results plus feedback.
         # Very large failures still trigger observe through raw-context pressure.
-        return len(pending) >= self.OBSERVE_AFTER_PENDING_RESULT_COUNT or self.tool_context.raw_context_chars(
-            self.blackboard.memory_checkpoint_tool_result_counter
-        ) >= self.TOOL_RESULT_RAW_CHARS
+        return (
+            len(pending) >= budget.observe_after_results
+            or self.tool_context.raw_context_chars(
+                self.blackboard.memory_checkpoint_tool_result_counter,
+                exclude_keys=self.blackboard.referenced_result_keys(),
+            )
+            >= budget.raw_chars
+        )
+
+    def _unreferenced_unreduced_blocks(self) -> list[str]:
+        # Unreduced blocks for the current checkpoint, excluding result keys the blackboard references.
+        checkpoint = self.blackboard.memory_checkpoint_tool_result_counter
+        referenced = self.blackboard.referenced_result_keys()
+        return self.tool_context.unreduced_blocks(checkpoint, exclude_keys=referenced)
 
     def _after_tool_execution(self, execution: ToolCallExecution) -> None:
         self._remember_tool_failure(execution)
+        if execution.error_type is Cancellation:
+            detail = " ".join(execution.output.split())
+            detail = detail.removeprefix("Cancelled: ")
+            self._remember_agent_error(
+                self._error(
+                    "tool call was cancelled: " + _format_tool_call_summary(execution.call) + " -> " + detail + ".",
+                    "do not repeat it unchanged; follow the cancellation or refusal reason.",
+                )
+            )
         if execution.error_type is not None and issubclass(execution.error_type, ToolCallArgError):
             detail = self._format_tool_arg_error(execution)
-            rule = self.RULE_TOOL_SIGNATURE
-            if execution.call.name in {EditTool.name(), ReplaceRangeTool.name()}:
-                rule = self.RULE_EDIT_SIGNATURE
+            tool_class = TOOL_REGISTRY.get(execution.call.name)
+            rule = self.RULE_EDIT_SIGNATURE if tool_class is not None and tool_class.EFFECT == ToolEffect.EDIT else self.RULE_TOOL_SIGNATURE
+            self._remember_agent_error(self._error("tool call args invalid: " + _format_tool_call_summary(execution.call) + " -> " + detail + ".", rule))
+        if (
+            execution.error_type is not None
+            and issubclass(execution.error_type, ToolCallError)
+            and not issubclass(execution.error_type, ToolCallArgError)
+            and (tool_class := TOOL_REGISTRY.get(execution.call.name)) is not None
+            and tool_class.EFFECT == ToolEffect.EDIT
+        ):
             self._remember_agent_error(
                 self._error(
-                    "tool call args invalid: "
-                    + _format_tool_call_summary(execution.call)
-                    + " -> "
-                    + detail
-                    + ".",
-                    rule,
+                    "edit failed: " + _format_tool_call_summary(execution.call) + " -> " + _shorten(" ".join(execution.output.split()), 120) + ".",
+                    "reread only stale ranges; if the edit is large, retry a smaller coherent batch.",
                 )
             )
-        if execution.requires_verification:
-            self.blackboard.verification_required = True
-            self.blackboard.task_code = TaskCode.VERIFYING
+        if execution.requires_checks:
+            self.blackboard.checks_required = True
+            self.blackboard.task_code = TaskCode.CHECKING
             self._remember_recent_edit(execution)
 
     def _remember_tool_failure(self, execution: ToolCallExecution) -> None:
@@ -5295,7 +5653,11 @@ def _format_tool_arg_error(self, execution: ToolCallExecution) -> str:
         tool_class = TOOL_REGISTRY.get(call.name)
         if tool_class is None:
             return execution.output
-        params = self._exact_signature_params(tool_class.SIGNATURE)
+        match = re.search(r"\(([^)]*)\)", tool_class.SIGNATURE)
+        value = match.group(1) if match else ""
+        params = list(tool_class.PARAM_NAMES)
+        if not params and value and not any(token in value for token in "[]*") and "..." not in value:
+            params = [part.strip().split("=", 1)[0].strip() for part in value.split(",") if part.strip()]
         if not params or len(call.args) == len(params):
             return execution.output
         detail = "got " + str(len(call.args)) + " args, expected " + str(len(params))
@@ -5305,15 +5667,6 @@ def _format_tool_arg_error(self, execution: ToolCallExecution) -> str:
             detail += ", extra: " + str(len(call.args) - len(params))
         return detail
 
-    def _exact_signature_params(self, signature: str) -> list[str]:
-        match = re.search(r"\(([^)]*)\)", signature)
-        if not match:
-            return []
-        value = match.group(1)
-        if "[" in value or "]" in value or "*" in value or "..." in value:
-            return []
-        return [part.strip().split("=", 1)[0].strip() for part in value.split(",") if part.strip()]
-
     def _remember_recent_edit(self, execution: ToolCallExecution) -> None:
         if not execution.call.args:
             return
@@ -5323,53 +5676,58 @@ def _remember_recent_edit(self, execution: ToolCallExecution) -> None:
         except ValueError:
             path = filepath
         intention = " ".join(execution.call.intention.split()) or execution.call.name
-        self.runtime.recent_edits.append("- " + path + ": " + _shorten(intention, 160))
-        self.runtime.recent_edits = self.runtime.recent_edits[-self.RECENT_EDITS :]
+        self.recent_edits.append("- " + path + ": " + _shorten(intention, 160))
+        self.recent_edits = self.recent_edits[-self.RECENT_EDITS :]
 
-    def _invalid_action_response(self, response: Json, reason: str) -> Json:
+    def _invalid_action_response(self, response: Json, reason: str, bad_output: str | None = None) -> Json:
+        bad_output = bad_output if bad_output is not None else json.dumps(response, ensure_ascii=False)
         return {
             "actions": [],
-            "_format_error": "Invalid model output: "
-            + reason
-            + ". Return action frames only. Bad output: "
-            + _shorten(json.dumps(response, ensure_ascii=False)),
+            "_format_bad_output": bad_output,
+            "_format_error": f"Invalid function-tool response: {reason}. Use valid function tool calls with JSON arguments matching the tool schema. Bad output: "
+            + _shorten(bad_output),
         }
 
     def _validate_action_response(self, response: Json) -> Json | None:
-        if not isinstance(response.get("actions"), list):
+        actions = response.get("actions")
+        if not isinstance(actions, list):
             return self._invalid_action_response(response, "expected actions array")
-        extra_keys = sorted(str(key) for key in response.keys() if key != "actions" and not str(key).startswith("_format_"))
+        action_bad_outputs = []
+        action_errors = []
+        for action in (_json_dict(item) for item in actions):
+            error = _json_str(action.get("_format_error"))
+            if error:
+                action_errors.append(error)
+                bad_output = _json_str(action.get("_format_bad_output"))
+                if bad_output:
+                    action_bad_outputs.append(bad_output)
+        if action_errors:
+            return self._invalid_action_response(response, "; ".join(action_errors), "\n".join(action_bad_outputs) or None)
+        extra_keys = sorted(str(key) for key in response.keys() if key not in {"actions", "_assistant_text"} and not str(key).startswith("_format_"))
         if extra_keys:
             return self._invalid_action_response(response, "unexpected top-level keys: " + ", ".join(extra_keys))
         return None
 
-    def _format_frame_error_report(self, response: Json) -> str:
-        errors = [_json_str(error) or "" for error in _json_list(response.get("_format_frame_errors"))]
-        errors = [error for error in errors if error]
-        if not errors:
-            return ""
-        return "Format_Warning: ignored invalid action frame(s).\n" + "\n".join("- " + _shorten(error, 220) for error in errors)
-
     def _response_actions(self, response: Json) -> list[Json]:
-        actions = [action for action in (_json_dict(item) for item in _json_list(response.get("actions"))) if action]
-        for action in actions:
-            self._normalize_response_action(action)
-        return actions
+        return [self._normalize_action(action) for action in (_json_dict(item) for item in _json_list(response.get("actions"))) if action]
 
-    def _normalize_response_action(self, action: Json) -> None:
+    @staticmethod
+    def _normalize_action(action: Json) -> Json:
         action_type = _json_str(action.get("type"))
-        if not action_type:
-            return
-        lowered = action_type.lower()
-        if lowered == "message":
-            action["type"] = "chat"
-            if _json_str(action.get("text")) is None:
-                text = _json_str(action.get("message")) or _json_str(action.get("content"))
-                if text is not None:
-                    action["text"] = text
-            return
-        if lowered in (self.ACT_ACTION_TYPES | self.OBSERVE_ACTION_TYPES):
-            action["type"] = lowered
+        canonical_action_type = _canonical_protocol_action_type(action_type)
+        if canonical_action_type in PROTOCOL_ACTION_TYPES:
+            if canonical_action_type == action_type:
+                return action
+            normalized = dict(action)
+            normalized["type"] = canonical_action_type
+            return normalized
+        tool_name = _canonical_tool_name(action_type)
+        if tool_name not in TOOL_REGISTRY:
+            return action
+        normalized = dict(action)
+        normalized["type"] = "tool"
+        normalized["name"] = tool_name
+        return normalized
 
     def _gate_action_types(
         self,
@@ -5385,72 +5743,14 @@ def _gate_action_types(
         if not invalid:
             return None
         (remember_error or self._remember_agent_error)(feedback_message + " Invalid action(s): " + ", ".join(invalid) + ".")
-        self._report_gate(
-            on_message,
-            retry_message,
-            "ActionType_Gate: invalid action type(s): " + ", ".join(invalid) + ".",
-        )
+        self._report_gate(on_message, retry_message, "Protocol_Gate: invalid action type(s): " + ", ".join(invalid) + ".")
         return AgentRunResult()
 
-    def _chat_message_from_actions(self, actions: list[Json]) -> str | None:
-        for action in actions:
-            action_type = _json_str(action.get("type"))
-            if action_type == "chat":
-                return _json_str(action.get("text")) or ""
-            return None
-        return None
-
-    def _progress_messages_from_actions(self, actions: list[Json]) -> list[str]:
-        messages = []
-        for action in actions:
-            if _json_str(action.get("type")) == "progress":
-                message = _json_str(action.get("text")) or _json_str(action.get("message")) or ""
-            else:
-                message = ""
-            if message:
-                messages.append(message)
-        return messages
-
-    def _completion_message_from_actions(self, actions: list[Json]) -> str:
-        for action in reversed(actions):
-            if _json_str(action.get("type")) == "goal" and action.get("complete") is True:
-                return _json_str(action.get("message_for_complete")) or ""
-        return ""
-
-    def _completion_fallback_message(self, ctx: ResponseContext) -> str:
-        if ctx.completion_message:
-            return ctx.completion_message
-        return next((message for message in reversed(ctx.progress_messages) if message.strip()), "Done.")
-
-    def _incomplete_goal_update_from_actions(self, actions: list[Json]) -> str:
-        update = ""
-        for action in actions:
-            action_type = _json_str(action.get("type"))
-            if action_type == "start":
-                update = _json_str(action.get("goal")) or update
-            elif action_type == "goal" and action.get("complete") is not True:
-                update = _json_str(action.get("text")) or update
-        return update
-
-    def _has_fresh_plan_action(self, actions: list[Json]) -> bool:
-        def has_items(value: JsonValue) -> bool:
-            return any(_json_str(_json_dict(raw).get("text")) for raw in _json_list(value))
-
-        for action in actions:
-            action_type = _json_str(action.get("type"))
-            if action_type == "start" and has_items(action.get("plan")):
-                return True
-            if action_type == "plan" and action.get("mode") != "patch" and has_items(action.get("items")):
-                return True
-        return False
-
     def _plan_is_complete(self) -> bool:
-        return bool(self.blackboard.plan) and all(
-            item.status in self.COMPLETED_PLAN_STATUSES and item.context.strip() for item in self.blackboard.plan
-        )
+        return bool(self.blackboard.plan) and all(item.status in self.COMPLETED_PLAN_STATUSES and item.context.strip() for item in self.blackboard.plan)
 
-    def _verification_is_settled(self) -> bool:
-        return self.blackboard.verification.status in {VerificationStatus.DONE, VerificationStatus.BLOCKED}
+    def _checks_are_settled(self) -> bool:
+        return self.blackboard.checks.status in {CheckStatus.PASSED, CheckStatus.BLOCKED}
 
     def _completion_plan_error(self, ctx: ResponseContext) -> str:
         if not self.blackboard.goal_reached:
@@ -5465,12 +5765,24 @@ def _completion_plan_error(self, ctx: ResponseContext) -> str:
             return "plan items missing context: " + self._format_plan_gate_items(missing_context)
         return ""
 
-    def _blocked_verification_completion_error(self) -> str:
-        if not self.blackboard.goal_reached or self.blackboard.verification.status != VerificationStatus.BLOCKED:
-            return ""
-        if self.blackboard.verification.blocker == VerificationBlocker.USER:
+    def _completion_plan_followup_error(self) -> str:
+        if not self.blackboard.goal_reached or not self.recent_edits:
             return ""
-        return "verify blocked requires blocker=user before completion"
+        completed = [item for item in self.blackboard.plan if item.status in self.COMPLETED_PLAN_STATUSES]
+        missing = [
+            item for item in completed if item.followup_action.status == PlanFollowupStatus.UNKNOWN or item.followup_check.status == PlanFollowupStatus.UNKNOWN
+        ]
+        if missing:
+            return "plan follow-up status missing: " + self._format_plan_gate_items(missing)
+        missing_reason = [item for item in completed if not item.followup_action.reason.strip() or not item.followup_check.reason.strip()]
+        if missing_reason:
+            return "plan follow-up reason missing: " + self._format_plan_gate_items(missing_reason)
+        needed = [
+            item for item in completed if item.followup_action.status == PlanFollowupStatus.NEEDED or item.followup_check.status == PlanFollowupStatus.NEEDED
+        ]
+        if needed:
+            return "plan follow-up still needed: " + self._format_plan_gate_items(needed)
+        return ""
 
     def _format_plan_gate_items(self, items: list[PlanItem]) -> str:
         rendered = []
@@ -5481,52 +5793,10 @@ def _format_plan_gate_items(self, items: list[PlanItem]) -> str:
             rendered.append("+" + str(len(items) - 3) + " more")
         return "; ".join(rendered)
 
-    def _user_rule_message_from_actions(self, actions: list[Json]) -> str | None:
-        for action in actions:
-            if _json_str(action.get("type")) == "user_rule":
-                return _json_str(action.get("message")) or "Rule saved."
-        return None
-
-    def _has_pending_verification(self, actions: list[Json]) -> bool:
-        return any(self._is_pending_verify_action(action) for action in actions)
-
     @staticmethod
-    def _has_state_update_action(actions: list[Json]) -> bool:
-        return any(_json_str(action.get("type")) in {"plan", "known", "hypothesis", "stable_knowledge"} for action in actions)
-
-    @staticmethod
-    def _is_pending_verify_action(action: Json) -> bool:
+    def _is_pending_check_action(action: Json) -> bool:
         return _json_str(action.get("type")) == "verify" and _json_str(action.get("status")) == "pending"
 
-    def _investigate_completion_error(self) -> str:
-        if self.blackboard.work_mode != WorkMode.INVESTIGATE or not self.blackboard.goal_reached:
-            return ""
-        return "" if any(item.status == HypothesisStatus.CONFIRMED for item in self.blackboard.hypotheses) else "investigate completion requires a confirmed hypothesis"
-
-    def _forget_active_hypothesis_error(self, actions: list[Json]) -> str:
-        forgotten = set(ToolResultContext.forget_result_keys_from_actions(actions))
-        if not forgotten:
-            return ""
-        released = set()
-        for action in actions:
-            values = _json_list(action.get("items")) if _json_str(action.get("type")) == "hypothesis" else []
-            for raw in values:
-                item = Hypothesis.from_json(raw)
-                if item is not None and item.status != HypothesisStatus.ACTIVE:
-                    released.update(key for key in item.source if key.startswith("tr."))
-        protected = {
-            key
-            for item in self.blackboard.hypotheses
-            if item.status == HypothesisStatus.ACTIVE
-            for key in item.source
-            if key.startswith("tr.")
-        }
-        conflict = sorted((forgotten & protected) - released)
-        return "active hypothesis source: " + ", ".join(conflict) if conflict else ""
-
-    def _plan_items_from_json(self, value: JsonValue) -> list[PlanItem]:
-        return [item for item in (self.state_updater._plan_item_from_json(raw) for raw in _json_list(value)) if item]
-
     def _repeated_tool_retry_error(self, tool_calls: list[JsonValue]) -> str:
         if self.failed_tool_call_key is None or self.failed_tool_call_count < 2:
             return ""
@@ -5539,282 +5809,224 @@ def _repeated_tool_retry_error(self, tool_calls: list[JsonValue]) -> str:
                 return "same failed tool call repeated after " + str(self.failed_tool_call_count) + " failures: " + _format_tool_call_summary(call)
         return ""
 
-    def _plan_mode_tool_error(self, tool_calls: list[JsonValue]) -> str:
-        if not self.session.settings.plan_mode:
-            return ""
+    def _build_response_context(self, response: Json) -> ResponseContext:
+        raw_actions = self._response_actions(response)
+        assistant_text = _json_str(response.get("_assistant_text")) or ""
+        pending_check_requested = any(self._is_pending_check_action(action) for action in raw_actions)
+        actions = [action for action in raw_actions if not self._is_pending_check_action(action)]
+        tool_calls = [action for action in actions if _json_str(action.get("type")) == "tool"]
+        action_types = {_json_str(action.get("type")) for action in actions}
+        has_edit_tool_call = False
         for value in tool_calls:
             try:
                 call = self.tool_runner.parse_tool_call(value)
             except ToolCallArgError:
                 continue
             tool_class = TOOL_REGISTRY.get(call.name)
-            if tool_class is None:
-                return "plan mode allows registered readonly tools only; blocked " + _format_tool_call_summary(call)
-            if tool_class.effect() == ToolEffect.READONLY:
-                continue
-            if tool_class is GitTool:
-                args = call.args[1:] if call.args and isinstance(call.args[0], str) and call.args[0].startswith("cwd=") else call.args
-                if args and args[0] in self.PLAN_MODE_GIT_READONLY:
-                    continue
-            return "plan mode allows readonly discovery only; blocked " + _format_tool_call_summary(call)
-        return ""
-
-    def _has_non_readonly_tool_call(self, tool_calls: list[JsonValue]) -> bool:
-        for value in tool_calls:
-            try:
-                call = self.tool_runner.parse_tool_call(value)
-            except ToolCallArgError:
-                return True
-            tool_class = TOOL_REGISTRY.get(call.name)
-            if tool_class is None or tool_class.effect() != ToolEffect.READONLY:
-                return True
-        return False
-
-    def _build_response_context(self, response: Json) -> ResponseContext:
-        raw_actions = self._response_actions(response)
-        pending_verify_requested = self._has_pending_verification(raw_actions)
-        actions = [action for action in raw_actions if not self._is_pending_verify_action(action)]
-        tool_calls = [action for action in actions if _json_str(action.get("type")) == "tool"]
-        progress_messages = self._progress_messages_from_actions(actions)
-        has_goal_action = any(_json_str(action.get("type")) in {"goal", "start"} for action in actions)
-        has_plan_action = any(_json_str(action.get("type")) in {"plan", "start"} for action in actions)
-        has_forget_action = any(_json_str(action.get("type")) == "forget" for action in actions)
-        has_hypothesis_action = any(_json_str(action.get("type")) == "hypothesis" for action in actions)
-        has_state_update_action = self._has_state_update_action(actions)
-        goal_update = self._incomplete_goal_update_from_actions(actions)
+            if tool_class is not None and tool_class.EFFECT == ToolEffect.EDIT:
+                has_edit_tool_call = True
+                break
+        goal_update = next(
+            (
+                text
+                for action in reversed(actions)
+                if _json_str(action.get("type")) == "goal" and action.get("complete") is not True
+                for text in [_json_str(action.get("text"))]
+                if text
+            ),
+            "",
+        )
+        has_fresh_plan_action = any(
+            _json_str(action.get("type")) == "plan"
+            and action.get("mode") != "patch"
+            and any((raw.strip() if isinstance(raw, str) else _json_str(_json_dict(raw).get("text"))) for raw in _json_list(action.get("items")))
+            for action in actions
+        )
+        completion_message = next(
+            (
+                _json_str(action.get("message_for_complete")) or ""
+                for action in reversed(actions)
+                if _json_str(action.get("type")) == "goal" and action.get("complete") is True
+            ),
+            "",
+        )
+        user_rule_message = next(
+            (_json_str(action.get("message")) or "Rule saved." for action in actions if _json_str(action.get("type")) == "user_rule"), None
+        )
         return ResponseContext(
             response=response,
             actions=actions,
+            assistant_text=assistant_text,
             goal_was_empty=not self.blackboard.goal,
             plan_was_empty=not self.blackboard.plan,
             plan_was_complete=self._plan_is_complete(),
-            verification_was_settled=self._verification_is_settled(),
+            checks_settled=self._checks_are_settled(),
             goal_will_change=bool(self.blackboard.goal and goal_update and goal_update != self.blackboard.goal),
-            chat_message=self._chat_message_from_actions(actions),
             tool_calls=tool_calls,
-            pending_verify_requested=pending_verify_requested,
-            progress_messages=progress_messages,
-            user_rule_message=self._user_rule_message_from_actions(actions),
-            completion_message=self._completion_message_from_actions(actions),
-            has_goal_action=has_goal_action,
-            has_plan_action=has_plan_action,
-            has_fresh_plan_action=self._has_fresh_plan_action(actions),
-            has_user_rule_action=any(_json_str(action.get("type")) == "user_rule" for action in actions),
-            has_state_update_action=has_state_update_action,
+            pending_check_requested=pending_check_requested,
+            user_rule_message=user_rule_message,
+            completion_message=completion_message,
+            has_goal_action="goal" in action_types,
+            has_plan_action="plan" in action_types,
+            has_fresh_plan_action=has_fresh_plan_action,
+            has_user_rule_action="user_rule" in action_types,
+            has_edit_tool_call=has_edit_tool_call,
+            has_state_update_action=bool(action_types & {"goal", "plan", "known", "lead"}),
             state_or_work_requested=bool(
                 tool_calls
-                or pending_verify_requested
-                or progress_messages
-                or has_plan_action
-                or has_forget_action
-                or has_hypothesis_action
-                or has_state_update_action
+                or pending_check_requested
+                or (assistant_text and actions and not completion_message)
+                or action_types & {"goal", "plan", "forget", "lead", "known"}
             ),
         )
 
-    def _handle_chat_response(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult | None:
-        if ctx.chat_message is None:
+    def _handle_text_response(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult | None:
+        if ctx.actions or not ctx.assistant_text:
             return None
-        self.blackboard.task_code = TaskCode.DONE
-        self.session.append_conversation(AssistantMessage(content=ctx.chat_message))
+        self.session.append_conversation(AssistantMessage(content=ctx.assistant_text))
         if on_message is not None:
-            on_message(ctx.chat_message)
+            on_message(ctx.assistant_text)
+        active_task = bool(self.blackboard.plan or self.blackboard.leads)
+        if active_task and (self.blackboard.task_code in {TaskCode.WORKING, TaskCode.CHECKING} or self.incomplete_task_context_at_turn_start):
+            return AgentRunResult()
+        self.blackboard.task_code = TaskCode.DONE
         return AgentRunResult(done=True, value=ctx.response)
 
-    def _gate_before_apply(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool:
-        action_gate = self._gate_action_types(
-            ctx.actions,
-            allowed=self.PLAN_ACTION_TYPES if self.session.settings.plan_mode else self.ACT_ACTION_TYPES,
-            on_message=on_message,
-            retry_message="Retrying: use a valid agent action.",
-            feedback_message=self._error("this step only accepts agent work actions."),
+    def _ingest_queued_user_input(self, poll_user_input: UserInputPoller | None, on_message: MessageCallback | None) -> None:
+        if poll_user_input is None:
+            return
+        while user_input := poll_user_input():
+            self.blackboard.user_input = user_input
+            self.session.state.pending_user_feedback = user_input
+            self.mode = AgentMode.ACT
+            self.session.append_conversation(UserMessage(content=user_input))
+            if on_message is not None:
+                on_message("sent: " + user_input)
+
+    def _gate_protocol_actions(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool:
+        return (
+            self._gate_action_types(
+                ctx.actions,
+                allowed=self.ACT_ACTION_TYPES,
+                on_message=on_message,
+                retry_message="Retrying: use a valid agent action.",
+                feedback_message=self._error("this step only accepts agent work actions."),
+            )
+            is not None
         )
-        if action_gate is not None:
+
+    def _gate_tool_actions(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool:
+        if self._gate_forget_actions(ctx.actions, on_message, self._remember_agent_error) is not None:
             return True
-        forget_error = self._forget_tool_result_error(ctx.actions)
-        if forget_error:
-            return self._reject_agent(
-                on_message,
-                self._error("invalid forget: " + forget_error + ".", self.RULE_VISIBLE_RESULTS),
-                "Retrying: forget only visible tool result keys.",
-                "ToolResult_Gate: " + forget_error + ".",
-            )
-        forget_hypothesis_error = self._forget_active_hypothesis_error(ctx.actions)
-        if forget_hypothesis_error:
-            return self._reject_agent(
-                on_message,
-                self._error("forget conflicts with active hypothesis: " + forget_hypothesis_error + ".", self.RULE_CLOSE_SOURCE),
-                "Retrying: close hypothesis before forgetting its source result.",
-                "ToolResult_Gate: " + forget_hypothesis_error + ".",
-            )
         repeated_tool_retry_error = self._repeated_tool_retry_error(ctx.tool_calls)
         if repeated_tool_retry_error:
-            return self._reject_agent(
+            self.stream_stop_requested = True
+            self._remember_agent_error(self._error("repeated failed tool call: " + repeated_tool_retry_error + ".", self.RULE_CHANGE_FAILED_TOOL))
+            self._report_gate(
                 on_message,
-                self._error("repeated failed tool call: " + repeated_tool_retry_error + ".", self.RULE_CHANGE_FAILED_TOOL),
                 "Retrying: change the failed tool call instead of repeating it.",
                 "ToolRetry_Gate: " + repeated_tool_retry_error + ".",
             )
-        plan_mode_tool_error = self._plan_mode_tool_error(ctx.tool_calls)
-        if plan_mode_tool_error:
-            return self._reject_agent(
-                on_message,
-                self._error(plan_mode_tool_error + ".", "produce a proposed plan without executing mutations."),
-                "Retrying: plan mode only allows readonly discovery.",
-                "PlanMode_Gate: " + plan_mode_tool_error + ".",
+            return True
+        return False
+
+    def _gate_task_state(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool:
+        if (
+            not (self.blackboard.goal or self.blackboard.plan or self.blackboard.leads)
+            and any(execution.call.name == BashTool.NAME and execution.outcome == "success" for execution in self.tool_runner.latest_executions)
+            and ctx.tool_calls
+            and not ctx.assistant_text
+            and not ctx.has_goal_action
+            and not ctx.has_plan_action
+        ):
+            self._warn_agent(
+                "last command result is visible with no active task.", "answer the user when results are sufficient; create Goal/Plan for extended work."
             )
         if (
             self.blackboard.task_code == TaskCode.NEW
             and self.task_alignment_required
-            and (ctx.tool_calls or ctx.pending_verify_requested)
+            and (ctx.tool_calls or ctx.pending_check_requested)
             and not ctx.has_goal_action
             and not ctx.has_plan_action
             and not ctx.has_user_rule_action
         ):
-            self._remember_agent_error(
-                self._error(
-                    "previous task context is still present.",
-                    "emit start for a new task; otherwise update or confirm the current plan.",
-                )
-            )
-            self._report_gate(
-                on_message,
-                "Retrying: align this request with the task before work.",
-                "GoalPlan_Gate: work before task alignment with previous task context.",
-            )
-            return True
-        if self.blackboard.task_code != TaskCode.NEW and any(_json_str(action.get("type")) == "start" for action in ctx.actions):
-            self._warn_agent("ignored repeated start after the current task became active.")
+            self._warn_agent("previous task context is still present.", "emit goal for a new task; otherwise update or confirm the current plan.")
         if self.blackboard.task_code != TaskCode.NEW and ctx.goal_will_change and not ctx.has_fresh_plan_action:
-            self._remember_agent_error(
-                self._error("cannot rewrite Goal after the task is active.", "continue the existing Goal/Plan.")
-            )
-            self._report_gate(
-                on_message,
-                "Retrying: current task is already active; continue without rewriting goal.",
-                "GoalPlan_Gate: goal rewrite while task code is " + self.blackboard.task_code + ".",
-            )
-            return True
-        if ctx.pending_verify_requested:
+            self._warn_agent("rewrote Goal after the task was active.", "replace Plan when the task scope changes.")
+        if ctx.pending_check_requested:
             self._warn_agent('ignored verify status="pending".', self.RULE_VERIFY_DIRECTLY)
-        if (
-            ctx.goal_was_empty
-            and not ctx.has_goal_action
-            and ctx.state_or_work_requested
-            and (ctx.pending_verify_requested or self._has_non_readonly_tool_call(ctx.tool_calls))
-        ):
-            return self._reject_agent(
-                on_message,
-                self._error("Goal/Plan required before mutating work.", self.RULE_GOAL_PLAN_FIRST),
-                "Retrying: set goal and plan before tools.",
-                "GoalPlan_Gate: Goal is empty before task state/work.",
-            )
-        if ctx.goal_will_change and not ctx.has_fresh_plan_action and (ctx.tool_calls or ctx.pending_verify_requested):
-            self._remember_agent_error(self._error("changed Goal without replacing Plan.", "include start.plan or a full plan action."))
-            self._report_gate(
-                on_message,
-                "Retrying: new goal requires a fresh plan.",
-                "GoalPlan_Gate: Goal changed without replacing Plan.",
+        if self.session.state.pending_user_feedback and ctx.goal_will_change:
+            self._warn_agent(
+                "Pending User Feedback is not a new task by default.",
+                "answer it without rewriting Goal unless the user explicitly replaces or cancels the task.",
             )
-            return True
+            ctx.actions[:] = [action for action in ctx.actions if _json_str(action.get("type")) != "goal" or action.get("complete") is True]
+            ctx.response["actions"] = [
+                action
+                for action in _json_list(ctx.response.get("actions"))
+                if not isinstance(action, dict) or _json_str(action.get("type")) != "goal" or action.get("complete") is True
+            ]
+        if ctx.goal_was_empty and not ctx.has_goal_action and ctx.state_or_work_requested and (ctx.pending_check_requested or ctx.has_edit_tool_call):
+            self._warn_agent("mutating work before Goal/Plan was set.", self.RULE_GOAL_PLAN_FIRST)
+        if ctx.goal_will_change and not ctx.has_fresh_plan_action and (ctx.pending_check_requested or ctx.has_edit_tool_call):
+            self._warn_agent("changed Goal without replacing Plan.", "replace Plan when the task scope changes.")
         return False
 
-    def _emit_debug_frame_errors(self, response: Json, on_message: MessageCallback | None) -> None:
-        if not self.session.settings.debug or on_message is None:
-            return
-        frame_error_report = self._format_frame_error_report(response)
-        if frame_error_report:
-            on_message(frame_error_report)
-
-    def _emit_state_and_progress(self, ctx: ResponseContext, on_message: MessageCallback | None) -> None:
+    def _emit_state_and_text(self, ctx: ResponseContext, on_message: MessageCallback | None) -> None:
         if on_message is not None and self.state_updater.latest_report:
-            report = self.state_updater.latest_report if self.session.settings.debug else self.state_updater.compact_report()
+            report = self.state_updater.compact_report()
             if report:
                 on_message(report)
-        if on_message is not None:
-            for message in ctx.progress_messages:
-                on_message(message)
+        if on_message is not None and ctx.assistant_text and ctx.actions and not ctx.completion_message:
+            on_message(ctx.assistant_text)
 
     def _gate_after_apply(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult | None:
+        if ctx.plan_was_empty and not self.blackboard.plan and (ctx.pending_check_requested or ctx.has_edit_tool_call):
+            self._warn_agent("mutating work before Plan was set.", self.RULE_GOAL_PLAN_FIRST)
         if (
             ctx.plan_was_empty
             and not self.blackboard.plan
-            and (ctx.pending_verify_requested or self._has_non_readonly_tool_call(ctx.tool_calls))
+            and ctx.tool_calls
+            and self.session.state.turn_tool_calls + len(ctx.tool_calls) >= self.context_budget().planless_discovery_tool_calls
         ):
-            return self._reject_result(
-                self._remember_agent_error,
-                on_message,
-                self._error("Plan required before mutating work.", self.RULE_GOAL_PLAN_FIRST),
-                "Retrying: create a short plan before mutating tools.",
-                "GoalPlan_Gate: Plan is empty before mutating tool/verify.",
-            )
+            self._warn_agent("Plan is empty after discovery.", "set a short Plan before more broad exploration.")
 
-        if (
-            ctx.tool_calls
-            and not any(execution.outcome != "success" for execution in self.tool_runner.latest_executions)
-            and self._verification_is_settled()
-        ):
+        if ctx.tool_calls and not any(execution.outcome != "success" for execution in self.tool_runner.latest_executions) and self._checks_are_settled():
             if self._plan_is_complete():
-                self._warn_agent("Plan and verification are complete; continuing tools without reopening Plan.")
-            elif ctx.plan_was_complete and ctx.verification_was_settled:
+                self._warn_agent("Plan and Checks are complete; continuing tools without reopening Plan.")
+            elif ctx.plan_was_complete and ctx.checks_settled:
                 self._warn_agent("Continuing tools after completed Plan; update Plan if the new work changes scope.")
 
         if not ctx.tool_calls and not ctx.plan_was_complete and self._plan_is_complete() and not self.blackboard.goal_reached:
-            if not self._verification_is_settled():
+            if not self._checks_are_settled():
                 self._warn_agent(
-                    "Plan is complete but verification is not recorded.",
-                    "run checks when files changed or verification was requested.",
+                    "Plan is complete but Checks are not recorded.",
+                    "run checks when files changed or checks were requested.",
                 )
             else:
-                self._warn_agent("Plan and verification are complete; finish with goal.complete=true when no further work is needed.")
+                self._warn_agent("Plan and Checks are complete; finish with goal.complete=true when no further work is needed.")
         if (
-            ctx.state_or_work_requested
-            and not ctx.tool_calls
-            and not ctx.pending_verify_requested
-            and not ctx.progress_messages
-            and not ctx.completion_message
-            and not self.state_updater.changed
-        ):
-            self._warn_agent("response made no effective state change; continue with tool, verify, or goal.")
-        if (
-            not self.session.settings.plan_mode
-            and ctx.has_state_update_action
+            ctx.has_state_update_action
             and self.state_updater.changed
-            and not ctx.has_goal_action
+            and not ctx.goal_was_empty
             and not ctx.tool_calls
-            and not ctx.pending_verify_requested
+            and not ctx.pending_check_requested
             and not ctx.completion_message
-            and ctx.chat_message is None
             and ctx.user_rule_message is None
         ):
             self._warn_agent("state update-only turn; include frontier tool, verify, or goal when arguments are known.")
         return None
 
-    def _plan_mode_completion_error(self, message: str) -> str:
-        if not self.session.settings.plan_mode:
-            return ""
-        text = message.strip()
-        if not text.startswith("<proposed_plan>") or not text.endswith("</proposed_plan>"):
-            return "final plan must be wrapped in <proposed_plan>...</proposed_plan>"
-        if text.count("<proposed_plan>") != 1 or text.count("</proposed_plan>") != 1:
-            return "final plan must contain exactly one proposed_plan block"
-        if not text.removeprefix("<proposed_plan>").removesuffix("</proposed_plan>").strip():
-            return "final plan block is empty"
-        return ""
-
-    def _promote_required_verification(self, ctx: ResponseContext) -> None:
-        verification = self.blackboard.verification
-        if not self.blackboard.verification_required or not self.blackboard.goal_reached:
+    def _promote_required_checks(self, ctx: ResponseContext) -> None:
+        checks = self.blackboard.checks
+        if not self.blackboard.checks_required or not self.blackboard.goal_reached:
             return
-        if verification.status in {VerificationStatus.REQUIRED, VerificationStatus.DONE, VerificationStatus.BLOCKED}:
+        if checks.status in {CheckStatus.REQUIRED, CheckStatus.PASSED, CheckStatus.BLOCKED}:
             return
-        self.blackboard.task_code = TaskCode.VERIFYING
-        verification.status = VerificationStatus.REQUIRED
-        verification.kind = verification.kind or "change_syntax_check"
-        verification.method = verification.method or self.blackboard.goal or self.blackboard.user_input
-        if not verification.criteria:
-            verification.criteria = ["changed files pass the smallest relevant syntax or compile check"]
-        verification.context = verification.context or ctx.completion_message or self.blackboard.goal
+        self.blackboard.task_code = TaskCode.CHECKING
+        checks.status = CheckStatus.REQUIRED
+        checks.method = checks.method or self.blackboard.goal or self.blackboard.user_input
+        checks.context = checks.context or ctx.completion_message or self.blackboard.goal
 
     def _run_tool_actions(
         self,
@@ -5822,9 +6034,8 @@ def _run_tool_actions(
         *,
         confirm: ConfirmCallback | None,
         on_auto_approve: ToolDisplayCallback | None,
-        on_live_output: ToolLiveOutputCallback | None,
-        on_live_done: ToolLiveDoneCallback | None,
         on_message: MessageCallback | None,
+        append_to_latest: bool = False,
     ) -> bool:
         if not ctx.tool_calls:
             return False
@@ -5832,21 +6043,14 @@ def _run_tool_actions(
             ctx.tool_calls,
             confirm=confirm,
             on_auto_approve=on_auto_approve,
-            on_live_output=on_live_output,
-            on_live_done=on_live_done,
+            append_to_latest=append_to_latest,
         )
         if on_message is not None:
             report = ToolCallDisplayFormatter.latest_report(self.tool_runner.latest_executions)
             if report:
                 on_message(report)
             if self.session.settings.debug and self.tool_runner.skipped_after_failure_count:
-                on_message(
-                    "Tool Calls Skipped: "
-                    + str(self.tool_runner.skipped_after_failure_count)
-                    + " after "
-                    + self.tool_runner.skipped_after_failure_key
-                    + " failed"
-                )
+                on_message(f"Tool Calls Skipped: {self.tool_runner.skipped_after_failure_count} after {self.tool_runner.skipped_after_failure_key} failed")
         self.compactor.maybe_compact()
         return True
 
@@ -5857,8 +6061,8 @@ def _handle_observe_response(
         *,
         on_message: MessageCallback | None,
     ) -> AgentRunResult:
-        if ctx.pending_verify_requested:
-            self._warn_observe('ignored verify status="pending".', "observe must keep or forget latest results first.")
+        if ctx.pending_check_requested:
+            self._remember_observe_error(self._warning('ignored verify status="pending".', "observe must keep or forget latest results first."))
         repeated_tool_retry_error = self._repeated_tool_retry_error(ctx.tool_calls)
         if repeated_tool_retry_error:
             return self._reject_result(
@@ -5878,83 +6082,91 @@ def _handle_observe_response(
         )
         if gate_result is not None:
             return gate_result
-        forget_error = self._forget_tool_result_error(ctx.actions)
-        if forget_error:
-            return self._reject_result(
-                self._remember_observe_error,
-                on_message,
-                self._error("invalid forget: " + forget_error + ".", self.RULE_VISIBLE_RESULTS),
-                "Retrying: forget only visible tool result keys.",
-                "ToolResult_Gate: " + forget_error + ".",
-            )
-        forget_hypothesis_error = self._forget_active_hypothesis_error(ctx.actions)
-        if forget_hypothesis_error:
-            return self._reject_result(
-                self._remember_observe_error,
-                on_message,
-                self._error("forget conflicts with active hypothesis: " + forget_hypothesis_error + ".", self.RULE_CLOSE_SOURCE),
-                "Retrying: close hypothesis before forgetting its source result.",
-                "ToolResult_Gate: " + forget_hypothesis_error + ".",
-            )
-        if not ctx.actions:
-            return self._reject_result(
-                self._remember_observe_error,
-                on_message,
-                self._error("observe returned no actions.", "keep useful results or forget latest results with a reason."),
-                "Retrying: keep or forget latest results.",
-                "Observe_Gate: empty actions are not a checkpoint; return keep or forget.",
-            )
-        observed_blocks = self.tool_context.unreduced_blocks(self.blackboard.memory_checkpoint_tool_result_counter)
+        forget_gate = self._gate_forget_actions(ctx.actions, on_message, self._remember_observe_error)
+        if forget_gate is not None:
+            return forget_gate
+        observed_blocks = self._unreferenced_unreduced_blocks()
         observed_counter = ToolResultContext.max_counter(observed_blocks)
-        covered = {
-            key
-            for action in ctx.actions
-            if _json_str(action.get("type")) in {"keep", "forget"}
-            for key in _source_from_json(action)
-            if key.startswith("tr.")
-        }
-        missing_observe_keys = [key for key in ToolResultContext.blocks_by_key(observed_blocks) if key not in covered]
-        if missing_observe_keys:
-            self._remember_observe_error(
-                self._error("observe missed result key(s): " + ", ".join(missing_observe_keys) + ".", "cover each latest result with keep or forget.")
-            )
-            self._report_gate(
-                on_message,
-                "Retrying: cover every latest result key with keep or forget.",
-                "Observe_Gate: missing coverage for result keys: " + ", ".join(missing_observe_keys) + ".",
-            )
-            return AgentRunResult()
-        self._emit_debug_frame_errors(response, on_message)
         forgotten_keys = self.apply_response(response)
-        self._emit_state_and_progress(ctx, on_message)
-        kept_keys: list[str] = []
-        if any(_json_str(action.get("type")) in {"keep", "forget", "known", "stable_knowledge"} for action in ctx.actions):
-            self.mode = AgentMode.ACT
-            self.runtime.consecutive_tool_turns = 0
-            kept_keys = self.tool_context.keep_results(ctx.actions, observed_blocks, max_chars=self.KEPT_TOOL_RESULT_CHARS)
-            self.tool_context.compact_observed(observed_blocks)
-            self._mark_memory_checkpoint(observed_counter)
-            self.observe_feedback_errors = []
-        else:
-            self.mode = AgentMode.OBSERVE
+        self._emit_state_and_text(ctx, on_message)
+        self.mode = AgentMode.ACT
+        kept_keys = self.tool_context.keep_results(
+            ctx.actions,
+            observed_blocks,
+            max_chars=self.context_budget().kept_chars,
+            max_block_chars=self.context_budget().kept_block_chars,
+        )
+        self.tool_context.compact_observed(observed_blocks)
+        self._mark_memory_checkpoint(observed_counter)
+        self.observe_feedback_errors = []
+        self._warn_weak_observe_memory(ctx.actions)
         self._emit_tool_context_update(kept_keys, forgotten_keys, on_message)
-        self._promote_required_verification(ctx)
+        self._promote_required_checks(ctx)
         return AgentRunResult()
 
+    def _warn_weak_observe_memory(self, actions: list[Json]) -> None:
+        if any(_json_str(action.get("type")) in {"keep", "forget", "lead"} for action in actions):
+            return
+        known_actions = [action for action in actions if _json_str(action.get("type")) == "known"]
+        if not known_actions:
+            return
+        for action in known_actions:
+            for raw in _json_list(action.get("items")):
+                item = KnownItem.from_json(raw)
+                if item is not None and KnownItem.source_of(item):
+                    return
+        self._remember_observe_error(
+            self._warning(
+                "weak observe memory: known facts need source tr.N or keep/forget coverage.", "use source-backed Facts/Leads or keep important raw results."
+            )
+        )
+
     def _forget_tool_result_error(self, actions: list[Json]) -> str:
         keys = ToolResultContext.forget_result_keys_from_actions(actions)
         if not any(_json_str(action.get("type")) == "forget" for action in actions):
             return ""
         if not keys:
             return "missing tr.* source"
-        visible_keys = set(
-            ToolResultContext.blocks_by_key(
-                self.tool_context.kept_results + self.tool_context.latest + self.tool_context.recent
-            )
-        )
+        visible_keys = set(ToolResultContext.blocks_by_key(self.tool_context.kept_results + self.tool_context.latest + self.tool_context.recent))
         missing = [key for key in keys if key not in visible_keys]
         return "not in visible tool results: " + ", ".join(missing) if missing else ""
 
+    def _gate_forget_actions(
+        self,
+        actions: list[Json],
+        on_message: MessageCallback | None,
+        remember_error: Callable[[str], None],
+    ) -> AgentRunResult | None:
+        forget_error = self._forget_tool_result_error(actions)
+        if forget_error:
+            return self._reject_result(
+                remember_error,
+                on_message,
+                self._error("invalid forget: " + forget_error + ".", self.RULE_VISIBLE_RESULTS),
+                "Retrying: forget only visible tool result keys.",
+                "ToolResult_Gate: " + forget_error + ".",
+            )
+        forgotten = set(ToolResultContext.forget_result_keys_from_actions(actions))
+        released = set()
+        for action in actions:
+            values = _json_list(action.get("items")) if _json_str(action.get("type")) == "lead" else []
+            for raw in values:
+                item = Lead.from_json(raw)
+                if item is not None and item.status != LeadStatus.ACTIVE:
+                    released.update(key for key in item.source if key.startswith("tr."))
+        protected = self.blackboard.protected_result_sources()
+        conflict = sorted((forgotten & set(protected)) - released)
+        forget_protected_error = "protected source: " + ", ".join(key + " (" + protected[key] + ")" for key in conflict) if conflict else ""
+        if forget_protected_error:
+            return self._reject_result(
+                remember_error,
+                on_message,
+                self._error("forget conflicts with protected result source: " + forget_protected_error + ".", self.RULE_CLOSE_SOURCE),
+                "Retrying: close dependent state before forgetting its source result.",
+                "ToolResult_Gate: " + forget_protected_error + ".",
+            )
+        return None
+
     def _emit_tool_context_update(self, kept: list[str], forgotten: list[str], on_message: MessageCallback | None) -> None:
         if on_message is None or not (kept or forgotten):
             return
@@ -5966,62 +6178,12 @@ def _emit_tool_context_update(self, kept: list[str], forgotten: list[str], on_me
         on_message("Tool Result Context: " + " / ".join(parts))
 
     def _finish_or_continue(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult:
-        if self.blackboard.verification.status == VerificationStatus.REQUIRED:
-            if self.blackboard.verification_required:
-                return self._reject_completion(
-                    on_message,
-                    self._error("edited files need verification before completion.", self.RULE_VERIFY_DIRECTLY),
-                    "Retrying: verify edited files before completion.",
-                    "Verification_Gate: edit completion requires verification.",
-                )
-            return self._reject_completion(
-                on_message,
-                self._error("verification required before completion.", self.RULE_VERIFY_DIRECTLY),
-                "Retrying: verification is required before completion.",
-                "Verification_Gate: retrying until verification is passed or blocked.",
-            )
-        if self.blackboard.verification.status == VerificationStatus.FAILED and self.blackboard.goal_reached:
-            return self._reject_completion(
-                on_message,
-                self._error("verification failed; fix the reported issue first."),
-                "Retrying: verification failed; fix the reported issue first.",
-                "Verification_Gate: verification failed; fix before completion.",
-            )
-        completion_plan_error = self._completion_plan_error(ctx)
-        if completion_plan_error:
-            return self._reject_completion(
-                on_message,
-                self._error("completion before Plan was complete.", self.RULE_COMPLETE_PLAN),
-                "Retrying: finish the plan before completing.",
-                "Completion_Gate: " + completion_plan_error + ".",
-            )
-        blocked_completion_error = self._blocked_verification_completion_error()
-        if blocked_completion_error:
-            return self._reject_completion(
-                on_message,
-                self._error("blocked verification completion invalid: " + blocked_completion_error + ".", self.RULE_BLOCKED_BY_USER),
-                "Retrying: blocked verification needs blocker=user.",
-                "Verification_Gate: " + blocked_completion_error + ".",
-            )
-        investigate_completion_error = self._investigate_completion_error()
-        if investigate_completion_error:
-            return self._reject_completion(
-                on_message,
-                self._error(investigate_completion_error + ".", "mark a hypothesis confirmed before completing."),
-                "Retrying: confirm a hypothesis before completing.",
-                "Completion_Gate: " + investigate_completion_error + ".",
-            )
+        completion_gate = self._gate_completion(ctx, on_message)
+        if completion_gate is not None:
+            return completion_gate
         if self.blackboard.goal_reached and not ctx.completion_message:
             self._warn_agent("filled missing message_for_complete with a fallback completion message.")
-        completion_message = self._completion_fallback_message(ctx) if self.blackboard.goal_reached else ""
-        plan_mode_completion_error = self._plan_mode_completion_error(completion_message) if self.blackboard.goal_reached else ""
-        if plan_mode_completion_error:
-            return self._reject_completion(
-                on_message,
-                self._error("invalid plan-mode completion: " + plan_mode_completion_error + ".", "return the proposed plan as the final message."),
-                "Retrying: finish plan mode with a proposed_plan block.",
-                "PlanMode_Gate: " + plan_mode_completion_error + ".",
-            )
+        completion_message = (ctx.completion_message or ctx.assistant_text or "Done.") if self.blackboard.goal_reached else ""
         if self.blackboard.goal_reached:
             self.session.append_conversation(AssistantMessage(content=completion_message))
             if on_message is not None:
@@ -6029,16 +6191,41 @@ def _finish_or_continue(self, ctx: ResponseContext, on_message: MessageCallback
             self._finish_current_goal()
             return AgentRunResult(done=True, value=ctx.response)
         self.blackboard.goal_reached = False
-        if not ctx.actions:
-            self._remember_agent_error(
-                self._error("no actions while goal is incomplete.", self.RULE_FINAL_ACTION)
+        return AgentRunResult()
+
+    def _gate_completion(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult | None:
+        if self.blackboard.checks.status == CheckStatus.REQUIRED:
+            if self.blackboard.checks_required:
+                self._warn_agent("edited files need Checks before completion.", self.RULE_VERIFY_DIRECTLY)
+            else:
+                self._warn_agent("Checks are required before completion.", self.RULE_VERIFY_DIRECTLY)
+        if self.blackboard.checks.status == CheckStatus.FAILED and self.blackboard.goal_reached:
+            self._warn_agent("Checks failed; fix the reported issue first.")
+        completion_plan_error = self._completion_plan_error(ctx)
+        if completion_plan_error:
+            self.blackboard.goal_reached = False
+            return self._reject_result(
+                self._remember_agent_error,
+                on_message,
+                self._error("completion before Plan was complete: " + completion_plan_error + ".", self.RULE_COMPLETE_PLAN),
+                "Retrying: finish the plan before completing.",
+                "Completion_Gate: " + completion_plan_error + ".",
             )
-            self._report_gate(
+        completion_followup_error = self._completion_plan_followup_error()
+        if completion_followup_error:
+            self.blackboard.goal_reached = False
+            return self._reject_result(
+                self._remember_agent_error,
                 on_message,
-                "Continuing: assistant must set current task's goal.",
-                "GoalPlan_Gate: goal not reached; retrying next useful action.",
+                self._error("completion before Plan follow-up was resolved: " + completion_followup_error + ".", self.RULE_PLAN_FOLLOWUP),
+                "Retrying: resolve Plan follow-up before completing.",
+                "Completion_Gate: " + completion_followup_error + ".",
             )
-        return AgentRunResult()
+        if self.blackboard.goal_reached and self.blackboard.checks.status == CheckStatus.BLOCKED and self.blackboard.checks.blocker != CheckBlocker.USER:
+            self._warn_agent("blocked Checks completion invalid: verify blocked requires blocker=user before completion.", self.RULE_BLOCKED_BY_USER)
+        if self.blackboard.goal_reached and self.blackboard.leads and not any(item.status == LeadStatus.CONFIRMED for item in self.blackboard.leads):
+            self._warn_agent("investigation completion requires a confirmed lead.", "mark a lead confirmed when claiming a root cause.")
+        return None
 
     def run(
         self,
@@ -6046,54 +6233,56 @@ def run(
         *,
         confirm: ConfirmCallback | None = None,
         on_auto_approve: ToolDisplayCallback | None = None,
-        on_live_output: ToolLiveOutputCallback | None = None,
-        on_live_done: ToolLiveDoneCallback | None = None,
         on_message: MessageCallback | None = None,
+        poll_user_input: UserInputPoller | None = None,
     ) -> Json:
         self.agent_feedback_errors = []
         self.failed_tool_call_key = None
         self.failed_tool_call_count = 0
-        self.runtime.consecutive_tool_turns = 0
         self.tool_context.prune_recent(
-            max_index_items=self.TOOL_RESULT_INDEX_ITEMS,
+            max_index_items=self.context_budget().index_items,
             checkpoint=self.blackboard.memory_checkpoint_tool_result_counter,
         )
         self._prune_tool_result_store()
-        # Range fingerprints are tied to previously read file content; require a fresh read before later edits.
-        self.session.state.range_fingerprints.clear()
         self.mode = AgentMode.ACT
         self.session.state.turn_tool_calls = 0
         self.session.state.turn_model_calls = 0
         old_goal = self.blackboard.goal
-        old_task_context = bool(self.blackboard.goal or self.blackboard.plan or self.blackboard.hypotheses)
+        old_task_context = bool(self.blackboard.goal or self.blackboard.plan or self.blackboard.leads)
         self.blackboard.user_input = user_input
         previous_task_done = self.blackboard.task_code == TaskCode.DONE
-        if previous_task_done:
-            self.blackboard.work_mode = WorkMode.NORMAL
+        self.incomplete_task_context_at_turn_start = old_task_context and not previous_task_done
         # Keep previous task state at a new user turn so short follow-ups like
         # "continue" can resume. The first response must align with it before work
         # when the new request does not match the previous goal.
         self.task_alignment_required = old_task_context and self._task_text_key(user_input) != self._task_text_key(old_goal)
         self.blackboard.task_code = TaskCode.NEW
         self.blackboard.goal_reached = False
-        self.blackboard.verification_required = False
+        self.blackboard.checks_required = False
         self.observe_feedback_errors = []
-        self.blackboard.verification.reset()
+        self.blackboard.checks.reset()
         self.compactor.maybe_compact()
         self.session.append_conversation(UserMessage(content=user_input))
 
+        def before_step(_index: int, _max_steps: int) -> None:
+            self._ingest_queued_user_input(poll_user_input, on_message)
+
+        if self._can_stream_tools():
+            return self.run_stream_loop(
+                max_steps=self.session.settings.max_agent_steps,
+                on_message=on_message,
+                confirm=confirm,
+                on_auto_approve=on_auto_approve,
+                on_step_limit=lambda: (_ for _ in ()).throw(LLMError("agent step limit reached")),
+                on_before_step=before_step,
+            )
+
         return self.run_loop(
             max_steps=self.session.settings.max_agent_steps,
             on_message=on_message,
-            on_step=lambda response: self.handle_response(
-                response,
-                confirm=confirm,
-                on_auto_approve=on_auto_approve,
-                on_live_output=on_live_output,
-                on_live_done=on_live_done,
-                on_message=on_message,
-            ),
+            on_step=lambda response: self.handle_response(response, confirm=confirm, on_auto_approve=on_auto_approve, on_message=on_message),
             on_step_limit=lambda: (_ for _ in ()).throw(LLMError("agent step limit reached")),
+            on_before_step=before_step,
         )
 
     def _task_text_key(self, text: str) -> str:
@@ -6101,58 +6290,65 @@ def _task_text_key(self, text: str) -> str:
 
     def handle_response(
         self,
-        response: Json,
-        *,
-        confirm: ConfirmCallback | None = None,
-        on_auto_approve: ToolDisplayCallback | None = None,
-        on_live_output: ToolLiveOutputCallback | None = None,
-        on_live_done: ToolLiveDoneCallback | None = None,
-        on_message: MessageCallback | None = None,
-    ) -> AgentRunResult:
-        ctx = self._build_response_context(response)
-        if self.mode == AgentMode.OBSERVE:
-            return self._handle_observe_response(
-                ctx,
-                response,
-                on_message=on_message,
-            )
-
-        if self._gate_before_apply(ctx, on_message):
-            return AgentRunResult()
-
-        chat_result = self._handle_chat_response(ctx, on_message)
-        if chat_result is not None:
-            return chat_result
-
-        self._emit_debug_frame_errors(response, on_message)
-        forgotten_keys = self.apply_response(response)
-        self._emit_state_and_progress(ctx, on_message)
-        self._emit_tool_context_update([], forgotten_keys, on_message)
-        if ctx.has_user_rule_action and not ctx.tool_calls and not ctx.pending_verify_requested:
-            message = ctx.user_rule_message or "Rule saved."
-            self.session.append_conversation(AssistantMessage(content=message))
-            if on_message is not None:
-                on_message(message)
-            self._finish_current_goal()
-            return AgentRunResult(done=True, value=response)
-
-        gate_result = self._gate_after_apply(ctx, on_message)
-        if gate_result is not None:
-            return gate_result
-
-        self._promote_required_verification(ctx)
-        if self._run_tool_actions(
-            ctx,
-            confirm=confirm,
-            on_auto_approve=on_auto_approve,
-            on_live_output=on_live_output,
-            on_live_done=on_live_done,
-            on_message=on_message,
-        ):
-            return AgentRunResult()
-
-        self.runtime.consecutive_tool_turns = 0
-        return self._finish_or_continue(ctx, on_message)
+        response: Json,
+        *,
+        confirm: ConfirmCallback | None = None,
+        on_auto_approve: ToolDisplayCallback | None = None,
+        on_message: MessageCallback | None = None,
+        append_to_latest: bool = False,
+    ) -> AgentRunResult:
+        try:
+            ctx = self._build_response_context(response)
+            feedback_checkpoint = len(self.agent_feedback_errors)
+            DebugTrace.handle_event(self, "handle-start", ctx, response)
+            if self.mode == AgentMode.OBSERVE:
+                return self._handle_observe_response(ctx, response, on_message=on_message)
+
+            if self._gate_protocol_actions(ctx, on_message) or self._gate_tool_actions(ctx, on_message) or self._gate_task_state(ctx, on_message):
+                DebugTrace.handle_event(self, "handle-gated-before-apply", ctx, response)
+                return AgentRunResult()
+
+            text_result = self._handle_text_response(ctx, on_message)
+            if text_result is not None:
+                DebugTrace.handle_event(self, "handle-text", ctx, response, result=text_result)
+                return text_result
+
+            forgotten_keys = self.apply_response(response)
+            DebugTrace.handle_event(self, "handle-applied", ctx, response, extra={"forgotten": forgotten_keys})
+            self._emit_state_and_text(ctx, on_message)
+            self._emit_tool_context_update([], forgotten_keys, on_message)
+            if ctx.has_user_rule_action and not ctx.tool_calls and not ctx.pending_check_requested:
+                message = ctx.user_rule_message or "Rule saved."
+                self.session.append_conversation(AssistantMessage(content=message))
+                if on_message is not None:
+                    on_message(message)
+                self._finish_current_goal()
+                DebugTrace.handle_event(self, "handle-user-rule", ctx, response)
+                return AgentRunResult(done=True, value=response)
+
+            gate_result = self._gate_after_apply(ctx, on_message)
+            if gate_result is not None:
+                DebugTrace.handle_event(self, "handle-gated-after-apply", ctx, response, result=gate_result)
+                return gate_result
+
+            self._promote_required_checks(ctx)
+            if self._run_tool_actions(ctx, confirm=confirm, on_auto_approve=on_auto_approve, on_message=on_message, append_to_latest=append_to_latest):
+                if (
+                    feedback_checkpoint > 0
+                    and self.tool_runner.latest_executions
+                    and all(execution.outcome == "success" for execution in self.tool_runner.latest_executions)
+                ):
+                    markers = tuple(marker.lower() for marker in self.STALE_TOOL_FEEDBACK_MARKERS)
+                    self.agent_feedback_errors[:feedback_checkpoint] = [
+                        error for error in self.agent_feedback_errors[:feedback_checkpoint] if not any(marker in error.lower() for marker in markers)
+                    ]
+                DebugTrace.handle_event(self, "handle-tools", ctx, response)
+                return AgentRunResult()
+            result = self._finish_or_continue(ctx, on_message)
+            DebugTrace.handle_event(self, "handle-finish-or-continue", ctx, response, result=result)
+            return result
+        finally:
+            self.session.state.pending_user_feedback = ""
 
 
 ############################
@@ -6184,16 +6380,19 @@ class CommandSpec:
     CommandSpec("/help", "Show commands or ask about nanocode", "Info", "/help [question]"),
     CommandSpec("/status", "Show session status", "Info", "/status"),
     CommandSpec("/rules", "Show long-term user rules", "Info", "/rules"),
-    CommandSpec("/knowledge", "Show stable knowledge", "Info", "/knowledge"),
     CommandSpec("/compact", "Compact conversation history", "Info", "/compact"),
     CommandSpec("/config", "Show resolved runtime config", "Config", "/config"),
+    CommandSpec("/context", "Show or set context budget", "Config", "/context [low|medium|high]"),
     CommandSpec("/set", "Set a runtime config override", "Config", "/set <key> <value>"),
+    CommandSpec("/api", "Show or set provider API format", "Config", "/api [auto|chat|responses]"),
     CommandSpec("/model", "Show or set model and reasoning", "Config", "/model [model_name]"),
     CommandSpec("/reason", "Set reasoning effort", "Config", "/reason"),
+    CommandSpec(
+        "/reason-payload", "Show or set chat reasoning payload", "Config", "/reason-payload [auto|off|reasoning|reasoning_effort|thinking|enable_thinking]"
+    ),
     CommandSpec("/provider", "Show or switch provider", "Config", "/provider [name]"),
-    CommandSpec("/plan", "Toggle plan mode or ask for a readonly plan", "Config", "/plan [on|off|question]"),
     CommandSpec("/yolo", "Toggle yolo mode (skip confirmations)", "Config", "/yolo"),
-    CommandSpec("/clean", "Clean all session tool result logs", "Maintenance", "/clean"),
+    CommandSpec("/index", "Initialize, sync, or rebuild code index", "Maintenance", "/index [force]"),
     CommandSpec("/exit", "Exit nanocode", "Control", "/exit"),
     CommandSpec("/quit", "Exit nanocode", "Control", "/quit"),
 )
@@ -6204,11 +6403,11 @@ class CommandSpec:
 ############################
 
 
-CONFIG_EFFORTS: tuple[str, ...] = ("minimal", "low", "medium", "high", "xhigh")
 CONFIG_PROVIDER_ATTRS: dict[str, str] = {
     "provider.model": "model",
+    "provider.prompt_cache_key": "prompt_cache_key",
     "provider.reasoning": "reasoning",
-    "provider.effort": "reasoning_effort",
+    "provider.chat_reasoning": "chat_reasoning",
     "provider.stream": "stream",
     "provider.temperature": "temperature",
     "provider.timeout": "timeout",
@@ -6218,27 +6417,25 @@ class CommandSpec:
     "runtime.compact_at": "compact_at",
     "runtime.shell_timeout": "shell_timeout",
     "runtime.max_agent_steps": "max_agent_steps",
-    "runtime.plan_timeout": "plan_timeout",
-    "runtime.plan_first_token_timeout": "plan_first_token_timeout",
+    "runtime.context_budget": "context_budget",
     "runtime.yolo": "yolo",
 }
 CONFIG_SET_KEYS: tuple[str, ...] = tuple(CONFIG_PROVIDER_ATTRS) + tuple(CONFIG_RUNTIME_ATTRS)
 CONFIG_VALUE_COMPLETIONS: dict[str, tuple[str, ...]] = {
-    "provider.reasoning": ("on", "off"),
-    "provider.effort": CONFIG_EFFORTS,
+    "provider.reasoning": REASONING_CHOICES,
+    "provider.chat_reasoning": CHAT_REASONING_CHOICES,
     "provider.stream": ("on", "off"),
     "provider.temperature": ("off",),
+    "runtime.context_budget": CONTEXT_BUDGET_CHOICES,
     "runtime.yolo": ("on", "off"),
 }
-CONFIG_BOOL_KEYS: set[str] = {"provider.reasoning", "provider.stream", "runtime.yolo"}
+CONFIG_BOOL_KEYS: set[str] = {"provider.stream", "runtime.yolo"}
 CONFIG_INT_KEYS: set[str] = {
     "provider.timeout",
     "provider.first_token_timeout",
     "runtime.compact_at",
     "runtime.shell_timeout",
     "runtime.max_agent_steps",
-    "runtime.plan_timeout",
-    "runtime.plan_first_token_timeout",
 }
 CONFIG_SET_USAGE = "Usage: /set <key> <value>"
 
@@ -6247,6 +6444,9 @@ class CommandDispatcher:
     MODEL_CONFIGURED_LABEL = "---- Configured models ----"
     MODEL_DISCOVERED_LABEL = "---- Discovered models ----"
     MODEL_LABELS = frozenset((MODEL_CONFIGURED_LABEL, MODEL_DISCOVERED_LABEL))
+    COMMAND_ALIASES = {"/context-budget": "/context", "/context_budget": "/context"}
+    API_USAGE = "Usage: /api [auto|chat|responses]"
+    REASON_PAYLOAD_USAGE = "Usage: /reason-payload [auto|off|reasoning|reasoning_effort|thinking|enable_thinking]"
 
     def __init__(
         self,
@@ -6263,21 +6463,8 @@ def __init__(
         self.select_reasoning = select_reasoning
         self.select_model = select_model
         self.select_provider = select_provider
-        self.handlers: dict[str, Callable[[str], str]] = {
-            "/help": self._help,
-            "/status": self._status,
-            "/rules": self._rules,
-            "/compact": self._compact,
-            "/config": self._config,
-            "/set": self._set,
-            "/clean": self._clean,
-            "/model": self._model,
-            "/reason": self._reason,
-            "/provider": self._provider,
-            "/plan": self._plan,
-            "/yolo": self._yolo,
-            "/knowledge": self._knowledge,
-        }
+        self.handlers = {spec.name: getattr(self, "_" + spec.name[1:].replace("-", "_")) for spec in COMMANDS if spec.category != "Control"}
+        self.handlers.update({alias: self.handlers[target] for alias, target in self.COMMAND_ALIASES.items()})
 
     def dispatch(self, user_input: str) -> CommandResult:
         stripped = user_input.strip()
@@ -6307,8 +6494,6 @@ def _help(self, args: str) -> str:
                 current_category = spec.category
                 lines.append(current_category + ":")
             lines.append("  " + spec.usage + " - " + spec.description)
-        lines.append("")
-        lines.append("Tip: use @path to autocomplete file paths in prompts.")
         return "\n".join(lines)
 
     def _format_source_help_question(self, question: str) -> str:
@@ -6351,6 +6536,18 @@ def _model(self, args: str) -> str:
             return "Usage: /model [model_name]"
         return self._set_model(model)
 
+    def _api(self, args: str) -> str:
+        value = args.strip()
+        provider = self.agent.session.config.provider
+        if not value:
+            resolved = provider.resolved_api()
+            suffix = " (" + resolved + ")" if provider.api == "auto" else ""
+            return "provider.api: " + provider.api + suffix + "\n" + self.API_USAGE
+        if value not in {"auto", "chat", "responses"}:
+            return self.API_USAGE
+        provider.api = value
+        return "Set provider.api = " + value
+
     def _model_choices(self, provider: ProviderConfig) -> tuple[str, ...]:
         configured = provider.available_models
         remote = tuple(model for model in self._fetch_remote_models(provider) if model not in configured)
@@ -6364,21 +6561,19 @@ def _model_choices(self, provider: ProviderConfig) -> tuple[str, ...]:
     def _fetch_remote_models(self, provider: ProviderConfig) -> tuple[str, ...]:
         if not provider.url or not provider.key:
             return ()
-        base_url = provider.url.rstrip("/")
-        if base_url.endswith("/chat/completions"):
-            base_url = base_url[: -len("/chat/completions")]
-        request = urllib.request.Request(
-            base_url + "/models",
-            headers={"Authorization": "Bearer " + provider.key, "User-Agent": HTTP_USER_AGENT},
-        )
         try:
-            with urllib.request.urlopen(request, timeout=3) as response:
-                data = json.loads(response.read().decode("utf-8"))
+            response = OpenAI(
+                api_key=provider.key,
+                base_url=provider.base_url(),
+                timeout=3,
+                max_retries=0,
+                default_headers={"User-Agent": HTTP_USER_AGENT},
+            ).models.list(timeout=3)
         except Exception:
             return ()
         ids = []
-        for item in _json_list(_json_dict(data).get("data")):
-            model_id = _json_dict(item).get("id")
+        for item in getattr(response, "data", response):
+            model_id = item.get("id") if isinstance(item, dict) else getattr(item, "id", None)
             if isinstance(model_id, str) and model_id:
                 ids.append(model_id)
         return tuple(dict.fromkeys(sorted(ids)))
@@ -6401,16 +6596,24 @@ def _reason(self, args: str) -> str:
             return "No change"
         return self._apply_reasoning_choice(choice)
 
+    def _reason_payload(self, args: str) -> str:
+        value = args.strip()
+        provider = self.agent.session.config.provider
+        if not value:
+            configured = provider.chat_reasoning or "off"
+            resolved = provider.resolved_chat_reasoning() or "off"
+            return "provider.chat_reasoning: " + configured + "\nprovider.resolved_chat_reasoning: " + resolved + "\n" + self.REASON_PAYLOAD_USAGE
+        if value not in CHAT_REASONING_CHOICES:
+            return self.REASON_PAYLOAD_USAGE
+        provider.chat_reasoning = value
+        return "Set provider.chat_reasoning = " + value
+
     def _apply_reasoning_choice(self, choice: str) -> str:
         provider = self.agent.session.config.provider
-        if choice == "off":
-            provider.reasoning = False
-            return "Set provider.reasoning = off"
-        if choice not in CONFIG_EFFORTS:
-            return "Invalid reasoning effort: " + choice
-        provider.reasoning = True
-        provider.reasoning_effort = choice
-        return "Set provider.reasoning = on\nSet provider.effort = " + choice
+        if choice not in REASONING_CHOICES:
+            return "Invalid reasoning: " + choice
+        provider.reasoning = choice
+        return "Set provider.reasoning = " + choice
 
     def _provider(self, args: str) -> str:
         name = args.strip()
@@ -6441,26 +6644,6 @@ def _yolo(self, args: str) -> str:
             return self._set("runtime.yolo " + ("off" if current else "on"))
         return self._set("runtime.yolo " + args)
 
-    def _plan(self, args: str) -> str:
-        text = args.strip()
-        if not text:
-            current = self.agent.session.settings.plan_mode
-            self.agent.session.settings.plan_mode = not current
-            return "Set plan mode = " + self._format_bool(self.agent.session.settings.plan_mode)
-        if text in {"on", "off"}:
-            self.agent.session.settings.plan_mode = text == "on"
-            return "Set plan mode = " + text
-        previous = self.agent.session.settings.plan_mode
-        self.agent.session.settings.plan_mode = True
-        try:
-            if self.run_agent is not None:
-                self.run_agent(text)
-            else:
-                self.agent.run(text)
-        finally:
-            self.agent.session.settings.plan_mode = previous
-        return ""
-
     def _rules(self, args: str) -> str:
         if args:
             return "Usage: /rules"
@@ -6472,52 +6655,117 @@ def _status(self, args: str) -> str:
         session = self.agent.session
         blackboard = self.agent.blackboard
         provider = session.config.provider
-        reasoning = provider.reasoning_effort if provider.reasoning else "off"
+        if provider.reasoning == "off":
+            reasoning = "off"
+        elif provider.resolved_api() != "chat":
+            reasoning = provider.reasoning
+        else:
+            reasoning = provider.reasoning + "(" + provider.resolved_chat_reasoning() + ")"
+        api = provider.resolved_api() + ("(" + provider.api + ")" if provider.api == "auto" else "")
         model_usage = (
             "\n".join(
-                "  " + (model.rsplit("/", 1)[-1] or model) + ": calls=" + str(usage.calls) + " tokens=" + _format_count(usage.total_tokens)
+                "  "
+                + (model.rsplit("/", 1)[-1] or model)
+                + ": calls="
+                + str(usage.calls)
+                + " tokens="
+                + _format_count(usage.total_tokens)
+                + ((" cached=" + _format_count(usage.cached_prompt_tokens)) if usage.cached_prompt_tokens else "")
                 for model, usage in session.state.model_usage.items()
             )
             if session.state.model_usage
             else "  (empty)"
         )
-        verification_status = blackboard.verification.status
-        return "\n".join(
-            [
-                "provider: " + session.config.active_provider,
-                "model: " + (provider.model or "(empty)") + " reasoning=" + (reasoning or "(empty)") + " stream=" + self._format_bool(provider.stream),
-                "session: " + session.session_id,
-                "runtime: yolo="
-                + self._format_bool(session.settings.yolo)
-                + " plan="
-                + self._format_bool(session.settings.plan_mode)
-                + " compact_at="
-                + str(session.settings.compact_at),
-                "conversation: " + str(len(session.state.conversation)) + "/" + str(session.settings.compact_at),
-                "tool_calls: turn=" + str(session.state.turn_tool_calls) + " session=" + str(session.state.session_tool_calls),
-                "tokens: last=" + _format_count(session.state.last_total_tokens) + " session=" + _format_count(session.state.session_total_tokens),
-                "models:",
-                model_usage,
-                "task: " + blackboard.task_code,
-                "goal: " + (blackboard.goal or "(empty)"),
-                "verification: " + verification_status,
-            ]
-        )
+        checks_status = blackboard.checks.status
+        code_index_status, code_index_message = _code_index_status(session, check=True)
+        if session.state.code_index_error:
+            code_index_status = "error"
+            code_index_message = session.state.code_index_error
+        elif session.state.code_index_refreshing:
+            code_index_status = "syncing"
+            code_index_message = session.state.status_notice.removeprefix("index:")
+        elif code_index_status in {"missing", "stale"}:
+            code_index_message = (code_index_message + "; " if code_index_message else "") + "run /index"
+        code_index = code_index_status + (": " + _shorten(code_index_message, 80) if code_index_message else "")
+        lines = [
+            "provider: " + session.config.active_provider,
+            "model: "
+            + (provider.model or "(empty)")
+            + " api="
+            + api
+            + " reasoning="
+            + (reasoning or "(empty)")
+            + " stream="
+            + self._format_bool(provider.stream),
+            "session: " + session.session_id,
+            "runtime: yolo="
+            + self._format_bool(session.settings.yolo)
+            + " compact_at="
+            + str(session.settings.compact_at)
+            + " context_budget="
+            + session.settings.context_budget,
+            "conversation: " + str(len(session.state.conversation)) + "/" + str(session.settings.compact_at),
+            "tool_calls: turn=" + str(session.state.turn_tool_calls) + " session=" + str(session.state.session_tool_calls),
+            "tools: code_index=" + code_index,
+            "tokens: last=" + _format_count(session.state.last_total_tokens) + " session=" + _format_count(session.state.session_total_tokens),
+        ]
+        if session.state.last_cached_prompt_tokens or session.state.session_cached_prompt_tokens:
+            rate = _format_percent(session.state.session_cached_prompt_tokens, session.state.session_prompt_tokens)
+            lines.append(
+                "cache: last="
+                + _format_count(session.state.last_cached_prompt_tokens)
+                + " session="
+                + _format_count(session.state.session_cached_prompt_tokens)
+                + " rate="
+                + rate
+            )
+        lines.extend(["models:", model_usage, "goal: " + (blackboard.goal or "(empty)"), "checks: " + checks_status])
+        return "\n".join(lines)
 
     def _compact(self, args: str) -> str:
         if args:
             return "Usage: /compact"
-        return self._with_status(self._compact_history)
 
-    def _compact_history(self) -> str:
-        before = len(self.agent.session.state.conversation)
-        count = self.agent.compact_history()
-        if count:
-            return "Compacted conversation history: " + str(count) + " item(s) -> " + str(len(self.agent.session.state.conversation)) + " item(s)"
-        return (
-            "Conversation history is empty"
-            if before == 0
-            else "Nothing to compact: " + str(before) + " item(s), keeping recent " + str(ConversationCompactor.KEEP_RECENT) + "."
+        def compact_history() -> str:
+            before = len(self.agent.session.state.conversation)
+            count = self.agent.compact_history()
+            if count:
+                return "Compacted conversation history: " + str(count) + " item(s) -> " + str(len(self.agent.session.state.conversation)) + " item(s)"
+            return (
+                "Conversation history is empty"
+                if before == 0
+                else "Nothing to compact: " + str(before) + " item(s), keeping recent " + str(ConversationCompactor.KEEP_RECENT) + "."
+            )
+
+        return self._with_status(compact_history)
+
+    def _index(self, args: str) -> str:
+        value = args.strip()
+        if value not in {"", "force"}:
+            return "Usage: /index [force]"
+        return self._with_status(lambda: _code_index_sync(self.agent.session, force=value == "force"))
+
+    def _context(self, args: str) -> str:
+        value = args.strip()
+        if value:
+            if value not in CONTEXT_BUDGET_CHOICES:
+                return "Usage: /context [low|medium|high]"
+            self.agent.session.settings.context_budget = value
+            self.agent.apply_context_budget()
+            return "Set runtime.context_budget = " + value + "\n" + self._format_context_budget()
+        return self._format_context_budget()
+
+    def _format_context_budget(self) -> str:
+        budget = self.agent.context_budget()
+        return "\n".join(
+            [
+                "context_budget: " + self.agent.session.settings.context_budget,
+                "raw_chars: " + str(budget.raw_chars),
+                "kept_chars: " + str(budget.kept_chars),
+                "kept_block_chars: " + str(budget.kept_block_chars),
+                "index_items: " + str(budget.index_items),
+                "observe_after_results: " + str(budget.observe_after_results),
+            ]
         )
 
     def _config(self, args: str) -> str:
@@ -6533,10 +6781,12 @@ def _config(self, args: str) -> str:
                 "provider.url: " + (provider_config.url or "(empty)"),
                 "provider.key: " + ("(set)" if provider_config.key else "(empty)"),
                 "provider.model: " + (provider_config.model or "(empty)"),
+                "provider.api: " + provider_config.api,
+                "provider.prompt_cache_key: " + provider_config.prompt_cache_key,
                 "provider.available_models: " + (", ".join(provider_config.available_models) or "(empty)"),
-                "provider.reasoning: " + self._format_bool(provider_config.reasoning),
-                "provider.effort: " + (provider_config.reasoning_effort or "(empty)"),
-                "provider.reasoning_payload: " + (provider_config.reasoning_payload or "(empty)"),
+                "provider.reasoning: " + provider_config.reasoning,
+                "provider.chat_reasoning: " + (provider_config.chat_reasoning or "(empty)"),
+                "provider.resolved_chat_reasoning: " + (provider_config.resolved_chat_reasoning() or "(empty)"),
                 "provider.stream: " + self._format_bool(provider_config.stream),
                 "provider.temperature: " + self._format_optional(provider_config.temperature),
                 "provider.timeout: " + self._format_optional(provider_config.timeout),
@@ -6548,31 +6798,16 @@ def _config(self, args: str) -> str:
                 "runtime.compact_at: " + str(session.settings.compact_at),
                 "runtime.shell_timeout: " + str(session.settings.shell_timeout),
                 "runtime.max_agent_steps: " + str(session.settings.max_agent_steps),
-                "runtime.plan_timeout: " + str(session.settings.plan_timeout),
-                "runtime.plan_first_token_timeout: " + str(session.settings.plan_first_token_timeout),
+                "runtime.context_budget: " + session.settings.context_budget,
                 "runtime.auto_clean_recent: " + session.settings.auto_clean_recent,
                 "runtime.yolo: " + self._format_bool(session.settings.yolo),
-                "runtime.plan_mode: " + self._format_bool(session.settings.plan_mode),
             ]
         )
 
-    def _knowledge(self, args: str) -> str:
-        if args:
-            return "Usage: /knowledge"
-        knowledge = self.agent.blackboard.stable_knowledge
-        if not any(knowledge.values()):
-            return "No stable knowledge stored."
-        lines = ["Stable knowledge:"]
-        for category in STABLE_KNOWLEDGE_CATEGORIES:
-            items = knowledge.get(category, [])
-            if not items:
-                continue
-            lines.append(category + ":")
-            lines.extend("- " + item for item in items)
-        return "\n".join(lines)
-
     def _set(self, args: str) -> str:
-        key, value = self._parse_set_args(args)
+        key, separator, raw_value = args.partition(" ")
+        key = key.strip()
+        value = (raw_value.strip() or None) if separator else None
         if not key:
             return CONFIG_SET_USAGE
         if key not in CONFIG_SET_KEYS:
@@ -6588,10 +6823,6 @@ def _set(self, args: str) -> str:
             suffix = " and compacted history" if compacted else ""
         return "Set " + key + " = " + self._config_value(key) + suffix
 
-    def _parse_set_args(self, args: str) -> tuple[str, str | None]:
-        key, separator, value = args.partition(" ")
-        return key.strip(), (value.strip() or None) if separator else None
-
     def _config_value(self, key: str) -> str:
         target, attr = self._config_target(key)
         value = getattr(target, attr)
@@ -6605,16 +6836,17 @@ def _config_value(self, key: str) -> str:
 
     def _apply_config_value(self, key: str, value: str) -> str:
         target, attr = self._config_target(key)
+        if key == "provider.prompt_cache_key":
+            try:
+                setattr(target, attr, ProviderConfig.clean_prompt_cache_key(value))
+            except ConfigError:
+                return "Usage: /set provider.prompt_cache_key [auto|off|<stable-key>]"
+            return ""
         if key in CONFIG_BOOL_KEYS:
             if value not in {"on", "off"}:
                 return "Usage: /set " + key + " [on|off]"
             setattr(target, attr, value == "on")
             return ""
-        if key == "provider.effort":
-            if value not in CONFIG_EFFORTS:
-                return "Usage: /set " + key + " [" + "|".join(CONFIG_EFFORTS) + "]"
-            setattr(target, attr, value)
-            return ""
         if key == "provider.temperature":
             if value == "off":
                 setattr(target, attr, None)
@@ -6627,6 +6859,14 @@ def _apply_config_value(self, key: str, value: str) -> str:
                 return "Usage: /set " + key + " <number|off>"
             setattr(target, attr, parsed_float)
             return ""
+        choices = CONFIG_VALUE_COMPLETIONS.get(key)
+        if choices:
+            if value not in choices:
+                return "Usage: /set " + key + " [" + "|".join(choices) + "]"
+            setattr(target, attr, value)
+            if key == "runtime.context_budget":
+                self.agent.apply_context_budget()
+            return ""
         if key in CONFIG_INT_KEYS:
             try:
                 parsed_int = int(value)
@@ -6644,20 +6884,6 @@ def _config_target(self, key: str) -> tuple[object, str]:
             return self.agent.session.config.provider, CONFIG_PROVIDER_ATTRS[key]
         return self.agent.session.settings, CONFIG_RUNTIME_ATTRS[key]
 
-    def _clean(self, args: str) -> str:
-        if args:
-            return "Usage: /clean"
-        sessions_dir = self.agent.session.data_path("sessions")
-        if not os.path.isdir(sessions_dir):
-            return f"No session logs directory found at {sessions_dir}"
-        result = SessionLogCleaner(self.agent.session).clean()
-        msg = f"Cleaned {result.cleaned} log file(s) from {sessions_dir}"
-        if result.skipped:
-            msg += f" ({result.skipped} active session(s) skipped)"
-        if result.failed:
-            msg += f" ({result.failed} failed)"
-        return msg
-
     def _format_bool(self, value: bool | None) -> str:
         return "(fallback)" if value is None else ("on" if value else "off")
 
@@ -6678,6 +6904,10 @@ def _format_count(value: int) -> str:
     return str(value)
 
 
+def _format_percent(value: int, total: int) -> str:
+    return "-" if value <= 0 or total <= 0 else str(round(value * 100 / total)) + "%"
+
+
 ############################
 # Interactive Loop
 ############################
@@ -6689,7 +6919,6 @@ class StatusBar:
     def __init__(self, session: Session):
         self.session = session
         self.started_at = 0.0
-        self.last_elapsed = 0.0
         self.stop_event = threading.Event()
         self.thread: threading.Thread | None = None
         self.rendered = False
@@ -6704,7 +6933,6 @@ def __exit__(self, *args) -> None:
 
     def reset_timer(self) -> None:
         self.started_at = time.monotonic()
-        self.last_elapsed = 0.0
 
     def elapsed(self) -> float:
         if self.started_at <= 0:
@@ -6714,9 +6942,6 @@ def elapsed(self) -> float:
     def is_running(self) -> bool:
         return self.thread is not None
 
-    def snapshot(self, turn_elapsed: float = 0.0) -> str:
-        return "".join(text for _, text in self._fragments(turn_elapsed, now=time.monotonic(), show_sweep=False, show_elapsed=False))
-
     def resume(self) -> None:
         if self.thread is not None or not sys.stderr.isatty():
             return
@@ -6727,7 +6952,6 @@ def resume(self) -> None:
     def pause(self) -> None:
         if self.thread is None:
             return
-        self.last_elapsed = self.elapsed()
         self.stop_event.set()
         self.thread.join()
         self.thread = None
@@ -6737,7 +6961,6 @@ def _run(self) -> None:
         while not self.stop_event.is_set():
             now = time.monotonic()
             elapsed = self.elapsed()
-            self.last_elapsed = elapsed
             self.output.write_raw("\r")
             self.output.erase_end_of_line()
             print_formatted_text(FormattedText(self._fragments(elapsed, now=now, show_sweep=True, show_elapsed=True)), output=self.output, end="", flush=True)
@@ -6763,42 +6986,30 @@ def _format_line(self, turn_elapsed: float, *, now: float, show_elapsed: bool) -
         session = self.session
         active_model = session.state.current_model_call_label or session.config.provider.model
         model = active_model.rsplit("/", 1)[-1] or active_model or "(no model)"
-        reasoning = session.state.current_model_call_reasoning_label or (
-            session.config.provider.reasoning_effort if session.config.provider.reasoning else "off"
-        )
-        modes = "".join(" | " + label for label, enabled in (("yolo", session.settings.yolo), ("plan", session.settings.plan_mode)) if enabled)
+        reasoning = session.state.current_model_call_reasoning_label or (session.config.provider.reasoning)
+        modes = " | yolo" if session.settings.yolo else ""
         context = str(len(session.state.conversation)) + "/" + str(session.settings.compact_at)
         last_tokens = _format_count(session.state.last_total_tokens)
         session_tokens = _format_count(session.state.session_total_tokens)
         rate = session.state.last_model_call_rate
         token_summary = "last:" + last_tokens + " sess:" + session_tokens
         parts = [model + " (" + reasoning + ")" + modes, "ctx:" + context, "tool:" + str(session.state.turn_tool_calls), "tok:" + token_summary]
+        if session.state.status_notice and session.state.status_notice_until > now:
+            parts.insert(1, session.state.status_notice)
         if show_elapsed:
             parts.append(f"turn:{turn_elapsed:.1f}s")
         if session.state.current_model_call_started_at > 0:
-            activity = self._activity_label(session.state.current_model_call_activity)
+            activity = {"compact": "compacting", "observe": "observing"}.get(session.state.current_model_call_activity, "working")
             if session.state.current_model_call_has_content:
                 activity += "*"
             elapsed = max(0.0, now - session.state.current_model_call_started_at)
             if session.state.current_model_call_has_content and elapsed > 0:
                 rate = session.state.current_model_call_streaming_chars / 4 / elapsed
-            parts.append(
-                activity
-                + "("
-                + str(session.state.turn_model_calls)
-                + "):"
-                + f"{elapsed:.1f}s"
-            )
+            parts.append(activity + "(" + str(session.state.turn_model_calls) + "):" + f"{elapsed:.1f}s")
         if rate > 0:
             parts[3] += " " + _format_count(int(rate)) + "t/s"
-        if session.state.status_notice and session.state.status_notice_until > now:
-            parts.append(session.state.status_notice)
         return " | ".join(parts)
 
-    @staticmethod
-    def _activity_label(activity: str) -> str:
-        return {"compact": "compacting", "observe": "observing"}.get(activity, "working")
-
     def _sweep_fragments(self, text: str, now: float) -> list[tuple[str, str]]:
         if not text:
             return [("", "")]
@@ -6819,11 +7030,67 @@ def _sweep_fragments(self, text: str, now: float) -> list[tuple[str, str]]:
         return fragments
 
 
+class ModelRetryShortcut:
+    CTRL_G = 0x07
+
+    def __init__(self, session: Session):
+        self.session = session
+        self.fd: int | None = None
+        self.original_attrs = None
+        self.previous_handler = None
+
+    def __enter__(self) -> Self:
+        if not sys.stdin.isatty() or not hasattr(signal, "SIGQUIT"):
+            return self
+        try:
+            import termios
+        except ImportError:
+            return self
+        try:
+            self.fd = sys.stdin.fileno()
+            self.original_attrs = termios.tcgetattr(self.fd)
+            attrs = list(self.original_attrs)
+            attrs[6] = list(attrs[6])
+            attrs[6][termios.VQUIT] = self._control_char(attrs[6], self.CTRL_G)
+            if hasattr(termios, "VREPRINT"):
+                attrs[6][termios.VREPRINT] = self._control_char(attrs[6], os.fpathconf(self.fd, "PC_VDISABLE"))
+            termios.tcsetattr(self.fd, termios.TCSADRAIN, attrs)
+            self.previous_handler = signal.getsignal(signal.SIGQUIT)
+            signal.signal(signal.SIGQUIT, self._handle_signal)
+        except (AttributeError, OSError, ValueError, termios.error):
+            self.fd = None
+            self.original_attrs = None
+        return self
+
+    def __exit__(self, *args) -> None:
+        try:
+            import termios
+        except ImportError:
+            return
+        if self.previous_handler is not None:
+            signal.signal(signal.SIGQUIT, self.previous_handler)
+            self.previous_handler = None
+        if self.fd is not None and self.original_attrs is not None:
+            try:
+                termios.tcsetattr(self.fd, termios.TCSADRAIN, self.original_attrs)
+            except termios.error:
+                pass
+        self.fd = None
+        self.original_attrs = None
+
+    @staticmethod
+    def _control_char(chars: list[Any], value: int) -> int | bytes:
+        return bytes([value]) if chars and isinstance(chars[0], bytes) else value
+
+    def _handle_signal(self, signum: int, frame: Any) -> None:
+        if self.session.state.current_model_call_started_at > 0:
+            self.session.state.manual_model_retry_requested = True
+            raise KeyboardInterrupt
+
+
 class AgentLoop:
-    LIVE_PREVIEW_MAX_LINES: ClassVar[int] = 10
-    LIVE_PREVIEW_MAX_CHARS: ClassVar[int] = 20_000
-    LIVE_PREVIEW_REFRESH_INTERVAL: ClassVar[float] = 0.12
-    LIVE_PREVIEW_INTERRUPT_HINT_AFTER: ClassVar[float] = 3.0
+    BASH_LIVE_PREVIEW_LINES: ClassVar[int] = 6
+    BASH_LIVE_PREVIEW_CHARS: ClassVar[int] = 8000
 
     def __init__(
         self,
@@ -6839,20 +7106,25 @@ def __init__(
         self.status_bar = StatusBar(agent.session)
         self.history_path = agent.session.history_path()
         self.prompt_session = prompt_session
-        self._live_preview_active = False
-        self._live_preview_resume_status = False
-        self._live_preview_text = ""
-        self._live_preview_rendered_lines = 0
-        self._live_preview_last_render = 0.0
-        self._live_preview_started_at = 0.0
-        self._live_preview_hint_shown = False
+        self._queued_input_lock = threading.Lock()
+        self._queued_input_messages: list[str] = []
+        self._runtime_ui_thread: threading.Thread | None = None
+        self._runtime_ui_app: Application | None = None
+        self._runtime_ui_ready = threading.Event()
+        self._runtime_ui_stop = threading.Event()
+        self._tool_live_preview_lock = threading.Lock()
+        self._tool_live_preview_text = ""
+        self._exit_after_current_turn = False
         if self.prompt_session is None and input_fn is input and sys.stdin.isatty():
             self.prompt_session = self._make_prompt_session()
 
     def run(self) -> int:
         self._print_welcome()
         with SessionLock(self.agent.session.lock_path()), self.status_bar:
-            self._auto_clean_logs()
+            seconds = RuntimeSettings.clean_retention_seconds(self.agent.session.settings.auto_clean_recent)
+            if seconds > 0:
+                clean_sessions(self.agent.session, older_than_seconds=seconds)
+            self._start_existing_code_index_refresh()
             dispatcher = CommandDispatcher(
                 self.agent,
                 run_agent=self._run_agent,
@@ -6862,8 +7134,16 @@ def run(self) -> int:
                 select_provider=self._select_provider,
             )
             while True:
+                _code_index_reload_if_ready(self.agent.session)
+                if self._exit_after_current_turn:
+                    return 0
                 try:
-                    user_input = self._read_input(self._prompt()).strip()
+                    queued_input = self._pop_queued_input()
+                    if queued_input is not None:
+                        user_input = queued_input
+                        self._emit("sent: " + user_input)
+                    else:
+                        user_input = self._read_input(self._prompt()).strip()
                 except EOFError:
                     self._emit("")
                     return 0
@@ -6872,6 +7152,7 @@ def run(self) -> int:
                     continue
                 if not user_input:
                     continue
+                _code_index_reload_if_ready(self.agent.session)
                 try:
                     result = dispatcher.dispatch(user_input)
                 except Exception as error:
@@ -6885,19 +7166,18 @@ def run(self) -> int:
                     continue
                 self._run_agent(user_input)
 
-    def _auto_clean_logs(self) -> None:
-        seconds = RuntimeSettings.clean_retention_seconds(self.agent.session.settings.auto_clean_recent)
-        if seconds > 0:
-            SessionLogCleaner(self.agent.session).clean(older_than_seconds=seconds)
-
     def _prompt(self) -> str:
         labels = []
         if self.agent.session.settings.yolo:
             labels.append("yolo")
-        if self.agent.session.settings.plan_mode:
-            labels.append("plan")
         return "[" + ",".join(labels) + "] > " if labels else "> "
 
+    def _start_existing_code_index_refresh(self) -> None:
+        """Start the existing-index refresh helper, relaying its progress events to the session notice."""
+        def relay(event: str, *, done: int = 0, total: int = 0, **_ignored: object) -> None:
+            _set_code_index_notice(self.agent.session, event, done=done, total=total)
+        _code_index_refresh_existing_async(self.agent.session, progress=relay)
+
     def _read_input(self, prompt: str) -> str:
         if self.prompt_session is None:
             return self.input_fn(prompt)
@@ -6907,25 +7187,42 @@ def _read_input(self, prompt: str) -> str:
                 multiline=False,
                 enable_history_search=True,
                 refresh_interval=StatusBar.INTERVAL,
-                bottom_toolbar=lambda: self.status_bar._fragments(
-                    0.0,
-                    now=time.monotonic(),
-                    show_sweep=False,
-                    show_elapsed=False,
-                ),
+                bottom_toolbar=self._status_bar_fragments,
             )
 
+    def _append_queued_input(self, text: str) -> None:
+        """Append the stripped text to the queued-input list; whitespace-only input is dropped."""
+        message = text.strip()
+        if message:
+            with self._queued_input_lock:
+                self._queued_input_messages.append(message)
+
+    def _pop_queued_input(self) -> str | None:
+        """Remove and return the oldest queued message, or None when the queue is empty."""
+        with self._queued_input_lock:
+            pending = self._queued_input_messages
+            return pending.pop(0) if pending else None
+
+    def _clear_queued_input(self) -> int:  # empties the queue in place and returns how many messages were discarded
+        with self._queued_input_lock:
+            discarded = len(self._queued_input_messages)
+            del self._queued_input_messages[:]
+        return discarded
+
     def _choice_style(self) -> Style:
         return Style.from_dict(
             {
+                "runtime-prompt": "#67e8f9",
+                "queue-input": "#e5e7eb",
                 "selected-option": "bold #0f4c5c bg:#e6f2f3",
                 "choice-hint": "#6b7280",
+                "bash-preview": "#6b7280",
                 "bottom-toolbar": "noreverse bg:default fg:default",
                 "bottom-toolbar.text": "noreverse bg:default fg:default",
             }
         )
 
-    def _choice_bottom_toolbar(self):
+    def _status_bar_fragments(self):
         return self.status_bar._fragments(
             0.0,
             now=time.monotonic(),
@@ -6933,6 +7230,164 @@ def _choice_bottom_toolbar(self):
             show_elapsed=False,
         )
 
+    def _runtime_status_fragments(self):
+        """Return status-bar fragments with the sweep and the elapsed time both enabled."""
+        bar = self.status_bar
+        render = bar._fragments
+        elapsed = bar.elapsed()
+        now = time.monotonic()
+        return render(elapsed, now=now, show_sweep=True, show_elapsed=True)
+
+    def _start_runtime_ui(self) -> bool:  # False when not interactive, already started, or the thread died at once
+        if not (self.input_fn is input and sys.stdin.isatty() and sys.stderr.isatty() and self._runtime_ui_thread is None):
+            return False
+        self._runtime_ui_ready.clear()
+        self._runtime_ui_stop.clear()
+        worker = self._runtime_ui_thread = threading.Thread(target=self._run_runtime_ui, daemon=True)
+        worker.start()
+        self._runtime_ui_ready.wait(timeout=0.2)
+        alive = self._runtime_ui_thread is None or self._runtime_ui_thread.is_alive()
+        if not alive:
+            self._runtime_ui_thread = None
+        return alive
+
+    def _stop_runtime_ui(self) -> bool:  # True only when a UI thread existed and has finished
+        thread = self._runtime_ui_thread
+        if thread is None:
+            return False
+        self._runtime_ui_stop.set()  # a thread that has not reached app.run() yet returns on seeing this
+        self._runtime_ui_ready.wait(timeout=0.2)  # brief wait for the thread to publish its Application
+        app = self._runtime_ui_app
+        if app is not None:
+            try:
+                app.exit()
+            except Exception:
+                pass  # best effort: exit() failures are ignored; the join below decides the outcome
+        thread.join(timeout=0.8)
+        stopped = not thread.is_alive()
+        if stopped:
+            self._runtime_ui_thread = None
+            self._runtime_ui_app = None
+        return stopped  # on False the thread stays registered, so _start_runtime_ui refuses to start another
+
+    def _with_runtime_ui_paused(self, action: Callable[[], JsonValue]) -> JsonValue:  # run action with the runtime UI stopped
+        was_running = self._stop_runtime_ui()  # True only if a UI thread was actually stopped
+        try:
+            return action()
+        finally:
+            if was_running:  # restart only what this call stopped, even when action() raised
+                self._start_runtime_ui()
+
+    def _interrupt_current_turn(self, *, exit_after: bool = False) -> None:
+        """Interrupt the current turn (exit_after also ends the run loop); a failing app.exit() still interrupts."""
+        self._exit_after_current_turn = self._exit_after_current_turn or exit_after
+        try:
+            if (app := self._runtime_ui_app) is not None:
+                app.exit()  # raises when the app is not running or already exiting; must not swallow the interrupt
+            os.kill(os.getpid(), signal.SIGINT)
+        except Exception:
+            _thread.interrupt_main()
+
+    def _retry_current_model_call(self) -> None:  # no-op unless current_model_call_started_at is positive
+        state = self.agent.session.state
+        if state.current_model_call_started_at > 0:
+            state.manual_model_retry_requested = True
+            try:
+                os.kill(os.getpid(), signal.SIGINT)
+            except Exception:
+                _thread.interrupt_main()
+
+    def _run_runtime_ui(self) -> None:  # thread target (see _start_runtime_ui): builds and runs the in-turn Application
+        buffer = Buffer(multiline=False)
+        buffer_control = BufferControl(buffer=buffer, focusable=True)
+        bindings = KeyBindings()
+
+        def print_queued(text: str) -> None:
+            print_formatted_text(FormattedText([("ansibrightblack", "queued: " + text)]), output=self.status_bar.output)
+
+        def queue_text(event, text: str) -> None:  # clear the input line, then queue non-empty text and echo it
+            buffer.reset()
+            event.app.invalidate()
+            if not text:
+                return
+            self._append_queued_input(text)
+            terminal_task = run_in_terminal(lambda: print_queued(text), in_executor=False)
+            if inspect.iscoroutine(terminal_task):  # schedule it only when a coroutine object came back
+                event.app.create_background_task(terminal_task)
+
+        @bindings.add("enter", eager=True)
+        def _accept(event):
+            queue_text(event, buffer.text.strip())
+
+        @bindings.add("c-d", eager=True)
+        def _eof(event):  # with text: buffer.delete(); empty line: interrupt and leave after the turn
+            if buffer.text:
+                buffer.delete()
+                event.app.invalidate()
+            else:
+                self._interrupt_current_turn(exit_after=True)
+
+        @bindings.add("c-c", eager=True)
+        @bindings.add("<sigint>", eager=True)
+        def _interrupt(event):
+            self._interrupt_current_turn()
+
+        @bindings.add("c-g", eager=True)
+        def _retry(event):
+            self._retry_current_model_call()
+
+        input_line = VSplit(
+            [
+                Window(FormattedTextControl([("class:runtime-prompt", "> ")]), width=2, dont_extend_width=True),
+                Window(buffer_control, style="class:queue-input", dont_extend_height=True),
+            ],
+            height=Dimension(min=1),
+        )
+        status_line = Window(
+            FormattedTextControl(self._runtime_status_fragments, style="class:bottom-toolbar.text"),
+            style="class:bottom-toolbar",
+            height=Dimension(min=1),
+            dont_extend_height=True,
+        )
+        bash_preview = ConditionalContainer(
+            Window(
+                FormattedTextControl(self._tool_live_preview_fragments, style="class:bash-preview"),
+                height=Dimension.exact(self.BASH_LIVE_PREVIEW_LINES),
+                dont_extend_height=True,
+            ),
+            filter=Condition(self._has_tool_live_preview),  # shown only while preview text is buffered
+        )
+        app = Application(
+            layout=Layout(
+                HSplit(
+                    [
+                        bash_preview,
+                        status_line,
+                        input_line,
+                    ]
+                ),
+                focused_element=buffer_control,
+            ),
+            style=self._choice_style(),
+            full_screen=False,
+            key_bindings=bindings,
+            refresh_interval=StatusBar.INTERVAL,
+            erase_when_done=True,
+            output=self.status_bar.output,
+        )
+        self._runtime_ui_app = app  # publish the app before signalling readiness
+        self._runtime_ui_ready.set()
+        if self._runtime_ui_stop.is_set():  # a stop was requested already: never start the app
+            return
+        try:
+            app.run(handle_sigint=False)
+        except BaseException:  # any failure, interrupts included, just ends this thread
+            return
+        finally:
+            self._runtime_ui_ready.set()  # leave the ready event set on the way out
+            if self._runtime_ui_app is app:  # clear the slot only if it still refers to this app
+                self._runtime_ui_app = None
+
     def _visible_choices(self, choices: tuple[str, ...], labels: dict[str, str], disabled: set[str], query: str) -> tuple[str, ...]:
         if not query:
             return choices
@@ -6959,12 +7414,6 @@ def flush() -> None:
         flush()
         return tuple(visible)
 
-    def _choice_enabled(self, choices: tuple[str, ...], disabled: set[str]) -> tuple[str, ...]:
-        return tuple(choice for choice in choices if choice not in disabled)
-
-    def _choice_initial_index(self, enabled_choices: tuple[str, ...], current: str) -> int:
-        return enabled_choices.index(current) if current in enabled_choices else 0
-
     def _run_choice_application(
         self,
         title: str,
@@ -6976,7 +7425,7 @@ def _run_choice_application(
         state: dict[str, str | int | bool] = {"query": "", "selected": 0, "searching": False}
 
         def enabled() -> tuple[str, ...]:
-            return self._choice_enabled(self._visible_choices(choices, labels, disabled, str(state["query"])), disabled)
+            return tuple(choice for choice in self._visible_choices(choices, labels, disabled, str(state["query"])) if choice not in disabled)
 
         def clamp_selection() -> None:
             options = enabled()
@@ -6988,7 +7437,7 @@ def clamp_selection() -> None:
         def choice_fragments():
             query = str(state["query"])
             visible = self._visible_choices(choices, labels, disabled, query)
-            options = self._choice_enabled(visible, disabled)
+            options = tuple(choice for choice in visible if choice not in disabled)
             clamp_selection()
             suffix = (" /" + query) if query else ""
             if query and not state["searching"]:
@@ -7019,29 +7468,27 @@ def choice_fragments():
         bindings = KeyBindings()
         searching = Condition(lambda: bool(state["searching"]))
 
+        def move(event, delta: int) -> None:
+            options = enabled()
+            if options:
+                state["selected"] = min(max(int(state["selected"]) + delta, 0), len(options) - 1)
+            event.app.invalidate()
+
         @bindings.add("up", eager=True)
         def _up(event):
-            state["selected"] = max(0, int(state["selected"]) - 1)
-            event.app.invalidate()
+            move(event, -1)
 
         @bindings.add("k", filter=~searching, eager=True)
         def _k(event):
-            state["selected"] = max(0, int(state["selected"]) - 1)
-            event.app.invalidate()
+            move(event, -1)
 
         @bindings.add("down", eager=True)
         def _down(event):
-            options = enabled()
-            if options:
-                state["selected"] = min(len(options) - 1, int(state["selected"]) + 1)
-            event.app.invalidate()
+            move(event, 1)
 
         @bindings.add("j", filter=~searching, eager=True)
         def _j(event):
-            options = enabled()
-            if options:
-                state["selected"] = min(len(options) - 1, int(state["selected"]) + 1)
-            event.app.invalidate()
+            move(event, 1)
 
         @bindings.add("/", eager=True)
         def _search(event):
@@ -7072,6 +7519,10 @@ def _cancel_search(event):
 
         @bindings.add("enter", eager=True)
         def _accept(event):
+            if state["searching"]:
+                state["searching"] = False
+                event.app.invalidate()
+                return
             options = enabled()
             if options:
                 event.app.exit(result=options[int(state["selected"])])
@@ -7104,7 +7555,7 @@ def _type(event):
             event.app.invalidate()
 
         options = enabled()
-        state["selected"] = self._choice_initial_index(options, current) if options else 0
+        state["selected"] = options.index(current) if current in options else 0
         content = FormattedTextControl(choice_fragments, focusable=True)
         choice_window = Window(content, dont_extend_height=True)
         app = Application(
@@ -7113,7 +7564,7 @@ def _type(event):
                     [
                         choice_window,
                         Window(
-                            FormattedTextControl(lambda: self._choice_bottom_toolbar(), style="class:bottom-toolbar.text"),
+                            FormattedTextControl(self._status_bar_fragments, style="class:bottom-toolbar.text"),
                             style="class:bottom-toolbar",
                             dont_extend_height=True,
                             height=Dimension(min=1),
@@ -7203,13 +7654,13 @@ def _select_provider(self, providers: tuple[str, ...], current_provider: str) ->
 
     def _select_reasoning(self) -> SelectionResult:
         provider = self.agent.session.config.provider
-        current = provider.reasoning_effort if provider.reasoning else "off"
+        current = provider.reasoning
         labels = {"off": "off - disable reasoning"}
         if current == "off":
             labels["off"] = "off - disable reasoning (current)"
-        elif current in CONFIG_EFFORTS:
+        elif current in REASONING_LEVELS:
             labels[current] = current + " (current)"
-        return self._select_choice("Reasoning effort", ("off", *CONFIG_EFFORTS), labels, current=current)
+        return self._select_choice("Reasoning effort", REASONING_CHOICES, labels, current=current)
 
     def _discard_pending_tty_input(self) -> None:
         if not sys.stdin.isatty():
@@ -7227,12 +7678,9 @@ def _make_prompt_session(self):
         os.makedirs(os.path.dirname(self.history_path), exist_ok=True)
         return PromptSession(
             history=FileHistory(self.history_path),
-            completer=ReferenceFileCompleter(
-                self.agent.session.cwd,
-                CommandCompleter(
-                    lambda: self.agent.session.config.providers,
-                    lambda: self.agent.session.config.provider.available_models,
-                ),
+            completer=CommandCompleter(
+                lambda: self.agent.session.config.providers,
+                lambda: self.agent.session.config.provider.available_models,
             ),
             lexer=CommandLexer(),
             complete_while_typing=True,
@@ -7246,114 +7694,45 @@ def _make_prompt_session(self):
         )
 
     def _run_agent(self, user_input: str) -> None:
+        runtime_ui_running = False
+        tool_runner = getattr(self.agent, "tool_runner", None)
+        old_live_output = getattr(tool_runner, "live_output", None)
         try:
             self.status_bar.reset_timer()
-            self.status_bar.resume()
-            self.agent.run(
-                user_input,
-                confirm=self._confirm_tool_call,
-                on_auto_approve=self._show_auto_tool_call,
-                **self._live_preview_callbacks(),
-                on_message=self._emit,
-            )
+            runtime_ui_running = self._start_runtime_ui()
+            if not runtime_ui_running:
+                self.status_bar.resume()
+            if tool_runner is not None:
+                tool_runner.live_output = self._show_tool_live_output
+            with patch_stdout() if runtime_ui_running else nullcontext():
+                self.agent.run(
+                    user_input,
+                    confirm=self._confirm_tool_call,
+                    on_auto_approve=self._show_auto_tool_call,
+                    on_message=self._emit,
+                    poll_user_input=self._pop_queued_input,
+                )
         except KeyboardInterrupt:
             self.agent.cancel_current_goal()
             self._emit("Cancelled")
+            cleared = self._clear_queued_input()
+            if cleared:
+                self._emit("queued cleared: " + str(cleared))
         except Cancellation as error:
             self.agent.cancel_current_goal()
             self._emit("Cancelled: " + str(error))
         except Exception as error:
             self._emit("Error: " + str(error))
         finally:
-            self._finish_live_tool_output()
-            self.status_bar.pause()
-
-    def _live_preview_callbacks(self) -> dict[str, ToolLiveOutputCallback | ToolLiveDoneCallback]:
-        if not self._live_preview_enabled():
-            return {}
-        return {"on_live_output": self._show_live_tool_output, "on_live_done": self._finish_live_tool_output}
-
-    def _live_preview_enabled(self) -> bool:
-        return self.output_fn is print and sys.stderr.isatty()
-
-    def _show_live_tool_output(self, call: ParsedToolCall, chunk: str) -> None:
-        if not self._live_preview_enabled() or not chunk:
-            return
-        if not self._live_preview_active:
-            self._start_live_tool_output()
-        self._live_preview_text = (self._live_preview_text + chunk)[-self.LIVE_PREVIEW_MAX_CHARS :]
-        self._render_live_tool_output(throttled=True)
-
-    def _start_live_tool_output(self) -> None:
-        self._live_preview_active = True
-        self._live_preview_text = ""
-        self._live_preview_rendered_lines = 0
-        self._live_preview_last_render = 0.0
-        self._live_preview_started_at = time.monotonic()
-        self._live_preview_hint_shown = False
-        self._live_preview_resume_status = self.status_bar.is_running()
-        if self._live_preview_resume_status:
+            if tool_runner is not None:
+                tool_runner.live_output = old_live_output
+            self._clear_tool_live_preview()
+            self.agent.session.state.manual_model_retry_requested = False
+            if runtime_ui_running:
+                self._stop_runtime_ui()
+            _code_index_update_pending(self.agent.session)
             self.status_bar.pause()
 
-    def _finish_live_tool_output(self, call: ParsedToolCall | None = None) -> None:
-        if not self._live_preview_active:
-            return
-        self._render_live_tool_output(throttled=False)
-        # Keep the final live preview in terminal history instead of treating it
-        # as an active redraw region.
-        self._live_preview_rendered_lines = 0
-        self._live_preview_active = False
-        self._live_preview_text = ""
-        self._live_preview_started_at = 0.0
-        self._live_preview_hint_shown = False
-        if self._live_preview_resume_status:
-            self._live_preview_resume_status = False
-            self.status_bar.resume()
-
-    def _render_live_tool_output(self, *, throttled: bool) -> None:
-        lines = self._live_preview_lines()
-        if not any(line.strip() for line in lines):
-            return
-        now = time.monotonic()
-        if throttled and now - self._live_preview_last_render < self.LIVE_PREVIEW_REFRESH_INTERVAL:
-            return
-        self._live_preview_last_render = now
-        self._clear_live_tool_output()
-        segments: list[tuple[str, str]] = []
-        hint_visible = self._live_preview_interrupt_hint(now)
-        if hint_visible:
-            segments.append(("ansibrightblack", "  Ctrl-C interrupts current Bash; press again after it stops to cancel the session.\n"))
-        for line in lines:
-            segments.extend([("ansibrightblack", "  "), ("ansibrightblack", line + "\n")])
-        print_formatted_text(FormattedText(segments), output=self.status_bar.output, end="", flush=True)
-        self._live_preview_rendered_lines = len(lines) + (1 if hint_visible else 0)
-
-    def _live_preview_interrupt_hint(self, now: float) -> bool:
-        if self._live_preview_hint_shown:
-            return True
-        if self._live_preview_started_at <= 0:
-            return False
-        if now - self._live_preview_started_at < self.LIVE_PREVIEW_INTERRUPT_HINT_AFTER:
-            return False
-        self._live_preview_hint_shown = True
-        return True
-
-    def _clear_live_tool_output(self) -> None:
-        if self._live_preview_rendered_lines <= 0:
-            return
-        self.status_bar.output.cursor_up(self._live_preview_rendered_lines)
-        self.status_bar.output.erase_down()
-        self.status_bar.output.flush()
-        self._live_preview_rendered_lines = 0
-
-    def _live_preview_lines(self) -> list[str]:
-        text = self._live_preview_text.replace("\r", "\n")
-        text = re.sub(r"\x1b\[[0-?]*[ -/]*[@-~]", "", text)
-        text = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", text)
-        lines = [line for line in text.splitlines() if line.strip()][-self.LIVE_PREVIEW_MAX_LINES :]
-        width = max(20, shutil.get_terminal_size((120, 20)).columns - 6)
-        return [_shorten(line, width) for line in lines]
-
     def _run_with_status(self, action: StatusAction) -> str:
         self.status_bar.reset_timer()
         self.status_bar.resume()
@@ -7364,13 +7743,61 @@ def _run_with_status(self, action: StatusAction) -> str:
 
     def _confirm_tool_call(self, call: ParsedToolCall, tool: Tool) -> ConfirmationResult:
         def action() -> ConfirmationResult:
+            self._clear_tool_live_preview()
             self._print_tool_call_display("Confirm Tool Call", "manual approval required", call, tool, title_style="bold ansiyellow")
             return self._wait_confirm("Proceed?", default=True)
 
-        return self._with_status_paused(action)
+        return self._with_runtime_ui_paused(lambda: self._with_status_paused(action))
 
     def _show_auto_tool_call(self, call: ParsedToolCall, tool: Tool) -> None:
-        self._with_status_paused(lambda: self._print_tool_call_display("Auto Tool Call", "auto approved", call, tool, title_style="bold ansiblue"))
+        def action() -> None:
+            self._clear_tool_live_preview()
+            self._print_tool_call_display("Auto Tool Call", "auto approved", call, tool, title_style="bold ansiblue")
+
+        self._with_runtime_ui_paused(lambda: self._with_status_paused(action))
+
+    def _show_tool_live_output(self, _stream: str, text: str) -> None:
+        """Route a tool output chunk: empty text finishes the preview, otherwise print it or buffer it for the UI."""
+        if self.output_fn is not print:
+            return
+        if not text:
+            self._finish_tool_live_preview()
+        elif (ui := self._runtime_ui_app) is None:
+            print_formatted_text(FormattedText([("ansibrightblack", text)]), end="", flush=True)
+        else:
+            with self._tool_live_preview_lock:
+                combined = self._tool_live_preview_text + text
+                self._tool_live_preview_text = combined[-self.BASH_LIVE_PREVIEW_CHARS :]
+            ui.invalidate()
+
+    def _finish_tool_live_preview(self) -> None:
+        """Clear the live preview and, when a UI app was present, print its last frame as ordinary output."""
+        last_frame, ui = self._tool_live_preview_frame(), self._runtime_ui_app
+        self._clear_tool_live_preview()
+        if ui is not None and last_frame:
+            print_formatted_text(FormattedText([("ansibrightblack", last_frame + "\n")]), end="", flush=True)
+
+    def _clear_tool_live_preview(self) -> None:
+        """Empty the preview buffer and ask the runtime UI app, if any, to redraw."""
+        with self._tool_live_preview_lock:
+            self._tool_live_preview_text = ""
+        if (ui := self._runtime_ui_app) is not None:
+            ui.invalidate()
+
+    def _has_tool_live_preview(self) -> bool:
+        with self._tool_live_preview_lock:  # same lock the preview writers hold
+            return self._tool_live_preview_text != ""
+
+    def _tool_live_preview_fragments(self):
+        style, frame = "class:bash-preview", self._tool_live_preview_frame()
+        return [(style, frame) if frame else ("", "")]
+
+    def _tool_live_preview_frame(self) -> str:
+        """Return the last BASH_LIVE_PREVIEW_LINES lines of the buffered preview text ("" when it is empty)."""
+        with self._tool_live_preview_lock:
+            snapshot = self._tool_live_preview_text
+        tail = snapshot.splitlines()[-self.BASH_LIVE_PREVIEW_LINES :]
+        return "\n".join(tail)
 
     def _with_status_paused(self, action: Callable[[], JsonValue]) -> JsonValue:
         was_running = self.status_bar.is_running()
@@ -7406,7 +7833,7 @@ def _print_tool_call_display(
                 [("ansibrightblack", "  Why     "), ("ansimagenta", call.intention + "\n")],
                 "  Why     " + call.intention,
             )
-        if tool.effect() == ToolEffect.EDIT:
+        if tool.EFFECT == ToolEffect.EDIT:
             preview = tool.preview()
             if preview:
                 self._emit_segments(self._preview_segments(preview), "  Preview\n" + preview)
@@ -7415,14 +7842,33 @@ def _emit(self, message: str) -> None:
         self._with_status_paused(lambda: self._print_message(message))
 
     def _print_welcome(self) -> None:
-        self._emit_segments([("bold ansicyan", "nanocode"), ("ansiwhite", " - AI coding assistant\n")], "nanocode - AI coding assistant")
-        self._emit_segments(
-            [("ansibrightblack", "  "), ("ansicyan", "/help [question]"), ("ansiwhite", " for help or source-aware questions\n")],
-            "  /help [question] for help or source-aware questions",
+        index_status, _index_message = _code_index_status(self.agent.session)
+        index_tip = (
+            [("ansibrightblack", "  tip: "), ("ansicyan", "/index"), ("ansiwhite", " initializes indexed code tools\n")] if index_status == "missing" else []
         )
+        plain_tip = "  tip: /index initializes indexed code tools\n" if index_status == "missing" else ""
         self._emit_segments(
-            [("ansibrightblack", "  "), ("ansicyan", "/status"), ("ansiwhite", " for current session state\n")],
-            "  /status for current session state",
+            [("bold ansicyan", "nanocode"), ("ansiwhite", " - AI coding assistant\n")]
+            + [
+                ("ansibrightblack", "  "),
+                ("ansicyan", "/help [question]"),
+                ("ansiwhite", " for help or source-aware questions\n"),
+                ("ansibrightblack", "  "),
+                ("ansicyan", "/status"),
+                ("ansiwhite", " for current session state;\n"),
+                ("ansibrightblack", "  "),
+                ("ansiwhite", "during work: enter queues, "),
+                ("ansicyan", "c-c"),
+                ("ansiwhite", " cancels, "),
+                ("ansicyan", "c-d"),
+                ("ansiwhite", " exits\n\n"),
+            ]
+            + index_tip,
+            "nanocode - AI coding assistant\n"
+            "  /help [question] for help or source-aware questions\n"
+            "  /status for current session state;\n"
+            "  during work: enter queues, c-c cancels, c-d exits\n" + plain_tip,
+            end="",
         )
 
     def _wait_confirm(self, prompt: str, *, default: bool) -> ConfirmationResult:
@@ -7444,10 +7890,25 @@ def _wait_confirm(self, prompt: str, *, default: bool) -> ConfirmationResult:
             return raw_answer
 
     def _print_message(self, message: str) -> None:
-        if message.startswith("State Updated"):
-            self._emit_segments(self._state_segments(message), message)
-            return
-        if message.startswith(("Plan Updated", "Known Updated", "Hypotheses Updated", "Plan + Known Updated", "Plan + Hypotheses Updated", "Hypotheses + Known Updated", "Plan + Hypotheses + Known Updated")):
+        if message.startswith(
+            (
+                "Plan Updated",
+                "Facts Updated",
+                "Leads Updated",
+                "Checks Updated",
+                "Plan + Facts Updated",
+                "Plan + Leads Updated",
+                "Plan + Checks Updated",
+                "Leads + Facts Updated",
+                "Leads + Checks Updated",
+                "Facts + Checks Updated",
+                "Plan + Leads + Facts Updated",
+                "Plan + Facts + Checks Updated",
+                "Plan + Leads + Checks Updated",
+                "Leads + Facts + Checks Updated",
+                "Plan + Leads + Facts + Checks Updated",
+            )
+        ):
             self._emit_segments(self._compact_state_segments(message), message)
             return
         if message.startswith("Tool Result Context:"):
@@ -7458,12 +7919,17 @@ def _print_message(self, message: str) -> None:
             plain = "  skipped: " + message.removeprefix("Tool Calls Skipped:").strip()
             self._emit_segments([("ansibrightblack", plain + "\n")], plain)
             return
-        if self._is_tool_report(message):
-            self._emit_segments(self._indent_segments(self._tool_segments(message), "  "), self._tool_plain(message, indent="  "), end="")
+        lines = message.splitlines()
+        if lines and (lines[0].startswith("  ...") or self._is_tool_call_line(lines[0])):
+            plain = "\n".join("  " + line.replace("[success] ", "").replace("[failure] ", "") for line in lines)
+            self._emit_segments(self._indent_segments(self._tool_segments(message), "  "), plain, end="")
             return
         if message.startswith("Retrying:"):
             self._emit_segments([("ansibrightblack", message + "\n")], message)
             return
+        if message.startswith("sent:"):
+            self._emit_segments([("#67e8f9", message + "\n")], message)
+            return
         if message.startswith("Error:"):
             self._emit_segments([("bold ansired", message + "\n")], message)
             return
@@ -7476,16 +7942,6 @@ def _print_message(self, message: str) -> None:
             return
         self._emit_segments([("ansicyan", message + "\n")], message)
 
-    def _tool_plain(self, message: str, *, indent: str) -> str:
-        return "\n".join(indent + line.replace("[success] ", "").replace("[failure] ", "") for line in message.splitlines())
-
-    def _is_tool_report(self, message: str) -> bool:
-        lines = message.splitlines()
-        if not lines:
-            return False
-        first = lines[0]
-        return first.startswith("  ...") or self._is_tool_call_line(first)
-
     def _is_tool_call_line(self, line: str) -> bool:
         return line.startswith("[success] ") or line.startswith("[failure] ")
 
@@ -7498,23 +7954,21 @@ def _emit_segments(self, segments: list[tuple[str, str]], plain: str, *, end: st
     def _preview_segments(self, preview: str) -> list[tuple[str, str]]:
         segments: list[tuple[str, str]] = [("ansibrightblack", "  Preview\n")]
         content_indent = "  "
-        diff_start = self._unified_diff_start(preview)
+        preview_lines = preview.splitlines()
+        diff_start = -1
+        for index, line in enumerate(preview_lines):
+            body = "\n".join(preview_lines[index:])
+            if line.startswith("--- ") and "\n+++ " in body and "\n@@ " in body:
+                diff_start = index
+                break
         if diff_start >= 0:
-            prefix = "\n".join(preview.splitlines()[:diff_start])
-            diff = "\n".join(preview.splitlines()[diff_start:])
+            prefix = "\n".join(preview_lines[:diff_start])
+            diff = "\n".join(preview_lines[diff_start:])
             if prefix:
                 segments += self._indented_text_segments(prefix, indent=content_indent, style="ansiyellow")
             return segments + self._indent_segments(self._diff_segments(diff), content_indent)
         return segments + self._indented_text_segments(preview, indent=content_indent, style="ansicyan")
 
-    def _unified_diff_start(self, text: str) -> int:
-        lines = text.splitlines()
-        for index, line in enumerate(lines):
-            body = "\n".join(lines[index:])
-            if line.startswith("--- ") and "\n+++ " in body and "\n@@ " in body:
-                return index
-        return -1
-
     def _diff_segments(self, text: str) -> list[tuple[str, str]]:
         segments: list[tuple[str, str]] = []
         lines = text.splitlines()
@@ -7586,39 +8040,12 @@ def _indent_segments(self, segments: list[tuple[str, str]], indent: str) -> list
                 at_line_start = part.endswith("\n")
         return indented
 
-    def _state_segments(self, message: str) -> list[tuple[str, str]]:
-        lines = message.splitlines()
-        segments: list[tuple[str, str]] = [("ansibrightblack", "-" * 48 + "\n")]
-        for index, line in enumerate(lines):
-            if index == 0:
-                title, _, badge = line.partition("|")
-                badge = badge.strip()
-                segments.extend([("bold ansicyan", title.strip()), ("ansibrightblack", " | "), (self._verify_style(badge), badge), ("", "\n")])
-            elif line.startswith("  Goal"):
-                segments.extend([("ansibrightblack", line[:10]), ("bold ansigreen", line[10:] + "\n")])
-            elif line.startswith("  Plan"):
-                segments.extend([("ansibrightblack", "  "), ("bold ansicyan", line.strip()), ("", "\n")])
-            elif line.startswith("  Hypotheses"):
-                segments.extend([("ansibrightblack", "  "), ("bold ansimagenta", line.strip()), ("", "\n")])
-            elif line.startswith("  Known"):
-                segments.extend([("ansibrightblack", "  "), ("bold ansiyellow", line.strip()), ("", "\n")])
-            elif line.startswith("  Verify"):
-                status = line[10:].strip().split(" ", 1)[0]
-                segments.extend([("ansibrightblack", line[:10]), (self._verify_style("VERIFY:" + status), line[10:] + "\n")])
-            elif line.startswith("    ..."):
-                segments.extend([("ansibrightblack", line + "\n")])
-            elif line.startswith("    "):
-                segments.extend([("ansibrightblack", "    "), ("ansiwhite", line[4:] + "\n")])
-            else:
-                segments.extend([("ansiwhite", line + "\n")])
-        return segments
-
     def _compact_state_segments(self, message: str) -> list[tuple[str, str]]:
         segments: list[tuple[str, str]] = []
         for line in message.splitlines():
             if line.endswith("Updated"):
                 segments.append(("bold ansicyan", line + "\n"))
-            elif line in {"Plan", "Hypotheses", "Known"}:
+            elif line in {"Plan", "Leads", "Facts", "Checks"}:
                 segments.append(("ansicyan", line + "\n"))
             elif line.startswith("  ..."):
                 segments.append(("ansibrightblack", line + "\n"))
@@ -7651,15 +8078,6 @@ def _tool_call_segments(self, tail: str, status_style: str) -> list[tuple[str, s
         segments.append(("", "\n"))
         return segments
 
-    def _verify_style(self, badge: str) -> str:
-        if "required" in badge:
-            return "bold ansimagenta"
-        if "done" in badge:
-            return "bold ansigreen"
-        if "failed" in badge or "blocked" in badge:
-            return "bold ansired"
-        return "ansibrightblack"
-
 
 ############################
 # Helpers
@@ -7707,21 +8125,6 @@ def _json_str(value: JsonValue) -> str | None:
     return str(value)
 
 
-def _memory_fact_from_json(value: JsonValue) -> str | None:
-    item = _json_dict(value)
-    if item:
-        fact = (_json_str(item.get("text")) or _json_str(item.get("fact")) or "").strip()
-    else:
-        fact = (_json_str(value) or "").strip()
-    if not fact:
-        return None
-    if fact.startswith("<") and fact.endswith(">"):
-        inner = fact[1:-1].strip().lower()
-        if inner and any(word in inner for word in ("fact", "target", "arg", "path", "criterion", "result", "context", "message", "goal")):
-            return None
-    return fact
-
-
 def _source_from_json(item: Json) -> tuple[str, ...]:
     source_values = _json_list(item.get("source")) or _json_list(item.get("sources"))
     source = [(_json_str(raw) or "").strip() for raw in source_values]
@@ -7770,9 +8173,15 @@ def get_completions(self, document, complete_event):
                 if model.startswith(text):
                     yield Completion(model, start_position=-len(text))
             return
-        if text.startswith("/plan "):
-            text = text[len("/plan ") :]
-            for value in ("on", "off"):
+        if text.startswith("/api "):
+            text = text[len("/api ") :]
+            for value in ("auto", "chat", "responses"):
+                if value.startswith(text):
+                    yield Completion(value, start_position=-len(text))
+            return
+        if text.startswith("/reason-payload "):
+            text = text[len("/reason-payload ") :]
+            for value in CHAT_REASONING_CHOICES:
                 if value.startswith(text):
                     yield Completion(value, start_position=-len(text))
             return
@@ -7798,34 +8207,6 @@ def get_line(lineno: int):
         return get_line
 
 
-class ReferenceFileCompleter(Completer):
-    def __init__(self, cwd: str, command_completer: Completer):
-        self.cwd = cwd
-        self.command_completer = command_completer
-
-    def get_completions(self, document, complete_event):
-        match = re.search(r"(?:^|\s)@([^\s]*)$", document.text_before_cursor)
-        if match is None:
-            yield from self.command_completer.get_completions(document, complete_event)
-            return
-
-        partial = match.group(1)
-        dirname, prefix = os.path.split(partial)
-        base_dir = os.path.abspath(os.path.join(self.cwd, dirname))
-        try:
-            names = sorted(os.listdir(base_dir))
-        except OSError:
-            return
-
-        for name in names:
-            if not name.startswith(prefix):
-                continue
-            full_path = os.path.join(base_dir, name)
-            suffix = "/" if os.path.isdir(full_path) else ""
-            candidate = os.path.join(dirname, name) + suffix if dirname else name + suffix
-            yield Completion(candidate, start_position=-len(partial), display="@" + candidate)
-
-
 ############################
 # Entrypoint
 ############################
@@ -7836,7 +8217,6 @@ def main(argv: list[str] | None = None) -> int:
         parser = argparse.ArgumentParser(description="nanocode: AI coding assistant")
         parser.add_argument("-v", "--version", action="version", version=__version__)
         parser.add_argument("--yolo", action="store_true", help="Skip tool execution confirmations")
-        parser.add_argument("--plan", action="store_true", help="Plan changes without editing or running commands")
         parser.add_argument("--debug", action="store_true", help="Write request prompts to the current session debug directory")
         parser.add_argument("--config", default=None, help="Path to config file (default: ~/.nanocode/config.toml)")
         parser.add_argument("--init-config", action="store_true", help="Create a default config file at --config or ~/.nanocode/config.toml")
@@ -7845,13 +8225,15 @@ def main(argv: list[str] | None = None) -> int:
             config_path, created = ConfigFile.init(args.config)
             print(("Created config: " if created else "Config already exists: ") + config_path)
             return 0
-        session = Session.from_config_file(path=args.config, yolo=args.yolo, plan_mode=args.plan, debug=args.debug)
+        session = Session.from_config_file(path=args.config, yolo=args.yolo, debug=args.debug)
         missing = session.missing_required_config()
         if missing:
             print("Missing config: " + ", ".join(missing), file=sys.stderr)
             print("Edit " + (os.path.expanduser(args.config) if args.config else ConfigFile.path()) + " or run `nanocode --init-config`.", file=sys.stderr)
             return 2
-        return AgentLoop(Agent(session)).run()
+        exit_code = AgentLoop(Agent(session)).run()
+        print("session: " + session.session_id, file=sys.stderr)
+        return exit_code
     except ConfigError as error:
         print("Error: " + str(error), file=sys.stderr)
         return 2
diff --git a/pyproject.toml b/pyproject.toml
index 39f082e..13a7483 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nanocode-cli"
-version = "0.3.35"
+version = "0.4.9"
 description = "A lightweight terminal-based AI coding assistant"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -27,8 +27,10 @@ classifiers = [
     "Topic :: Terminals",
 ]
 dependencies = [
-    "json-repair>=0.39",
+    "code-symbol-index>=0.1.13",
+    "openai>=2.37.0",
     "prompt-toolkit>=3.0",
+    "socksio>=1.0.0",
 ]
 
 [project.urls]
diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py
index 78bfdd5..5195355 100644
--- a/tests/test_nanocode_agent.py
+++ b/tests/test_nanocode_agent.py
@@ -1,8 +1,10 @@
-import json
 import os
+import re
+from dataclasses import replace
+from types import SimpleNamespace
 
 import nanocode
-from nanocode import Agent, LLMError, ParsedToolCall, Session, VerificationStatus
+from nanocode import Agent, LLMError, ParsedToolCall, Session, CheckStatus
 
 
 def _verify_passed_action():
@@ -25,6 +27,20 @@ def _blocks_text(blocks):
     return "\n".join(blocks)
 
 
+def _observe_tool_result_context(agent):
+    return "\n\n".join(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter))
+
+
+def _set_context_budget(monkeypatch, agent, **overrides):
+    agent.session.settings.context_budget = "medium"
+    monkeypatch.setitem(nanocode.CONTEXT_BUDGETS, "medium", replace(nanocode.CONTEXT_BUDGETS["medium"], **overrides))
+
+
+def _read_anchors(session: Session, filepath: str) -> list[str]:
+    result = nanocode.ReadTool.make(session, [filepath]).call()
+    return re.findall(r"^(\d+:[0-9a-f]{6})\|", result, re.MULTILINE)
+
+
 def _session(
     tmp_path,
     *,
@@ -35,13 +51,18 @@ def _session(
     timeout: int | None = None,
     first_token_timeout: int | None = None,
     temperature: float | None = None,
-    reasoning_effort: str = "",
-    reasoning_payload: str = "",
+    reasoning: str = "",
+    chat_reasoning: str = "",
     yolo: bool = False,
-    plan_mode: bool = False,
     debug: bool = False,
+    api: str = "",
+    prompt_cache_key: str = "",
 ) -> Session:
     provider: dict[str, object] = {"url": api_url, "key": api_key, "model": model}
+    if api:
+        provider["api"] = api
+    if prompt_cache_key:
+        provider["prompt_cache_key"] = prompt_cache_key
     if stream is not None:
         provider["stream"] = stream
     if timeout is not None:
@@ -50,18 +71,81 @@ def _session(
         provider["first_token_timeout"] = first_token_timeout
     if temperature is not None:
         provider["temperature"] = temperature
-    if reasoning_effort:
-        provider["reasoning_effort"] = reasoning_effort
-    if reasoning_payload:
-        provider["reasoning_payload"] = reasoning_payload
+    if reasoning:
+        provider["reasoning"] = reasoning
+    if chat_reasoning:
+        provider["chat_reasoning"] = chat_reasoning
     data = {"provider": {"active": "default", "default": provider}, "paths": {"data_dir": str(tmp_path / ".nanocode")}}
     return Session(
         cwd=str(tmp_path),
         config=nanocode.Config.from_dict(data),
-        settings=nanocode.RuntimeSettings.from_dict(data, yolo=yolo, plan_mode=plan_mode, debug=debug),
+        settings=nanocode.RuntimeSettings.from_dict(data, yolo=yolo, debug=debug),
     )
 
 
+def _chat_response(content: str = "ok", usage: dict | None = None) -> dict:
+    return {"choices": [{"message": {"content": content}}], "usage": usage or {}}
+
+
+def _stream_chunk(delta: dict | None = None, usage: dict | None = None, choices: bool = True) -> dict:
+    return {"choices": [{"delta": delta or {}}] if choices else [], "usage": usage}
+
+
+def _responses_response(content: str = "ok", usage: dict | None = None) -> dict:
+    return {"output": [{"type": "message", "content": [{"type": "output_text", "text": content}]}], "usage": usage or {}}
+
+
+def _responses_text_delta(text: str) -> dict:
+    return {"type": "response.output_text.delta", "delta": text}
+
+
+def _responses_reasoning_delta(text: str) -> dict:
+    return {"type": "response.reasoning.delta", "delta": text}
+
+
+def _responses_completed(usage: dict | None = None) -> dict:
+    return {"type": "response.completed", "response": {"usage": usage or {}}}
+
+
+def _sdk_payload(call: dict) -> dict:
+    payload = dict(call)
+    payload.update(payload.pop("extra_body", {}) or {})
+    payload.pop("timeout", None)
+    return payload
+
+
+def _patch_openai(monkeypatch, responses):
+    calls = []
+    response_calls = []
+    client_kwargs = []
+    queue = list(responses) if isinstance(responses, tuple) else [responses]
+
+    class FakeCompletions:
+        def create(self, **kwargs):
+            calls.append(kwargs)
+            response = responses() if callable(responses) else queue.pop(0)
+            if isinstance(response, Exception):
+                raise response
+            return response
+
+    class FakeResponses:
+        def create(self, **kwargs):
+            response_calls.append(kwargs)
+            response = responses() if callable(responses) else queue.pop(0)
+            if isinstance(response, Exception):
+                raise response
+            return response
+
+    class FakeOpenAI:
+        def __init__(self, **kwargs):
+            client_kwargs.append(kwargs)
+            self.chat = type("FakeChat", (), {"completions": FakeCompletions()})()
+            self.responses = FakeResponses()
+
+    monkeypatch.setattr(nanocode, "OpenAI", FakeOpenAI)
+    return calls, response_calls, client_kwargs
+
+
 def test_agent_tool_results_go_to_latest_tool_results_and_store(tmp_path):
     path = tmp_path / "sample.txt"
     path.write_text("alpha\n", encoding="utf-8")
@@ -73,13 +157,13 @@ def test_agent_tool_results_go_to_latest_tool_results_and_store(tmp_path):
             {
                 "name": "Read",
                 "intention": "read sample",
-                "args": ["sample.txt", "0", "1"],
+                "args": ["sample.txt", "0,1"],
             }
         ]
     )
 
     assert "alpha" in latest
-    assert '- ok tool=Read args=["sample.txt","0","1"] key=tr.1' in latest
+    assert '- ok tool=Read args=["sample.txt","0,1"] key=tr.1' in latest
     assert "why: read sample" in latest
     assert "output:\n<ReadToolResult>" in latest
     assert session.state.tool_result_store["tr.1"].value.startswith("<ReadToolResult>")
@@ -93,27 +177,6 @@ def test_agent_tool_results_go_to_latest_tool_results_and_store(tmp_path):
     assert os.path.isdir(session.tool_results_dir())
 
 
-def test_agent_accepts_lowercase_tool_name_without_prompting_it(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    agent = Agent(session)
-
-    latest = agent.execute_tool_calls(
-        [
-            {
-                "name": "read",
-                "intention": "read sample",
-                "args": ["sample.txt", "0", "1"],
-            }
-        ]
-    )
-
-    assert "alpha" in latest
-    assert '- ok tool=Read args=["sample.txt","0","1"] key=tr.1' in latest
-    assert agent.tool_runner.latest_executions[0].call.name == "Read"
-
-
 def test_agent_dedupes_same_batch_readonly_tool_calls_keeping_latest(tmp_path):
     path = tmp_path / "sample.txt"
     path.write_text("alpha\n", encoding="utf-8")
@@ -134,6 +197,22 @@ def test_agent_dedupes_same_batch_readonly_tool_calls_keeping_latest(tmp_path):
     assert "first read" not in latest
 
 
+def test_agent_can_append_streamed_tool_calls_to_latest_batch(tmp_path):
+    (tmp_path / "one.txt").write_text("one\n", encoding="utf-8")
+    (tmp_path / "two.txt").write_text("two\n", encoding="utf-8")
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}], append_to_latest=True)
+
+    latest = _blocks_text(agent.tool_context.latest)
+    assert "one" in latest
+    assert "two" in latest
+    assert 'tool=Read args=["one.txt","0,1"]' in latest
+    assert 'tool=Read args=["two.txt","0,1"]' in latest
+    assert agent.tool_context.recent == []
+
+
 def test_agent_does_not_dedupe_nonconsecutive_same_batch_readonly_tool_calls(tmp_path):
     path = tmp_path / "sample.txt"
     path.write_text("alpha\nbeta\n", encoding="utf-8")
@@ -175,11 +254,12 @@ def test_agent_does_not_dedupe_same_batch_edit_tool_calls(tmp_path):
     path.write_text("old\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
+    anchor = _read_anchors(session, "sample.txt")[0]
 
     agent.execute_tool_calls(
         [
-            {"name": "Edit", "intention": "first edit", "args": ["sample.txt", "old", "new"]},
-            {"name": "Edit", "intention": "second edit", "args": ["sample.txt", "old", "new"]},
+            {"name": "Edit", "intention": "first edit", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]},
+            {"name": "Edit", "intention": "second edit", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]},
         ],
         confirm=lambda call, tool: True,
     )
@@ -196,7 +276,7 @@ def test_agent_tool_results_are_bounded_and_logged(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
-    latest = agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": ["sample.txt", "0", "1"]}])
+    latest = agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": ["sample.txt", "0,1"]}])
 
     item = session.state.tool_result_store["tr.1"]
     assert item.excerpted is True
@@ -212,16 +292,28 @@ def test_agent_tool_results_are_bounded_and_logged(tmp_path):
     assert (tmp_path / item.log_path).read_text(encoding="utf-8").startswith("<ReadToolResult>")
 
 
-def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path):
+def test_search_tool_result_uses_larger_output_budget(tmp_path):
+    sample = tmp_path / "sample.txt"
+    sample.write_text("".join(f"needle {'x' * 180} {index}\n" for index in range(200)), encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+
+    agent.execute_tool_calls([{"name": "Search", "intention": "search large result", "args": ["needle", "sample.txt", "context=0"]}])
+
+    item = session.state.tool_result_store["tr.1"]
+    assert item.excerpted is False
+    assert nanocode.MAX_TOOL_OUTPUT_CHARS < len(item.value) <= nanocode.SearchTool.OUTPUT_CHARS
+
+
+def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path, monkeypatch):
     for name in ["one.txt", "two.txt", "three.txt", "four.txt"]:
         (tmp_path / name).write_text(name + "\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
-    agent.TOOL_RESULT_INDEX_ITEMS = 2
-    agent.OBSERVE_AFTER_PENDING_RESULT_COUNT = 4
+    _set_context_budget(monkeypatch, agent, index_items=2, observe_after_results=4)
 
     for name in ["one.txt", "two.txt", "three.txt", "four.txt"]:
-        agent.execute_tool_calls([{"name": "Read", "intention": "read " + name, "args": [name, "0", "1"]}])
+        agent.execute_tool_calls([{"name": "Read", "intention": "read " + name, "args": [name, "0,1"]}])
 
     latest = _blocks_text(agent.tool_context.latest)
     recent = _blocks_text(agent.tool_context.recent)
@@ -234,7 +326,7 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path):
     assert "<ReadToolResult>" in recent
     assert len(agent.tool_context.recent) == 3
     assert agent.mode == nanocode.AgentMode.OBSERVE
-    context = agent._format_observe_tool_result_context()
+    context = _observe_tool_result_context(agent)
     assert "one.txt" in context
     assert "two.txt" in context
     assert "three.txt" in context
@@ -243,17 +335,16 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path):
     assert len(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) == 4
 
 
-def test_agent_observes_full_latest_result_when_it_becomes_recent(tmp_path):
+def test_agent_observes_full_latest_result_when_it_becomes_recent(tmp_path, monkeypatch):
     (tmp_path / "one.txt").write_text("one\n", encoding="utf-8")
     (tmp_path / "two.txt").write_text("two\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.TOOL_RESULT_RAW_CHARS = 10_000
-    agent.OBSERVE_AFTER_PENDING_RESULT_COUNT = 2
+    _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2)
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0", "1"]}])
-    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}])
 
-    context = agent._format_observe_tool_result_context()
+    context = _observe_tool_result_context(agent)
     assert agent.mode == nanocode.AgentMode.OBSERVE
     assert "one.txt" in context
     assert "<ReadToolResult>" in context
@@ -281,14 +372,54 @@ def test_agent_observes_full_latest_result_when_it_becomes_recent(tmp_path):
     assert "recall=tr.2" in _blocks_text(agent.tool_context.latest)
 
 
-def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path):
+def test_referenced_unreduced_results_do_not_count_toward_observe_threshold(tmp_path, monkeypatch):
+    for name in ["one.txt", "two.txt", "three.txt"]:
+        (tmp_path / name).write_text(name + "\n", encoding="utf-8")
+    agent = Agent(Session(cwd=str(tmp_path)))
+    _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2)
+
+    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}])
+    agent.apply_response({"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "one.txt was inspected."}]}]})
+    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}])
+
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert agent.blackboard.memory_checkpoint_tool_result_counter == 0
+    assert len(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) == 2
+    assert [nanocode.ToolResultContext.result_key(block) for block in agent._unreferenced_unreduced_blocks()] == ["tr.2"]
+
+    agent.execute_tool_calls([{"name": "Read", "intention": "read three", "args": ["three.txt", "0,1"]}])
+
+    assert agent.mode == nanocode.AgentMode.OBSERVE
+    observe_prompt = agent.build_observe_prompt()
+    observe_raw = observe_prompt.split("Unreduced Raw Tool Results:\n", 1)[1].split("\n--- Blocking Feedback ---", 1)[0]
+    assert "one.txt" not in observe_raw
+    assert "two.txt" in observe_raw
+    assert "three.txt" in observe_raw
+
+
+def test_unsourced_known_does_not_cover_unreduced_result(tmp_path, monkeypatch):
+    (tmp_path / "one.txt").write_text("one\n", encoding="utf-8")
+    (tmp_path / "two.txt").write_text("two\n", encoding="utf-8")
+    agent = Agent(Session(cwd=str(tmp_path)))
+    _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2)
+
+    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}])
+    agent.apply_response({"actions": [{"type": "known", "items": ["one.txt was inspected."]}]})
+    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}])
+
+    assert agent.mode == nanocode.AgentMode.OBSERVE
+    assert agent.blackboard.memory_checkpoint_tool_result_counter == 0
+    assert [nanocode.ToolResultContext.result_key(block) for block in agent._unreferenced_unreduced_blocks()] == ["tr.1", "tr.2"]
+
+
+def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monkeypatch):
     (tmp_path / "one.txt").write_text("one\n", encoding="utf-8")
     (tmp_path / "two.txt").write_text("two\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.TOOL_RESULT_RAW_CHARS = 10_000
+    _set_context_budget(monkeypatch, agent, raw_chars=10_000)
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0", "1"]}])
-    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}])
 
     assert agent.mode == nanocode.AgentMode.ACT
     assert "key=tr.1" in _blocks_text(agent.tool_context.recent)
@@ -302,31 +433,28 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path):
     assert "output:\n<ReadToolResult>" not in index
 
 
-def test_observe_progress_does_not_checkpoint_tool_results(tmp_path):
+def test_empty_observe_compacts_unreduced_tool_results(tmp_path, monkeypatch):
     (tmp_path / "one.txt").write_text("one\n", encoding="utf-8")
     (tmp_path / "two.txt").write_text("two\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.TOOL_RESULT_RAW_CHARS = 300
-    agent.OBSERVE_AFTER_PENDING_RESULT_COUNT = 2
+    _set_context_budget(monkeypatch, agent, raw_chars=300, observe_after_results=2)
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0", "1"]}])
-    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}])
 
-    agent.handle_response({"actions": [{"type": "progress", "text": "checking result"}]})
+    agent.handle_response({"actions": [], "_assistant_text": "checking result"})
 
-    assert agent.blackboard.memory_checkpoint_tool_result_counter == 0
-    assert agent.mode == nanocode.AgentMode.OBSERVE
-    unreduced = _blocks_text(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter))
-    assert "one.txt" in unreduced
-    assert "two.txt" in unreduced
+    assert agent.blackboard.memory_checkpoint_tool_result_counter == 2
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == []
 
 
-def test_progress_does_not_mark_memory_checkpoint(tmp_path):
+def test_assistant_text_does_not_mark_memory_checkpoint(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
 
-    agent.apply_response({"actions": [{"type": "progress", "text": "reading sample"}]})
+    agent.apply_response({"actions": [], "_assistant_text": "reading sample"})
 
     assert agent.blackboard.memory_checkpoint_tool_result_counter == 0
 
@@ -352,11 +480,10 @@ def test_observe_prompt_uses_narrow_context(tmp_path):
     agent.blackboard.user_input = "fix bug"
     agent.blackboard.goal = "fix bug goal"
     agent.blackboard.plan = [nanocode.PlanItem(id="p1", text="inspect failing path", status=nanocode.PlanStatus.DOING)]
-    agent.blackboard.hypotheses = [nanocode.Hypothesis(id="h1", text="cache branch", status=nanocode.HypothesisStatus.ACTIVE, source=("tr.1",))]
+    agent.blackboard.leads = [nanocode.Lead(id="h1", text="cache branch", status=nanocode.LeadStatus.ACTIVE, source=("tr.1",))]
     agent.blackboard.known = ["known fact"]
-    agent.blackboard.stable_knowledge = {"workflow": ["use pytest"]}
     agent.tool_context.kept_results = ['- ok tool=Read args=["old.py"] key=tr.1\n  output:\nselected result']
-    agent.runtime.recent_edits = ["- sample.py: old edit"]
+    agent.recent_edits = ["- sample.py: old edit"]
     agent.agent_feedback_errors = ["act error"]
     agent.observe_feedback_errors = ["observe error"]
     agent.tool_context.latest = ['- ok tool=Read args=["sample.py"] key=tr.2\n  output:\nraw alpha']
@@ -369,7 +496,6 @@ def test_observe_prompt_uses_narrow_context(tmp_path):
     assert "inspect failing path" in prompt
     assert "cache branch" in prompt
     assert "known fact" in prompt
-    assert "use pytest" in prompt
     assert "selected result" in prompt
     assert "raw alpha" in prompt
     assert "Observe Errors" in prompt
@@ -385,6 +511,178 @@ def test_observe_prompt_uses_narrow_context(tmp_path):
     assert "old edit" not in prompt
 
 
+def test_act_prompt_includes_current_focus_from_doing_plan_item(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.plan = [
+        nanocode.PlanItem(id="p1", text="inspect config", status=nanocode.PlanStatus.DONE),
+        nanocode.PlanItem(id="p2", text="edit command handler", status=nanocode.PlanStatus.DOING, context="next change"),
+        nanocode.PlanItem(id="p3", text="run tests", status=nanocode.PlanStatus.TODO),
+    ]
+
+    prompt = agent.build_user_prompt()
+
+    assert "Current Focus:\n- [◔ doing] edit command handler (id=p2)\n  context: next change" in prompt
+
+
+def test_act_prompt_uses_first_todo_as_current_focus(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.plan = [
+        nanocode.PlanItem(id="p1", text="inspect config", status=nanocode.PlanStatus.DONE),
+        nanocode.PlanItem(id="p2", text="edit command handler", status=nanocode.PlanStatus.TODO),
+        nanocode.PlanItem(id="p3", text="run tests", status=nanocode.PlanStatus.TODO),
+    ]
+
+    prompt = agent.build_user_prompt()
+
+    assert "Current Focus:\n- [○ todo] edit command handler (id=p2)" in prompt
+
+
+def test_inspect_code_tools_is_hidden_until_available(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_available", lambda session: False)
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    tool_names = [schema["function"]["name"] for schema in agent._tool_schemas() if schema.get("type") == "function"]
+
+    assert "InspectCode" not in tool_names
+
+
+def test_inspect_code_tools_is_visible_when_available(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_available", lambda session: True)
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    tool_names = [schema["function"]["name"] for schema in agent._tool_schemas() if schema.get("type") == "function"]
+
+    assert "InspectCode" in tool_names
+
+
+def test_one_shot_bash_does_not_require_goal_or_plan(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.task_code = nanocode.TaskCode.NEW
+
+    result = agent.handle_response(
+        {
+            "actions": [
+                {"type": "tool", "name": "Bash", "intention": "run one-shot check", "args": ["printf ok"]}
+            ]
+        },
+        confirm=lambda call, tool: True,
+    )
+
+    assert result.done is False
+    assert len(agent.tool_runner.latest_executions) == 1
+    assert agent.blackboard.task_code == nanocode.TaskCode.NEW
+    assert "Current Phase:" not in agent.build_user_prompt()
+    assert not any("mutating work before" in error for error in agent.agent_feedback_errors)
+
+
+def test_tracked_task_tool_keeps_working_phase(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.goal = "inspect sample"
+    agent.blackboard.task_code = nanocode.TaskCode.NEW
+
+    result = agent.handle_response(
+        {"actions": [{"type": "tool", "name": "Bash", "intention": "run check", "args": ["printf ok"]}]},
+        confirm=lambda call, tool: True,
+    )
+
+    assert result.done is False
+    assert agent.blackboard.task_code == nanocode.TaskCode.WORKING
+
+
+def test_planless_successful_bash_warns_before_more_tools(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.task_code = nanocode.TaskCode.NEW
+
+    first = agent.handle_response(
+        {"actions": [{"type": "tool", "name": "Bash", "intention": "run check", "args": ["printf ok"]}]},
+        confirm=lambda call, tool: True,
+    )
+    second = agent.handle_response(
+        {"actions": [{"type": "tool", "name": "Bash", "intention": "repeat check", "args": ["printf ok"]}]},
+        confirm=lambda call, tool: True,
+    )
+
+    assert first.done is False
+    assert second.done is False
+    assert agent.session.state.turn_tool_calls == 2
+    assert any("last command result is visible" in error for error in agent.agent_feedback_errors)
+
+
+def test_planless_successful_bash_allows_tracked_task_before_more_tools(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.task_code = nanocode.TaskCode.NEW
+
+    agent.handle_response(
+        {"actions": [{"type": "tool", "name": "Bash", "intention": "run check", "args": ["printf ok"]}]},
+        confirm=lambda call, tool: True,
+    )
+    result = agent.handle_response(
+        {
+            "actions": [
+                {"type": "goal", "text": "run more checks", "complete": False},
+                {"type": "tool", "name": "Bash", "intention": "run another check", "args": ["printf ok"]},
+            ]
+        },
+        confirm=lambda call, tool: True,
+    )
+
+    assert result.done is False
+    assert agent.session.state.turn_tool_calls == 2
+    assert agent.blackboard.goal == "run more checks"
+
+
+def test_edit_tool_without_goal_or_plan_warns(tmp_path):
+    (tmp_path / "sample.txt").write_text("old\n", encoding="utf-8")
+    agent = Agent(Session(cwd=str(tmp_path)))
+    anchor = _read_anchors(agent.session, "sample.txt")[0]
+
+    result = agent.handle_response(
+        {
+            "actions": [
+                {"type": "tool", "name": "Edit", "intention": "edit sample", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]}
+            ]
+        },
+        confirm=lambda call, tool: True,
+    )
+
+    assert result.done is False
+    assert (tmp_path / "sample.txt").read_text(encoding="utf-8") == "new\n"
+    assert any("mutating work before Goal/Plan was set" in error for error in agent.agent_feedback_errors)
+    assert any("mutating work before Plan was set" in error for error in agent.agent_feedback_errors)
+
+
+def test_act_prompt_lists_available_shell_tools_in_environment(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode.shutil, "which", lambda name: "/bin/" + name if name in {"rg", "python3", "jq"} else None)
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    prompt = agent.build_user_prompt()
+
+    assert "- detected-available-shell-commands: rg, python3, jq" in prompt
+    assert "- detected-available-shell-commands: find" not in prompt
+    assert "- shell_tools:" not in prompt
+
+
+def test_act_prompt_lists_indexed_language_breakdown_in_environment(tmp_path, monkeypatch):
+    def status_fn(root, *, db_path=None, check=False, max_pending_files=50, format="object"):
+        return SimpleNamespace(
+            status="ready",
+            reason="",
+            message="",
+            languages=("python", "typescript"),
+            language_breakdown=(
+                {"language": "python", "files": 80, "percent": 62.5},
+                {"language": "typescript", "files": 48, "percent": 37.5},
+            ),
+        )
+
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: SimpleNamespace(status=status_fn))
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    prompt = agent.build_user_prompt()
+
+    assert "- indexed-language-breakdown: python 80 files (62.5%), typescript 48 files (37.5%)" in prompt
+
+
 def test_act_prompt_includes_kept_tool_results(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha unique\n", encoding="utf-8")
     (tmp_path / "other.txt").write_text("beta unique\n", encoding="utf-8")
@@ -392,8 +690,8 @@ def test_act_prompt_includes_kept_tool_results(tmp_path):
 
     agent.execute_tool_calls(
         [
-            {"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]},
-            {"name": "Read", "intention": "read other", "args": ["other.txt", "0", "1"]},
+            {"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]},
+            {"name": "Read", "intention": "read other", "args": ["other.txt", "0,1"]},
         ]
     )
     agent.mode = nanocode.AgentMode.OBSERVE
@@ -417,7 +715,7 @@ def test_kept_tool_results_deduplicate_by_tool_key(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
     agent.mode = nanocode.AgentMode.OBSERVE
     agent.handle_response(
         {
@@ -435,7 +733,7 @@ def test_kept_tool_results_deduplicate_by_tool_key(tmp_path):
 def test_observe_reports_kept_tool_result_keys(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
     agent.mode = nanocode.AgentMode.OBSERVE
     messages = []
 
@@ -466,7 +764,7 @@ def test_forget_removes_kept_tool_result_but_keeps_known_source(tmp_path):
     assert messages == ["Tool Result Context: -tr.1"]
 
 
-def test_hypothesis_action_updates_blackboard_and_report(tmp_path):
+def test_lead_action_updates_blackboard_and_report(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     _seed_plan(agent, "debug branch")
     messages = []
@@ -475,7 +773,7 @@ def test_hypothesis_action_updates_blackboard_and_report(tmp_path):
         {
             "actions": [
                 {
-                    "type": "hypothesis",
+                    "type": "lead",
                     "items": [
                         {
                             "id": "h1",
@@ -492,45 +790,45 @@ def test_hypothesis_action_updates_blackboard_and_report(tmp_path):
     )
 
     assert result.done is False
-    assert agent.blackboard.hypotheses == [
-        nanocode.Hypothesis(
+    assert agent.blackboard.leads == [
+        nanocode.Lead(
             text="admin filtering drops history events",
-            status=nanocode.HypothesisStatus.ACTIVE,
+            status=nanocode.LeadStatus.ACTIVE,
             id="h1",
             source=("tr.1",),
             context="feed search",
         )
     ]
-    assert messages == ["Hypotheses Updated\n  1. [active] h1: admin filtering drops history events [tr.1] context: feed search"]
+    assert messages == ["Leads Updated\n  1. [active] h1: admin filtering drops history events [tr.1] context: feed search"]
 
 
-def test_forget_rejects_active_hypothesis_source(tmp_path):
+def test_forget_rejects_active_lead_source(tmp_path):
     agent = Agent(_session(tmp_path, debug=True))
     _seed_plan(agent, "debug branch")
     agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n  output:\na']
-    agent.blackboard.hypotheses = [nanocode.Hypothesis(text="branch still possible", source=("tr.1",))]
+    agent.blackboard.leads = [nanocode.Lead(text="branch still possible", source=("tr.1",))]
     messages = []
 
     result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "branch ruled out"}]}, on_message=messages.append)
 
     assert result.done is False
     assert "tr.1" in _blocks_text(agent.tool_context.kept_results)
-    assert any("active hypothesis source: tr.1" in error for error in agent.agent_feedback_errors)
-    assert messages == ["ToolResult_Gate: active hypothesis source: tr.1."]
+    assert any("protected source: tr.1 (active lead)" in error for error in agent.agent_feedback_errors)
+    assert messages == ["ToolResult_Gate: protected source: tr.1 (active lead)."]
 
 
-def test_forget_allows_source_when_hypothesis_is_closed_same_response(tmp_path):
+def test_forget_allows_source_when_lead_is_closed_same_response(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     _seed_plan(agent, "debug branch")
     agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n  output:\na']
-    agent.blackboard.hypotheses = [nanocode.Hypothesis(id="h1", text="branch still possible", source=("tr.1",))]
+    agent.blackboard.leads = [nanocode.Lead(id="h1", text="branch still possible", source=("tr.1",))]
     messages = []
 
     result = agent.handle_response(
         {
             "actions": [
                 {
-                    "type": "hypothesis",
+                    "type": "lead",
                     "items": [{"id": "h1", "text": "branch ruled out", "status": "ruled_out", "source": ["tr.1"]}],
                 },
                 {"type": "forget", "source": ["tr.1"], "reason": "branch ruled out"},
@@ -540,25 +838,25 @@ def test_forget_allows_source_when_hypothesis_is_closed_same_response(tmp_path):
     )
 
     assert result.done is False
-    assert agent.blackboard.hypotheses[0].status == nanocode.HypothesisStatus.RULED_OUT
+    assert agent.blackboard.leads[0].status == nanocode.LeadStatus.RULED_OUT
     assert "tr.1" not in _blocks_text(agent.tool_context.kept_results)
     assert messages == [
-        "Hypotheses Updated\n  1. [ruled_out] h1: branch ruled out [tr.1]",
+        "Leads Updated\n  1. [ruled_out] h1: branch ruled out [tr.1]",
         "Tool Result Context: -tr.1",
     ]
 
 
-def test_forget_allows_source_when_hypothesis_is_dropped_same_response(tmp_path):
+def test_forget_allows_source_when_lead_is_dropped_same_response(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     _seed_plan(agent, "debug branch")
     agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n  output:\na']
-    agent.blackboard.hypotheses = [nanocode.Hypothesis(id="h1", text="branch lost priority", source=("tr.1",))]
+    agent.blackboard.leads = [nanocode.Lead(id="h1", text="branch lost priority", source=("tr.1",))]
     messages = []
 
     result = agent.handle_response(
         {
             "actions": [
-                {"type": "hypothesis", "items": [{"id": "h1", "text": "branch no longer matters", "status": "dropped", "source": ["tr.1"]}]},
+                {"type": "lead", "items": [{"id": "h1", "text": "branch no longer matters", "status": "dropped", "source": ["tr.1"]}]},
                 {"type": "forget", "source": ["tr.1"], "reason": "branch no longer matters"},
             ]
         },
@@ -566,10 +864,10 @@ def test_forget_allows_source_when_hypothesis_is_dropped_same_response(tmp_path)
     )
 
     assert result.done is False
-    assert agent.blackboard.hypotheses[0].status == nanocode.HypothesisStatus.DROPPED
+    assert agent.blackboard.leads[0].status == nanocode.LeadStatus.DROPPED
     assert "tr.1" not in _blocks_text(agent.tool_context.kept_results)
     assert messages == [
-        "Hypotheses Updated\n  1. [dropped] h1: branch no longer matters [tr.1]",
+        "Leads Updated\n  1. [dropped] h1: branch no longer matters [tr.1]",
         "Tool Result Context: -tr.1",
     ]
 
@@ -598,10 +896,10 @@ def test_observe_forget_does_not_cover_latest_result_key(tmp_path):
     result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "old branch ruled out"}]}, on_message=messages.append)
 
     assert result.done is False
-    assert agent.mode == nanocode.AgentMode.OBSERVE
-    assert "tr.1" in _blocks_text(agent.tool_context.kept_results)
-    assert any("tr.2" in error for error in agent.observe_feedback_errors)
-    assert messages == ["Observe_Gate: missing coverage for result keys: tr.2."]
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert "tr.1" not in _blocks_text(agent.tool_context.kept_results)
+    assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == []
+    assert messages == ["Tool Result Context: -tr.1"]
 
 
 def test_observe_can_forget_old_kept_result_while_forgetting_latest(tmp_path):
@@ -628,11 +926,24 @@ def test_observe_can_forget_old_kept_result_while_forgetting_latest(tmp_path):
     assert messages == ["Tool Result Context: -tr.1 -tr.2"]
 
 
+def test_pending_user_feedback_does_not_rewrite_goal_by_default(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    _seed_plan(agent, "implement demo")
+    agent.session.state.pending_user_feedback = "how many lines?"
+
+    result = agent.handle_response({"actions": [{"type": "goal", "text": "answer line count"}]})
+
+    assert result.done is False
+    assert agent.blackboard.goal == "implement demo"
+    assert agent.session.state.pending_user_feedback == ""
+    assert any("Pending User Feedback is not a new task" in error for error in agent.agent_feedback_errors)
+
+
 def test_keep_tool_results_ignore_non_tool_sources(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
     agent.mode = nanocode.AgentMode.OBSERVE
     agent.handle_response(
         {
@@ -657,21 +968,24 @@ def test_keep_action_is_observe_only(tmp_path):
     assert any("Invalid action(s): keep" in error for error in agent.agent_feedback_errors)
 
 
-def test_observe_rejects_progress_and_empty_actions(tmp_path):
+def test_observe_rejects_invalid_action_and_allows_empty_actions(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
     agent.mode = nanocode.AgentMode.OBSERVE
 
-    agent.handle_response({"actions": [{"type": "progress", "text": "checking"}]})
-    agent.handle_response({"actions": []})
-
+    agent.handle_response({"actions": [{"type": "goal", "text": "answer", "complete": False}]})
     assert any("latest results must be observed" in error for error in agent.observe_feedback_errors)
-    assert any("observe returned no actions" in error for error in agent.observe_feedback_errors)
     assert agent.mode == nanocode.AgentMode.OBSERVE
 
+    agent.handle_response({"actions": []})
+
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert agent.observe_feedback_errors == []
+    assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == []
+
 
-def test_observe_requires_every_result_key_to_be_covered(tmp_path):
+def test_observe_compacts_unmentioned_result_keys_by_default(tmp_path):
     agent = Agent(_session(tmp_path, debug=True))
     agent.mode = nanocode.AgentMode.OBSERVE
     agent.tool_context.latest = [
@@ -686,10 +1000,10 @@ def test_observe_requires_every_result_key_to_be_covered(tmp_path):
     )
 
     assert result.done is False
-    assert agent.mode == nanocode.AgentMode.OBSERVE
-    assert "tr.2" in _blocks_text(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter))
-    assert any("tr.2" in error for error in agent.observe_feedback_errors)
-    assert messages == ["Observe_Gate: missing coverage for result keys: tr.2."]
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert "tr.1" in _blocks_text(agent.tool_context.kept_results)
+    assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == []
+    assert messages == ["Tool Result Context: +tr.1"]
 
 
 def test_observe_forget_source_covers_result_key(tmp_path):
@@ -705,22 +1019,34 @@ def test_observe_forget_source_covers_result_key(tmp_path):
     assert agent.tool_context.kept_results == []
 
 
-def test_observe_known_source_does_not_cover_result_key(tmp_path):
+def test_observe_known_source_compacts_result_key_by_default(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     agent.mode = nanocode.AgentMode.OBSERVE
     agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n  output:\na']
 
     agent.handle_response({"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "a exists"}]}]})
 
-    assert agent.mode == nanocode.AgentMode.OBSERVE
-    assert agent.blackboard.known == []
-    assert any("tr.1" in error for error in agent.observe_feedback_errors)
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert [nanocode.KnownItem.format_item(item) for item in agent.blackboard.known] == ["[tr.1] a exists"]
+    assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == []
 
 
-def test_kept_tool_results_respect_char_budget(tmp_path):
+def test_observe_warns_on_weak_known_without_source_or_coverage(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     agent.mode = nanocode.AgentMode.OBSERVE
-    agent.KEPT_TOOL_RESULT_CHARS = 100
+    agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n  output:\na']
+
+    agent.handle_response({"actions": [{"type": "known", "items": ["a exists"]}]})
+
+    assert agent.mode == nanocode.AgentMode.ACT
+    assert any("weak observe memory" in error for error in agent.observe_feedback_errors)
+    assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == []
+
+
+def test_kept_tool_results_respect_char_budget(tmp_path, monkeypatch):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.mode = nanocode.AgentMode.OBSERVE
+    _set_context_budget(monkeypatch, agent, kept_chars=100)
     agent.tool_context.latest = [
         '- ok tool=Read args=["a"] key=tr.1\n  output:\n' + ("a" * 30),
         '- ok tool=Read args=["b"] key=tr.2\n  output:\n' + ("b" * 30),
@@ -739,10 +1065,25 @@ def test_kept_tool_results_respect_char_budget(tmp_path):
     assert "key=tr.2" in context
 
 
+def test_kept_tool_results_respect_per_block_char_budget(tmp_path, monkeypatch):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.mode = nanocode.AgentMode.OBSERVE
+    _set_context_budget(monkeypatch, agent, kept_chars=10_000, kept_block_chars=300)
+    agent.tool_context.latest = [
+        '- ok tool=Read args=["large.py"] key=tr.1\n  output:\n' + ("head\n" + ("x" * 2000) + "\ntail")
+    ]
+
+    agent.handle_response({"actions": [{"type": "keep", "source": ["tr.1"], "reason": "large output matters"}]})
+
+    assert len(agent.tool_context.kept_results[0]) <= agent.context_budget().kept_block_chars
+    assert "key=tr.1" in agent.tool_context.kept_results[0]
+    assert "[tool result excerpt]" in agent.tool_context.kept_results[0]
+
+
 def test_observe_checkpoint_clears_observe_errors(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
     agent.mode = nanocode.AgentMode.OBSERVE
     agent.observe_feedback_errors = ["old observe error"]
 
@@ -752,34 +1093,47 @@ def test_observe_checkpoint_clears_observe_errors(tmp_path):
     assert agent.observe_feedback_errors == []
 
 
-def test_agent_tool_result_raw_budget_triggers_observe(tmp_path):
+def test_agent_tool_result_raw_budget_triggers_observe(tmp_path, monkeypatch):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
-    agent.TOOL_RESULT_RAW_CHARS = 180
-    agent.OBSERVE_AFTER_PENDING_RESULT_COUNT = 99
+    _set_context_budget(monkeypatch, agent, raw_chars=180, observe_after_results=99)
     path = tmp_path / "sample.txt"
     path.write_text("x" * 400 + "\n", encoding="utf-8")
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
 
     assert agent.mode == nanocode.AgentMode.OBSERVE
-    assert agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter) >= agent.TOOL_RESULT_RAW_CHARS
-    observe_context = agent._format_observe_tool_result_context()
+    assert agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter) >= agent.context_budget().raw_chars
+    observe_context = _observe_tool_result_context(agent)
     assert "sample.txt" in observe_context
     assert "x" * 50 in observe_context
 
 
-def test_agent_tool_result_index_has_count_limit(tmp_path):
+def test_referenced_raw_context_does_not_force_observe(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=99)
+    path = tmp_path / "sample.txt"
+    path.write_text("x" * 400 + "\n", encoding="utf-8")
+
+    agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}])
+    agent.apply_response(
+        {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "sample.txt content was inspected."}]}]}
+    )
+    _set_context_budget(monkeypatch, agent, raw_chars=180, observe_after_results=99)
+
+    assert agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter, exclude_keys=agent.blackboard.referenced_result_keys()) == 0
+    assert agent._should_observe_after_tools() is False
+
+
+def test_agent_tool_result_index_has_count_limit(tmp_path, monkeypatch):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
-    agent.TOOL_RESULT_INDEX_ITEMS = 2
+    _set_context_budget(monkeypatch, agent, index_items=2)
 
     for index in range(4):
-        agent.tool_context.append_recent(
-            ['- ok tool=Read args=["' + str(index) + '"] key=tr.' + str(index + 1) + "\n  output:\n" + ("x" * 20)],
-            max_index_items=agent.TOOL_RESULT_INDEX_ITEMS,
-            checkpoint=999,
-        )
+        agent.tool_context.recent.append('- ok tool=Read args=["' + str(index) + '"] key=tr.' + str(index + 1) + "\n  output:\n" + ("x" * 20))
+        agent.tool_context.prune_recent(max_index_items=agent.context_budget().index_items, checkpoint=999)
 
     recent = _blocks_text(agent.tool_context.recent)
     assert "recall=tr.1" not in recent
@@ -802,10 +1156,10 @@ def test_tool_result_store_keeps_latest_256_items(tmp_path):
     assert session.state.tool_result_counter == 257
 
 
-def test_tool_result_store_trim_keeps_hypothesis_source_keys(tmp_path):
+def test_tool_result_store_trim_keeps_lead_source_keys(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
-    agent.blackboard.hypotheses = [nanocode.Hypothesis(id="h1", text="kept branch", source=("tr.1",))]
+    agent.blackboard.leads = [nanocode.Lead(id="h1", text="kept branch", source=("tr.1",))]
 
     for index in range(257):
         agent.tool_runner._store_tool_result(ParsedToolCall(name="Read", intention="", args=[str(index)]), "success", "output " + str(index))
@@ -824,7 +1178,7 @@ def test_agent_prunes_tool_result_store_but_keeps_referenced_result_keys(tmp_pat
         key = "tr." + str(index + 1)
         session.state.tool_result_store[key] = nanocode.ToolResultItem(description=key, value="value")
     agent.tool_context.kept_results = ['- ok tool=Read args=["sample.txt"] key=tr.1\n  output:\nvalue']
-    agent.blackboard.hypotheses = [nanocode.Hypothesis(id="h1", text="kept branch", source=("tr.2",))]
+    agent.blackboard.leads = [nanocode.Lead(id="h1", text="kept branch", source=("tr.2",))]
 
     agent._prune_tool_result_store()
 
@@ -835,84 +1189,147 @@ def test_agent_prunes_tool_result_store_but_keeps_referenced_result_keys(tmp_pat
     assert "tr.52" in session.state.tool_result_store
 
 
-def test_agent_request_calls_chat_completions_and_parses_json(tmp_path, monkeypatch):
-    captured = {}
-
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def read(self):
-            return json.dumps(
-                {
-                    "choices": [{"message": {"content": json.dumps({"type": "message", "text": "ok"})}}],
-                    "usage": {"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5},
-                }
-            ).encode("utf-8")
-
-    def fake_urlopen(request, timeout):
-        captured["url"] = request.full_url
-        captured["timeout"] = timeout
-        captured["payload"] = json.loads(request.data.decode("utf-8"))
-        captured["authorization"] = request.headers["Authorization"]
-        return FakeResponse()
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", timeout=12, stream=False)
+def test_agent_request_calls_chat_completions_and_returns_text(tmp_path, monkeypatch):
+    calls, _response_calls, client_kwargs = _patch_openai(monkeypatch, _chat_response(usage={"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5}))
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", timeout=12, stream=False)
 
     response = Agent(session).request("system", "user")
-
-    assert response == {"actions": [{"type": "message", "text": "ok"}]}
-    assert captured["url"] == "https://example.test/v1/chat/completions"
-    assert captured["timeout"] == 12
-    assert captured["authorization"] == "Bearer key"
-    assert captured["payload"]["model"] == "model"
-    assert captured["payload"]["messages"] == [{"role": "system", "content": "system"}, {"role": "user", "content": "user"}]
-    assert "temperature" not in captured["payload"]
-    assert "response_format" not in captured["payload"]
-    assert "reasoning_effort" not in captured["payload"]
-    assert "reasoning" not in captured["payload"]
+    payload = _sdk_payload(calls[0])
+
+    assert response == {"actions": [], "_assistant_text": "ok"}
+    assert client_kwargs[0]["base_url"] == "https://example.test/v1"
+    assert client_kwargs[0]["api_key"] == "key"
+    assert client_kwargs[0]["timeout"] == 12
+    assert calls[0]["timeout"] == 12
+    assert payload["model"] == "model"
+    assert payload["messages"] == [{"role": "system", "content": "system"}, {"role": "user", "content": "user"}]
+    assert "temperature" not in payload
+    assert "response_format" not in payload
+    assert "reasoning_effort" not in payload
+    assert "reasoning" not in payload
+    assert payload["prompt_cache_key"].startswith("nanocode-")
     assert session.state.last_prompt_tokens == 2
     assert session.state.last_completion_tokens == 3
     assert session.state.last_total_tokens == 5
 
 
-def test_agent_request_sends_temperature_only_when_configured(tmp_path, monkeypatch):
-    captured = {}
+def test_agent_request_manual_retry_resends_same_model_prompt(tmp_path):
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
+    agent = Agent(session)
 
-    class FakeResponse:
-        def __enter__(self):
-            return self
+    class FakeModelClient:
+        def __init__(self):
+            self.calls = 0
+
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            self.calls += 1
+            if self.calls == 1:
+                raise nanocode.ModelRequestRetry()
+            return {"actions": [{"type": "message", "text": system_prompt + "/" + user_prompt + "/" + activity}]}
+
+    fake_client = FakeModelClient()
+    agent.model_client = fake_client
 
-        def __exit__(self, *args):
-            return None
+    response = agent.request("system", "user", activity="observe")
 
-        def read(self):
-            return json.dumps({"choices": [{"message": {"content": json.dumps({"type": "message", "text": "ok"})}}]}).encode("utf-8")
+    assert response == {"actions": [{"type": "message", "text": "system/user/observe"}]}
+    assert fake_client.calls == 2
+    assert session.state.status_notice == ""
 
-    def fake_urlopen(request, timeout):
-        captured["payload"] = json.loads(request.data.decode("utf-8"))
-        return FakeResponse()
 
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
+def test_agent_request_sends_temperature_only_when_configured(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, _chat_response())
     session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False, temperature=0.2)
 
     Agent(session).request("system", "user")
 
-    assert captured["payload"]["temperature"] == 0.2
+    assert _sdk_payload(calls[0])["temperature"] == 0.2
+
+
+def test_agent_request_prompt_cache_key_can_be_custom_or_off(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, (_chat_response(), _chat_response()))
+
+    Agent(_session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False, prompt_cache_key="project-cache")).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False, prompt_cache_key="off")).request("system", "user")
+
+    assert _sdk_payload(calls[0])["prompt_cache_key"] == "project-cache"
+    assert "prompt_cache_key" not in _sdk_payload(calls[1])
 
 
-def test_plan_mode_uses_runtime_plan_timeouts(tmp_path):
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", timeout=12, first_token_timeout=5, plan_mode=True)
-    session.settings.plan_timeout = 240
-    session.settings.plan_first_token_timeout = 80
-    client = nanocode.ModelClient(session)
+def test_agent_request_auto_prompt_cache_key_is_stable_per_tool_set(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, (_chat_response(), _chat_response(), _chat_response()))
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
+    agent = Agent(session)
+
+    agent.request("system", "user", tool_schemas=[nanocode.ReadTool.tool_schema(), nanocode.SearchTool.tool_schema()])
+    agent.request("system", "changed user", tool_schemas=[nanocode.SearchTool.tool_schema(), nanocode.ReadTool.tool_schema()])
+    agent.request("system", "user", tool_schemas=[nanocode.ReadTool.tool_schema()])
 
-    assert client._request_timeouts(session.config.provider, activity="agent") == (240, 80)
-    assert client._request_timeouts(session.config.provider, activity="compact") == (12, 5)
+    keys = [_sdk_payload(call)["prompt_cache_key"] for call in calls]
+    assert keys[0] == keys[1]
+    assert keys[2] != keys[0]
+
+
+def test_agent_request_uses_responses_api_and_sdk_output_text(tmp_path, monkeypatch):
+    class FakeResponse:
+        output_text = "ok"
+
+        def model_dump(self, mode="json"):
+            return {"output": [], "usage": {"input_tokens": 2, "input_tokens_details": {"cached_tokens": 1}, "output_tokens": 3, "total_tokens": 5}}
+
+    calls, response_calls, _client_kwargs = _patch_openai(monkeypatch, FakeResponse())
+    session = _session(
+        tmp_path,
+        api_url="https://api.openai.com/v1",
+        api_key="key",
+        model="model",
+        api="responses",
+        stream=False,
+        reasoning="high",
+    )
+
+    response = Agent(session).request("system", "user")
+    payload = _sdk_payload(response_calls[0])
+
+    assert response == {"actions": [], "_assistant_text": "ok"}
+    assert calls == []
+    assert payload["model"] == "model"
+    assert payload["instructions"] == "system"
+    assert payload["input"] == "user"
+    assert payload["store"] is False
+    assert payload["prompt_cache_key"].startswith("nanocode-")
+    assert payload["reasoning"] == {"effort": "high"}
+    assert session.state.last_prompt_tokens == 2
+    assert session.state.last_completion_tokens == 3
+    assert session.state.last_total_tokens == 5
+    assert session.state.last_cached_prompt_tokens == 1
+    assert session.state.session_cached_prompt_tokens == 1
+
+
+def test_agent_request_records_chat_cached_prompt_tokens(tmp_path, monkeypatch):
+    cases = (
+        ({"prompt_tokens": 10, "prompt_tokens_details": {"cached_tokens": 6}, "completion_tokens": 3, "total_tokens": 13}, 6),
+        ({"prompt_tokens": 10, "prompt_cache_hit_tokens": 7, "prompt_cache_miss_tokens": 3, "completion_tokens": 2, "total_tokens": 12}, 7),
+    )
+    _patch_openai(monkeypatch, tuple(_chat_response(usage=usage) for usage, _expected in cases))
+    for _usage, expected in cases:
+        session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
+        Agent(session).request("system", "user")
+        assert session.state.last_cached_prompt_tokens == expected
+        assert session.state.session_cached_prompt_tokens == expected
+        assert session.state.model_usage["model"].cached_prompt_tokens == expected
+
+
+def test_agent_request_responses_api_omits_reasoning_when_disabled(tmp_path, monkeypatch):
+    calls, response_calls, _client_kwargs = _patch_openai(monkeypatch, _responses_response())
+    session = _session(tmp_path, api_url="https://api.openai.com/v1", api_key="key", model="model", api="responses", stream=False)
+    session.config.provider.reasoning = "off"
+
+    Agent(session).request("system", "user")
+    payload = _sdk_payload(response_calls[0])
+
+    assert calls == []
+    assert "reasoning" not in payload
 
 
 def test_agent_request_retries_model_timeout(tmp_path, monkeypatch):
@@ -920,7 +1337,7 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
             if self.calls <= 3:
                 raise LLMError("request model timeout")
@@ -944,7 +1361,7 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
             if self.calls == 1:
                 raise LLMError("request first token timeout")
@@ -967,7 +1384,7 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
             if self.calls <= 2:
                 raise LLMError("request model timeout")
@@ -1037,7 +1454,7 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
             raise LLMError("request model timeout")
 
@@ -1063,7 +1480,7 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
             raise LLMError("API request failed")
 
@@ -1084,663 +1501,530 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert sleeps == []
 
 
-def test_agent_request_streams_and_reports_completed_actions(tmp_path, monkeypatch):
-    captured = {}
-
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def __iter__(self):
-            chunks = [
-                '{"type":"tool","name":"Read",',
-                '"intention":"read sample","args":["sample.txt"]}__END_ACTION__',
-                '{"type":"message","text":"done"}__END_ACTION__',
-            ]
-            for chunk in chunks:
-                yield ("data: " + json.dumps({"choices": [{"delta": {"content": chunk}}]}) + "\n").encode("utf-8")
-            yield (
-                "data: "
-                + json.dumps({"choices": [], "usage": {"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5}})
-                + "\n"
-            ).encode("utf-8")
-            yield b"data: [DONE]\n"
-
-    def fake_urlopen(request, timeout):
-        captured["payload"] = json.loads(request.data.decode("utf-8"))
-        return FakeResponse()
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model")
-
-    response = Agent(session).request("system", "user")
-
-    assert captured["payload"]["stream"] is True
-    assert captured["payload"]["stream_options"] == {"include_usage": True}
-    assert response["actions"] == [
-        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt"]},
-        {"type": "message", "text": "done"},
-    ]
-    assert session.state.last_prompt_tokens == 2
-    assert session.state.last_completion_tokens == 3
-    assert session.state.last_total_tokens == 5
-    assert session.state.session_total_tokens == 5
-
-
-def test_agent_request_stream_uses_first_token_timeout_until_content(tmp_path, monkeypatch):
-    timers = []
-
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def __iter__(self):
-            yield ("data: " + json.dumps({"choices": [{"delta": {"role": "assistant"}}]}) + "\n").encode("utf-8")
-            yield ("data: " + json.dumps({"choices": [{"delta": {"content": '{"type":"message","text":"ok"}__END_ACTION__'}}]}) + "\n").encode("utf-8")
-            yield b"data: [DONE]\n"
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", lambda request, timeout: FakeResponse())
-    monkeypatch.setattr(nanocode.signal, "setitimer", lambda timer, seconds: timers.append(seconds))
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", timeout=90, first_token_timeout=4)
-
-    response = Agent(session).request("system", "user")
-
-    assert response["actions"][0]["text"] == "ok"
-    assert timers[0] == 90
-    assert 4 in timers
-    assert timers[-1] == 0
-
-
-def test_agent_request_records_stream_rate_from_usage(tmp_path, monkeypatch):
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def __iter__(self):
-            yield ("data: " + json.dumps({"choices": [{"delta": {"content": '{"type":"message","text":"ok"}'}}]}) + "\n").encode("utf-8")
-            yield ("data: " + json.dumps({"choices": [], "usage": {"completion_tokens": 20, "total_tokens": 30}}) + "\n").encode("utf-8")
-            yield b"data: [DONE]\n"
-
-    times = [100.0, 100.0, 100.0, 102.0]
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", lambda request, timeout: FakeResponse())
-    monkeypatch.setattr(nanocode.time, "monotonic", lambda: times.pop(0) if times else 102.0)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model")
-
-    response = Agent(session).request("system", "user")
-
-    assert response["actions"][0]["text"] == "ok"
-    assert session.state.last_model_call_rate == 10.0
-
-
-def test_agent_request_stream_hard_timeout_becomes_model_timeout(tmp_path, monkeypatch):
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def __iter__(self):
-            nanocode.signal.raise_signal(nanocode.signal.SIGALRM)
-            yield b""
-
-    sleeps = []
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", lambda request, timeout: FakeResponse())
-    monkeypatch.setattr(nanocode.time, "sleep", sleeps.append)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", timeout=12)
-
-    try:
-        Agent(session).request("system", "user")
-    except LLMError as error:
-        assert str(error) == "request model timeout"
-    else:
-        raise AssertionError("expected LLMError")
-
-    assert session.state.current_model_call_started_at == 0.0
-    assert sleeps == [3, 10, 20, 30, 60, 120]
-
-
-def test_agent_run_reports_streamed_tool_actions_after_execution(tmp_path, monkeypatch):
-    (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
-    (tmp_path / "other.txt").write_text("beta\n", encoding="utf-8")
-    captured_payloads = []
-    responses = [
-        [
-            '{"type":"tool","name":"Read",',
-            '"intention":"read sample","args":["sample.txt","0","1"]}__END_ACTION__',
-            '{"type":"tool","name":"Read",',
-            '"intention":"read other","args":["other.txt","0","1"]}__END_ACTION__',
-        ],
-        [
-            '{"type":"keep","source":["tr.1","tr.2"],"reason":"sample and other reads remain useful"}__END_ACTION__',
-        ],
-        [
-            '{"type":"verify","method":"unit","status":"passed","context":"checked"}__END_ACTION__',
-            '{"type":"goal","text":"read sample","complete":true,"message_for_complete":"done"}__END_ACTION__',
-        ],
-    ]
-
-    class FakeResponse:
-        def __init__(self, chunks):
-            self.chunks = chunks
-
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def __iter__(self):
-            for chunk in self.chunks:
-                yield ("data: " + json.dumps({"choices": [{"delta": {"content": chunk}}]}) + "\n").encode("utf-8")
-            yield b"data: [DONE]\n"
-
-    def fake_urlopen(request, timeout):
-        captured_payloads.append(json.loads(request.data.decode("utf-8")))
-        return FakeResponse(responses.pop(0))
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model")
-    agent = Agent(session)
-    agent.OBSERVE_AFTER_PENDING_RESULT_COUNT = 1
-    _seed_plan(agent, "read sample")
-    messages = []
-
-    response = agent.run("read sample", on_message=messages.append)
-
-    assert response["actions"][-1] == {"type": "goal", "text": "read sample", "complete": True, "message_for_complete": "done"}
-    assert len(captured_payloads) == 3
-    assert [payload["stream"] for payload in captured_payloads] == [True, True, True]
-    assert messages[0].startswith("[success] Read sample.txt 0:1 -> tr.1")
-    assert "why:" not in messages[0]
-    assert messages[-1] == "done"
-
-
-def test_agent_request_uses_configured_reasoning_payload(tmp_path, monkeypatch):
-    captured = {}
-
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def read(self):
-            return json.dumps({"choices": [{"message": {"content": json.dumps({"type": "message", "text": "ok"})}}], "usage": {}}).encode("utf-8")
-
-    def fake_urlopen(request, timeout):
-        captured["payload"] = json.loads(request.data.decode("utf-8"))
-        return FakeResponse()
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(
-        tmp_path,
-        api_url="https://example.test/v1",
-        api_key="key",
-        model="model",
-        reasoning_effort="high",
-        reasoning_payload="reasoning",
-        stream=False,
-    )
-
-    Agent(session).request("system", "user")
-
-    assert captured["payload"]["reasoning"] == {"effort": "high"}
-    assert "reasoning_effort" not in captured["payload"]
-
-
-def test_agent_request_uses_configured_reasoning_effort_payload(tmp_path, monkeypatch):
-    captured = {}
-
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def read(self):
-            return json.dumps({"choices": [{"message": {"content": json.dumps({"type": "message", "text": "ok"})}}], "usage": {}}).encode("utf-8")
-
-    def fake_urlopen(request, timeout):
-        captured["payload"] = json.loads(request.data.decode("utf-8"))
-        return FakeResponse()
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(
-        tmp_path,
-        api_url="https://example.test/v1",
-        api_key="key",
-        model="model",
-        reasoning_effort="high",
-        reasoning_payload="reasoning_effort",
-        stream=False,
-    )
-
-    Agent(session).request("system", "user")
-
-    assert captured["payload"]["reasoning_effort"] == "high"
-    assert "reasoning" not in captured["payload"]
-
-
-def test_agent_request_accepts_json_fenced_model_content(tmp_path, monkeypatch):
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def read(self):
-            return json.dumps(
+def test_agent_request_sends_function_tool_schema_and_parses_tool_call(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(
+        monkeypatch,
+        {
+            "choices": [
                 {
-                    "choices": [{"message": {"content": '```json\n{"type":"message","text":"ok"}\n__END_ACTION__\n```'}}],
-                    "usage": {},
+                    "message": {
+                        "content": "Reading the file.",
+                        "tool_calls": [
+                            {
+                                "function": {
+                                    "name": "Read",
+                                    "arguments": '{"intention":"read sample","args":["sample.txt","0","1"]}',
+                                }
+                            }
+                        ],
+                    }
                 }
-            ).encode("utf-8")
-
-    def fake_urlopen(request, timeout):
-        return FakeResponse()
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
-
-    response = Agent(session).request("system", "user")
-
-    assert response == {"actions": [{"type": "message", "text": "ok"}]}
-
-
-def test_agent_request_accepts_leaked_think_tags_before_json(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    assert client._parse_model_content('</think>{"type":"message","text":"ok"}\n__END_ACTION__') == {
-        "actions": [{"type": "message", "text": "ok"}],
-    }
-    assert client._parse_model_content('<think>reasoning</think>\n{"type":"message","text":"ok"}\n__END_ACTION__') == {
-        "actions": [{"type": "message", "text": "ok"}],
-    }
-
-
-def test_agent_request_accepts_pretty_action_frames_and_marker_variants(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content(
-        '{\n  "type": "message",\n  "text": "ok"\n}\n**END_ACTION**\n{"type":"goal","text":"next"}\nEND_ACTION'
+            ],
+            "usage": {"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5},
+        },
     )
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
 
-    assert response == {"actions": [{"type": "message", "text": "ok"}, {"type": "goal", "text": "next"}]}
-
-
-def test_agent_request_accepts_inline_action_frame_markers(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content('{"type":"message","text":"ok"}__END_ACTION__{"type":"goal","text":"next"}__END_ACTION__')
-
-    assert response == {"actions": [{"type": "message", "text": "ok"}, {"type": "goal", "text": "next"}]}
-
-
-def test_agent_request_accepts_single_unmarked_json_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content('{"type":"message","text":"ok"}')
-
-    assert response == {"actions": [{"type": "message", "text": "ok"}]}
-
-
-def test_agent_request_accepts_adjacent_unmarked_json_actions(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content(
-        '{"type":"known","items":["Project is single-file."]}\n'
-        '{"type":"stable_knowledge","items":[{"category":"structure","text":"All runtime code lives in nanocode.py."}]}'
-    )
+    response = Agent(session).request("system", "user", tool_schemas=[nanocode.ReadTool.tool_schema()])
+    payload = _sdk_payload(calls[0])
 
+    assert payload["tools"][0]["function"]["name"] == "Read"
+    assert payload["tool_choice"] == "auto"
+    assert payload["parallel_tool_calls"] is True
     assert response == {
-        "actions": [
-            {"type": "known", "items": ["Project is single-file."]},
-            {
-                "type": "stable_knowledge",
-                "items": [{"category": "structure", "text": "All runtime code lives in nanocode.py."}],
-            },
-        ],
+        "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}],
+        "_assistant_text": "Reading the file.",
     }
+    assert session.state.last_total_tokens == 5
 
 
-def test_agent_request_accepts_unmarked_json_action_array(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content('[{"type":"tool","name":"Read","args":["nanocode.py","0,1"],"intention":"read source"}]')
-
-    assert response == {"actions": [{"type": "tool", "name": "Read", "args": ["nanocode.py", "0,1"], "intention": "read source"}]}
-
-
-def test_agent_request_repairs_fenced_json_action_array_with_extra_brace(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content(
-        '```json\n[{"type":"tool","name":"ListDir","intention":"Find the demo directory in the project root.","args":[""]}]}\n```'
+def test_agent_step_preserves_raw_bad_function_arguments(tmp_path, monkeypatch):
+    bad_arguments = '{"text":"broken",'
+    _patch_openai(
+        monkeypatch,
+        {
+            "choices": [
+                {
+                    "message": {
+                        "tool_calls": [
+                            {
+                                "function": {
+                                    "name": "goal",
+                                    "arguments": bad_arguments,
+                                }
+                            }
+                        ],
+                    }
+                }
+            ]
+        },
     )
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
 
-    assert response == {
-        "actions": [
-            {"type": "tool", "name": "ListDir", "intention": "Find the demo directory in the project root.", "args": [""]}
-        ]
-    }
-
-
-def test_agent_request_accepts_empty_actions_response_object(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    assert client._parse_model_content('{"actions": []}') == {"actions": []}
-    assert client._parse_model_content('{"actions": []}__END_ACTION__') == {"actions": []}
+    response = Agent(session).step()
 
+    assert response["_format_bad_output"] == bad_arguments
+    assert "invalid tool arguments for goal" in response["_format_error"]
+    assert "Bad output: " + bad_arguments in response["_format_error"]
 
-def test_agent_request_accepts_comma_separated_unmarked_json_actions(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
 
-    response = client._parse_model_content(
-        '{"type":"tool","name":"Read","args":["nanocode.py","3893,3910"]},'
-        '{"type":"tool","name":"Search","args":["STABLE_KNOWLEDGE_CATEGORIES","path=nanocode.py","context=2"]}'
-    )
+def test_agent_accepts_string_plan_items_from_function_call(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    response = {"actions": [{"type": "plan", "mode": "replace", "items": ["Create demo", "Run smoke test"]}]}
 
-    assert response == {
-        "actions": [
-            {"type": "tool", "name": "Read", "args": ["nanocode.py", "3893,3910"]},
-            {"type": "tool", "name": "Search", "args": ["STABLE_KNOWLEDGE_CATEGORIES", "path=nanocode.py", "context=2"]},
-        ]
-    }
+    assert agent._build_response_context(response).has_fresh_plan_action is True
+    agent.apply_response(response)
 
+    assert agent.blackboard.plan == [
+        nanocode.PlanItem(text="Create demo"),
+        nanocode.PlanItem(text="Run smoke test"),
+    ]
 
-def test_agent_request_normalizes_tool_name_as_action_type(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
 
-    response = client._parse_model_content(
-        '{"type":"ListDir","intention":"list root","args":["."]}\n'
-        '{"type":"search","intention":"find tests","args":["pytest","path=.", "context=2"]}\n'
-        '{"type":"recall","intention":"recall result","args":["tr.1"]}'
-    )
+def test_agent_accepts_string_lead_items_from_function_call(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
 
-    assert response == {
-        "actions": [
-            {"type": "tool", "name": "ListDir", "intention": "list root", "args": ["."]},
-            {"type": "tool", "name": "Search", "intention": "find tests", "args": ["pytest", "path=.", "context=2"]},
-            {"type": "tool", "name": "Recall", "intention": "recall result", "args": ["tr.1"]},
-        ]
-    }
+    agent.apply_response({"actions": [{"type": "lead", "items": ["Admin filter excludes history"]}]})
 
+    assert agent.blackboard.leads == [
+        nanocode.Lead(text="Admin filter excludes history"),
+    ]
 
-def test_agent_normalizes_harmless_action_type_aliases(tmp_path):
-    agent = Agent(Session(cwd=str(tmp_path)))
 
-    actions = agent._response_actions(
+def test_function_tool_schemas_define_items_for_every_array():
+    def walk(value, path="schema"):
+        if isinstance(value, dict):
+            schema_type = value.get("type")
+            if schema_type == "array" or (isinstance(schema_type, list) and "array" in schema_type):
+                assert "items" in value, path
+            for key, child in value.items():
+                walk(child, path + "." + str(key))
+        elif isinstance(value, list):
+            for index, child in enumerate(value):
+                walk(child, path + "[" + str(index) + "]")
+
+    state_schemas = [nanocode._state_tool_schema(name) for name in nanocode.STATE_TOOL_PARAMS]
+    repo_schemas = [tool.tool_schema() for tool in nanocode.TOOL_REGISTRY.values()]
+    for schema in [*state_schemas, *repo_schemas, nanocode.COMPACT_TOOL_SCHEMA]:
+        walk(schema)
+
+
+def test_function_tool_schemas_do_not_emit_null_enum_values():
+    def walk(value, path="schema"):
+        if isinstance(value, dict):
+            enum = value.get("enum")
+            if isinstance(enum, list):
+                assert None not in enum, path
+            for key, child in value.items():
+                walk(child, path + "." + str(key))
+        elif isinstance(value, list):
+            for index, child in enumerate(value):
+                walk(child, path + "[" + str(index) + "]")
+
+    state_schemas = [nanocode._state_tool_schema(name) for name in nanocode.STATE_TOOL_PARAMS]
+    repo_schemas = [tool.tool_schema() for tool in nanocode.TOOL_REGISTRY.values()]
+    for schema in [*state_schemas, *repo_schemas, nanocode.COMPACT_TOOL_SCHEMA]:
+        walk(schema)
+
+
+def test_agent_request_responses_api_parses_function_call(tmp_path, monkeypatch):
+    _calls, response_calls, _client_kwargs = _patch_openai(
+        monkeypatch,
         {
-            "actions": [
-                {"type": "Plan", "items": []},
-                {"type": "Message", "content": "ok"},
-            ]
-        }
+            "output": [
+                {
+                    "type": "function_call",
+                    "name": "known",
+                    "arguments": '{"items":["Project uses pytest."]}',
+                }
+            ],
+            "usage": {"input_tokens": 2, "output_tokens": 3, "total_tokens": 5},
+        },
     )
+    session = _session(tmp_path, api_url="https://api.openai.com/v1", api_key="key", model="model", api="responses", stream=False)
 
-    assert actions == [
-        {"type": "plan", "items": []},
-        {"type": "chat", "content": "ok", "text": "ok"},
-    ]
+    response = Agent(session).request("system", "user", tool_schemas=[nanocode._state_tool_schema("known")])
+    payload = _sdk_payload(response_calls[0])
 
+    assert payload["tools"][0]["name"] == "known"
+    assert payload["tool_choice"] == "auto"
+    assert response == {"actions": [{"type": "known", "items": ["Project uses pytest."]}]}
+    assert session.state.last_total_tokens == 5
 
-def test_agent_request_converts_prefixed_unmarked_text_to_progress_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
 
-    response = client._parse_model_content(
-        "The test is failing because the expected message changed. Let me read the test.\n\n"
-        '{"type":"tool","name":"Read","intention":"read the failing test","args":["tests/test_nanocode_commands.py","140,165"]}'
-    )
+def test_agent_request_chat_stream_parses_function_tool_event(tmp_path, monkeypatch):
+    calls = []
 
-    assert response == {
-        "actions": [
-            {"type": "progress", "text": "The test is failing because the expected message changed. Let me read the test."},
-            {"type": "tool", "name": "Read", "intention": "read the failing test", "args": ["tests/test_nanocode_commands.py", "140,165"]},
-        ],
-    }
-
-
-def test_agent_request_converts_plain_unmarked_text_to_progress_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    class FakeCompletions:
+        def create(self, **kwargs):
+            calls.append(kwargs)
+            return iter(
+                [
+                    _stream_chunk({"content": "Reading."}),
+                    _stream_chunk({"tool_calls": [{"index": "0", "function": {"name": "Read", "arguments": '{"intention":"read sample",'}}]}),
+                    _stream_chunk({"tool_calls": [{"index": "0", "function": {"arguments": '"args":["sample.txt","0","1"]}'}}]}),
+                    _stream_chunk(usage={"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5}, choices=False),
+                ]
+            )
 
-    response = client._parse_model_content("Let me read the StatusBar class and the streaming content logic.")
+    class FakeOpenAI:
+        def __init__(self, **_kwargs):
+            self.chat = type("FakeChat", (), {"completions": FakeCompletions()})()
 
-    assert response == {
-        "actions": [
-            {"type": "progress", "text": "Let me read the StatusBar class and the streaming content logic."},
-        ]
-    }
+    monkeypatch.setattr(nanocode, "OpenAI", FakeOpenAI)
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model")
 
-    response = client._parse_model_content("让我读取 `_format_line` 的当前状态，以找到确切插入点。")
+    response = Agent(session).request("system", "user", tool_schemas=[nanocode.ReadTool.tool_schema()])
 
+    assert calls[0]["tools"][0]["function"]["name"] == "Read"
+    assert calls[0]["stream"] is True
     assert response == {
         "actions": [
-            {"type": "progress", "text": "让我读取 `_format_line` 的当前状态，以找到确切插入点。"},
-        ]
+            {
+                "type": "tool",
+                "name": "Read",
+                "intention": "read sample",
+                "args": ["sample.txt", "0", "1"],
+            }
+        ],
+        "_assistant_text": "Reading.",
     }
+    assert session.state.last_total_tokens == 5
 
 
-def test_agent_request_rejects_cli_context_transcript_as_plain_progress(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
-
-    response = client._parse_model_content("}")
-
-    assert response["actions"] == []
-    assert "expected one JSON action object or action frames ending with __END_ACTION__" in response["_format_error"]
-
-    response = client._parse_model_content("Now }")
-
-    assert response["actions"] == []
-    assert "expected one JSON action object or action frames ending with __END_ACTION__" in response["_format_error"]
-
-    response = client._parse_model_content("  ctx: -tr.61 -tr.62")
+def test_agent_stream_step_preserves_same_response_tool_batch_in_latest(tmp_path, monkeypatch):
+    (tmp_path / "one.txt").write_text("one\n", encoding="utf-8")
+    (tmp_path / "two.txt").write_text("two\n", encoding="utf-8")
 
-    assert response["actions"] == []
-    assert "expected one JSON action object or action frames ending with __END_ACTION__" in response["_format_error"]
+    class FakeModelClient:
+        def request(self, *_args, on_stream_action=None, **_kwargs):
+            assert on_stream_action is not None
+            on_stream_action({"type": "tool", "name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]})
+            on_stream_action({"type": "tool", "name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]})
+            return {"actions": []}
 
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.model_client = FakeModelClient()
+    monkeypatch.setattr(agent, "_can_stream_tools", lambda: True)
 
-def test_agent_request_converts_interleaved_unmarked_text_to_progress_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    result, _response, committed = agent.stream_step()
 
-    response = client._parse_model_content(
-        '{"type":"plan","items":[{"id":"p1","text":"Inspect","status":"doing"}]}\n\n'
-        "Now I will read the file.\n\n"
-        '{"type":"tool","name":"Read","intention":"read source","args":["demo/astar_demo.cpp"]}'
-    )
+    latest = _blocks_text(agent.tool_context.latest)
+    assert result.done is False
+    assert committed is True
+    assert "one" in latest
+    assert "two" in latest
+    assert 'tool=Read args=["one.txt","0,1"]' in latest
+    assert 'tool=Read args=["two.txt","0,1"]' in latest
+    assert agent.tool_context.recent == []
 
-    assert response == {
-        "actions": [
-            {"type": "plan", "items": [{"id": "p1", "text": "Inspect", "status": "doing"}]},
-            {"type": "progress", "text": "Now I will read the file."},
-            {"type": "tool", "name": "Read", "intention": "read source", "args": ["demo/astar_demo.cpp"]},
-        ],
-    }
 
+def test_agent_request_responses_stream_parses_function_tool_event(tmp_path, monkeypatch):
+    response_calls = []
 
-def test_agent_request_ignores_fence_only_interleaved_progress(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    class FakeResponses:
+        def create(self, **kwargs):
+            response_calls.append(kwargs)
+            return iter(
+                [
+                    {"type": "response.output_text.delta", "delta": "Recording."},
+                    {
+                        "type": "response.function_call_arguments.done",
+                        "name": "known",
+                        "arguments": '{"items":["Project uses pytest."]}',
+                    },
+                    {"type": "response.completed", "response": {"usage": {"input_tokens": 2, "output_tokens": 3, "total_tokens": 5}}},
+                ]
+            )
 
-    response = client._parse_model_content(
-        '{"type":"plan","items":[{"id":"p1","text":"Inspect","status":"doing"}]}\n```json\n'
-        '{"type":"tool","name":"Read","intention":"read source","args":["demo/astar_demo.cpp"]}'
-    )
+    class FakeOpenAI:
+        def __init__(self, **_kwargs):
+            self.responses = FakeResponses()
 
-    assert response == {
-        "actions": [
-            {"type": "plan", "items": [{"id": "p1", "text": "Inspect", "status": "doing"}]},
-            {"type": "tool", "name": "Read", "intention": "read source", "args": ["demo/astar_demo.cpp"]},
-        ],
-    }
+    monkeypatch.setattr(nanocode, "OpenAI", FakeOpenAI)
+    session = _session(tmp_path, api_url="https://api.openai.com/v1", api_key="key", model="model", api="responses")
 
+    response = Agent(session).request("system", "user", tool_schemas=[nanocode._state_tool_schema("known")])
 
-def test_agent_request_strips_leaked_tool_code_after_valid_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    assert response_calls[0]["tools"][0]["name"] == "known"
+    assert response_calls[0]["stream"] is True
+    assert response == {"actions": [{"type": "known", "items": ["Project uses pytest."]}], "_assistant_text": "Recording."}
+    assert session.state.last_total_tokens == 5
 
-    response = client._parse_model_content(
-        "我正在分析这些更改。让我仔细检查速率计算部分是否存在潜在的 bug。\n\n"
-        "```json\n"
-        '{"type":"Read","args":["nanocode.py","3500,3510"],"intention":"检查速率计算时 elapsed 是否可能为0"}\n'
-        "```\n"
-        "<tool_code>\n"
-        "{\n"
-        "  tool: 'Read',\n"
-        "  args: [\"nanocode.py\", \"3500,3510\"],\n"
-        "  intention: '检查速率计算时 elapsed 是否可能为0'\n"
-        "}\n"
-        "</tool_code>"
-    )
 
-    assert response == {
-        "actions": [
-            {"type": "progress", "text": "我正在分析这些更改。让我仔细检查速率计算部分是否存在潜在的 bug。"},
-            {"type": "tool", "name": "Read", "args": ["nanocode.py", "3500,3510"], "intention": "检查速率计算时 elapsed 是否可能为0"},
-        ]
-    }
+def test_agent_request_responses_stream_uses_output_item_function_name(tmp_path, monkeypatch):
+    class FakeResponses:
+        def create(self, **_kwargs):
+            return iter(
+                [
+                    {
+                        "type": "response.output_item.added",
+                        "output_index": 0,
+                        "item": {"id": "fc_1", "type": "function_call", "name": "goal", "arguments": ""},
+                    },
+                    {
+                        "type": "response.function_call_arguments.done",
+                        "item_id": "fc_1",
+                        "arguments": '{"text":"Greet the user.","complete":true,"message_for_complete":"Hi!"}',
+                    },
+                    {"type": "response.completed", "response": {"usage": {"input_tokens": 2, "output_tokens": 3, "total_tokens": 5}}},
+                ]
+            )
 
+    class FakeOpenAI:
+        def __init__(self, **_kwargs):
+            self.responses = FakeResponses()
 
-def test_agent_request_converts_trailing_unmarked_text_to_progress_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    monkeypatch.setattr(nanocode, "OpenAI", FakeOpenAI)
+    session = _session(tmp_path, api_url="https://api.openai.com/v1", api_key="key", model="model", api="responses")
 
-    response = client._parse_model_content('{"type":"message","text":"ok"}\nDone.')
+    response = Agent(session).request("system", "user", tool_schemas=[nanocode._state_tool_schema("goal")])
+
+    assert response == {"actions": [{"type": "goal", "text": "Greet the user.", "complete": True, "message_for_complete": "Hi!"}]}
 
-    assert response == {
-        "actions": [
-            {"type": "message", "text": "ok"},
-            {"type": "progress", "text": "Done."},
-        ]
-    }
 
+def test_agent_request_responses_stream_error_event_raises_llm_error(tmp_path, monkeypatch):
+    _patch_openai(monkeypatch, [{"code": "InvalidParameter", "message": "Unsupported model: 'deepseek-v4-flash'."}])
+    session = _session(tmp_path, api_url="https://api.openai.com/v1", api_key="key", model="model", api="responses")
 
-def test_agent_request_converts_trailing_unmarked_text_after_action_array_to_progress_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    try:
+        Agent(session).request("system", "user")
+    except LLMError as error:
+        assert str(error) == "API request failed: InvalidParameter: Unsupported model: 'deepseek-v4-flash'."
+    else:
+        raise AssertionError("expected LLMError")
 
-    response = client._parse_model_content('[{"type":"progress","text":"checking"}]\nNow I will read the file.')
 
-    assert response == {
-        "actions": [
-            {"type": "progress", "text": "checking"},
-            {"type": "progress", "text": "Now I will read the file."},
-        ]
-    }
+def test_agent_request_records_stream_rate_from_usage(tmp_path, monkeypatch):
+    times = [100.0, 100.0, 100.0, 102.0]
+    _patch_openai(
+        monkeypatch,
+        [
+            _stream_chunk({"content": "ok"}),
+            _stream_chunk(usage={"completion_tokens": 20, "total_tokens": 30}, choices=False),
+        ],
+    )
+    monkeypatch.setattr(nanocode.time, "monotonic", lambda: times.pop(0) if times else 102.0)
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model")
 
+    response = Agent(session).request("system", "user")
 
-def test_agent_request_repairs_unescaped_newlines_in_unmarked_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    assert response == {"actions": [], "_assistant_text": "ok"}
+    assert session.state.last_model_call_rate == 10.0
 
-    response = client._parse_model_content('{"type":"chat","text":"line 1\n\n1. item\n2. item"}')
 
-    assert response == {
-        "actions": [
-            {"type": "chat", "text": "line 1\n\n1. item\n2. item"},
-        ]
-    }
+def test_agent_request_stream_hard_timeout_becomes_model_timeout(tmp_path, monkeypatch):
+    def stream():
+        if False:
+            yield {}
+        while True:
+            nanocode.signal.raise_signal(nanocode.signal.SIGALRM)
+            yield {}
 
+    sleeps = []
+    _patch_openai(monkeypatch, stream)
+    monkeypatch.setattr(nanocode.time, "sleep", sleeps.append)
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", timeout=12)
 
-def test_agent_request_repairs_extra_closing_brace_after_unmarked_action(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    try:
+        Agent(session).request("system", "user")
+    except LLMError as error:
+        assert str(error) == "request model timeout"
+    else:
+        raise AssertionError("expected LLMError")
 
-    response = client._parse_model_content('{"type":"progress","text":"ok"}}')
+    assert session.state.current_model_call_started_at == 0.0
+    assert sleeps == [3, 10, 20, 30, 60, 120]
 
-    assert response == {"actions": [{"type": "progress", "text": "ok"}]}
 
+def test_agent_request_uses_configured_chat_reasoning(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, _chat_response())
+    session = _session(
+        tmp_path,
+        api_url="https://example.test/v1",
+        api_key="key",
+        model="model",
+        reasoning="high",
+        chat_reasoning="reasoning",
+        stream=False,
+    )
 
-def test_agent_request_ignores_bad_action_frames_when_other_actions_are_valid(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    Agent(session).request("system", "user")
+    payload = _sdk_payload(calls[0])
 
-    response = client._parse_model_content('plain answer\n__END_ACTION__\n{"type":"message","text":"ok"}\n__END_ACTION__')
+    assert payload["reasoning"] == {"effort": "high"}
+    assert "reasoning_effort" not in payload
 
-    assert response["actions"] == [{"type": "message", "text": "ok"}]
-    assert response["_format_frame_errors"] == ["frame 1: expected JSON object action"]
 
+def test_agent_request_uses_configured_reasoning_effort_payload(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, _chat_response())
+    session = _session(
+        tmp_path,
+        api_url="https://example.test/v1",
+        api_key="key",
+        model="model",
+        reasoning="high",
+        chat_reasoning="reasoning_effort",
+        stream=False,
+    )
 
-def test_agent_request_rejects_native_tool_call_syntax(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+    Agent(session).request("system", "user")
+    payload = _sdk_payload(calls[0])
 
-    response = client._parse_model_content('<tool_call>Read("nanocode.py", 0, 100)')
+    assert payload["reasoning_effort"] == "high"
+    assert "reasoning" not in payload
 
-    assert response["actions"] == []
-    assert "Native tool_call syntax is not supported" in response["_format_error"]
-    assert '"name":"Read"' in response["_format_error"]
-    assert '"args":["nanocode.py","0,100"]' in response["_format_error"]
 
+def test_agent_request_uses_configured_thinking_payload(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, _chat_response())
+    session = _session(
+        tmp_path,
+        api_url="https://example.test/v1",
+        api_key="key",
+        model="model",
+        reasoning="xhigh",
+        chat_reasoning="thinking",
+        stream=False,
+    )
 
-def test_agent_request_wraps_non_json_model_content_as_format_error(tmp_path, monkeypatch):
-    class FakeResponse:
-        def __enter__(self):
-            return self
+    Agent(session).request("system", "user")
+    payload = _sdk_payload(calls[0])
 
-        def __exit__(self, *args):
-            return None
+    assert payload["thinking"] == {"type": "enabled"}
+    assert payload["reasoning_effort"] == "max"
+    assert "reasoning" not in payload
 
-        def read(self):
-            return json.dumps({"choices": [{"message": {"content": "plain answer"}}], "usage": {}}).encode("utf-8")
 
-    def fake_urlopen(request, timeout):
-        return FakeResponse()
+def test_agent_request_uses_configured_thinking_disabled_payload(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, _chat_response())
+    session = _session(
+        tmp_path,
+        api_url="https://example.test/v1",
+        api_key="key",
+        model="model",
+        chat_reasoning="thinking",
+        stream=False,
+    )
+    session.config.provider.reasoning = "off"
 
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
-    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
+    Agent(session).request("system", "user")
+    payload = _sdk_payload(calls[0])
+
+    assert payload["thinking"] == {"type": "disabled"}
+    assert "reasoning_effort" not in payload
+
+
+def test_agent_request_auto_detects_chat_reasoning_from_provider_url(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, tuple(_chat_response() for _ in range(10)))
+
+    Agent(_session(tmp_path, api_url="https://api.deepseek.com", api_key="key", model="model", reasoning="xhigh", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://openrouter.ai/api/v1", api_key="key", model="model", api="chat", reasoning="high", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://dashscope.aliyuncs.com/compatible-mode/v1", api_key="key", model="qwen3.6-plus", api="chat", reasoning="high", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://dashscope.aliyuncs.com/compatible-mode/v1", api_key="key", model="deepseek-v4-flash", api="chat", reasoning="xhigh", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://dashscope.aliyuncs.com/compatible-mode/v1", api_key="key", model="glm-5.1", api="chat", reasoning="high", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://api.openai.com/v1", api_key="key", model="gpt-5", api="chat", reasoning="medium", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://opencode.ai/zen/go/v1", api_key="key", model="deepseek-v4-flash", reasoning="high", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://opencode.ai/zen/go/v1", api_key="key", model="kimi-k2.6", reasoning="high", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://not-openrouter.ai/api/v1", api_key="key", model="model", stream=False)).request("system", "user")
+    Agent(_session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)).request("system", "user")
+
+    payloads = [_sdk_payload(call) for call in calls]
+    assert payloads[0]["thinking"] == {"type": "enabled"}
+    assert payloads[0]["reasoning_effort"] == "max"
+    assert payloads[1]["reasoning"] == {"effort": "high"}
+    assert payloads[2]["enable_thinking"] is True
+    assert payloads[2]["thinking_budget"] == nanocode.CHAT_REASONING_EFFORT_VALUES["enable_thinking"]["high"]
+    assert payloads[3]["thinking"] == {"type": "enabled"}
+    assert payloads[3]["reasoning_effort"] == "max"
+    assert payloads[4]["model"] == "glm-5.1"
+    assert payloads[4]["messages"] == [{"role": "system", "content": "system"}, {"role": "user", "content": "user"}]
+    assert payloads[4]["stream"] is False
+    assert payloads[4]["prompt_cache_key"].startswith("nanocode-")
+    assert payloads[5]["reasoning_effort"] == "medium"
+    assert payloads[6]["reasoning"] == {"effort": "high"}
+    for payload in payloads[7:]:
+        assert "reasoning" not in payload
+        assert "reasoning_effort" not in payload
+        assert "thinking" not in payload
+        assert "enable_thinking" not in payload
+
+
+def test_provider_config_auto_resolves_api_and_chat_reasoning_from_profiles():
+    openai_provider = nanocode.ProviderConfig.from_dict({"url": "https://api.openai.com/v1", "api": "auto"})
+    openai_reasoning_provider = nanocode.ProviderConfig.from_dict({"url": "https://api.openai.com/v1", "api": "chat", "model": "gpt-5"})
+    openrouter_provider = nanocode.ProviderConfig.from_dict({"url": "https://openrouter.ai/api/v1", "api": "auto"})
+    opencode_deepseek_provider = nanocode.ProviderConfig.from_dict({"url": "https://opencode.ai/zen/go/v1", "api": "auto", "model": "deepseek-v4-flash"})
+    opencode_kimi_provider = nanocode.ProviderConfig.from_dict({"url": "https://opencode.ai/zen/go/v1", "api": "auto", "model": "kimi-k2.6"})
+    dashscope_provider = nanocode.ProviderConfig.from_dict({"url": "https://dashscope.aliyuncs.com/compatible-mode/v1", "api": "auto", "model": "qwen3.6-plus"})
+    dashscope_deepseek_provider = nanocode.ProviderConfig.from_dict({"url": "https://dashscope.aliyuncs.com/compatible-mode/v1", "api": "auto", "model": "deepseek-v4-flash"})
+    unknown_provider = nanocode.ProviderConfig.from_dict({"url": "https://example.test/v1", "api": "auto"})
+
+    assert openai_provider.resolved_api() == "responses"
+    assert openai_provider.resolved_chat_reasoning() == "off"
+    assert openai_reasoning_provider.resolved_api() == "chat"
+    assert openai_reasoning_provider.resolved_chat_reasoning() == "reasoning_effort"
+    assert openrouter_provider.resolved_api() == "responses"
+    assert openrouter_provider.resolved_chat_reasoning() == "reasoning"
+    assert opencode_deepseek_provider.resolved_api() == "chat"
+    assert opencode_deepseek_provider.resolved_chat_reasoning() == "reasoning"
+    assert opencode_kimi_provider.resolved_api() == "chat"
+    assert opencode_kimi_provider.resolved_chat_reasoning() == "off"
+    assert dashscope_provider.resolved_api() == "chat"
+    assert dashscope_provider.resolved_chat_reasoning() == "enable_thinking"
+    assert dashscope_deepseek_provider.resolved_api() == "chat"
+    assert dashscope_deepseek_provider.resolved_chat_reasoning() == "thinking"
+    assert unknown_provider.resolved_api() == "chat"
+    assert unknown_provider.resolved_chat_reasoning() == "off"
+
+
+def test_agent_request_off_chat_reasoning_disables_auto_detection(tmp_path, monkeypatch):
+    calls, _response_calls, _client_kwargs = _patch_openai(monkeypatch, _chat_response())
+    session = _session(
+        tmp_path,
+        api_url="https://api.deepseek.com",
+        api_key="key",
+        model="model",
+        stream=False,
+    )
+    session.config.provider.chat_reasoning = "off"
 
-    response = Agent(session).request("system", "user")
+    Agent(session).request("system", "user")
+    payload = _sdk_payload(calls[0])
 
-    assert response["actions"] == []
-    assert "expected one JSON action object or action frames ending with __END_ACTION__" in response["_format_error"]
-    assert "plain answer" in response["_format_error"]
+    assert "reasoning" not in payload
+    assert "reasoning_effort" not in payload
+    assert "thinking" not in payload
 
 
-def test_agent_request_rejects_invalid_unmarked_json_action_array(tmp_path):
-    client = Agent(Session(cwd=str(tmp_path))).model_client
+def test_agent_request_wraps_non_json_model_content_as_format_error(tmp_path, monkeypatch):
+    _patch_openai(monkeypatch, _chat_response("plain answer"))
+    session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
 
-    response = client._parse_model_content('[{"text":"ok"}]')
+    response = Agent(session).request("system", "user")
 
     assert response["actions"] == []
-    assert "action missing type" in response["_format_error"]
+    assert response["_assistant_text"] == "plain answer"
 
 
 def test_agent_request_wraps_missing_message_content_as_format_error(tmp_path, monkeypatch):
-    class FakeResponse:
-        def __enter__(self):
-            return self
-
-        def __exit__(self, *args):
-            return None
-
-        def read(self):
-            return json.dumps(
+    _patch_openai(
+        monkeypatch,
+        {
+            "choices": [
                 {
-                    "choices": [
-                        {
-                            "finish_reason": "stop",
-                            "message": {"role": "assistant", "content": None},
-                        }
-                    ],
-                    "usage": {},
+                    "finish_reason": "stop",
+                    "message": {"role": "assistant", "content": None},
                 }
-            ).encode("utf-8")
-
-    def fake_urlopen(request, timeout):
-        return FakeResponse()
-
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
+            ],
+            "usage": {},
+        },
+    )
     session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model", stream=False)
 
     response = Agent(session).request("system", "user")
 
     assert response["actions"] == []
-    assert "expected one JSON object" in response["_format_error"]
+    assert "expected a function tool call" in response["_format_error"]
     assert "API response missing message content" in response["_format_error"]
 
 
@@ -1819,52 +2103,6 @@ def test_agent_keeps_latest_500_known_items(tmp_path):
     assert agent.blackboard.known[-1] == "fact 500"
 
 
-def test_main_agent_applies_stable_knowledge_action(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    agent = Agent(session)
-
-    agent.apply_response(
-        {
-            "actions": [
-                {"type": "known", "items": ["Read pyproject.toml."]},
-                {
-                    "type": "stable_knowledge",
-                    "items": [
-                        {"category": "workflow", "text": "Project test command is make test."},
-                        {"category": "workflow", "text": "Project test command is make test."},
-                    ],
-                }
-            ]
-        }
-    )
-
-    assert agent.blackboard.known == ["Read pyproject.toml."]
-    assert agent.blackboard.stable_knowledge == {"workflow": ["Project test command is make test."]}
-    assert "  Stable_Knowledge\n" in agent.state_updater.latest_report
-    assert "    workflow\n" in agent.state_updater.latest_report
-    assert "      1. Project test command is make test." in agent.state_updater.latest_report
-
-
-def test_main_agent_keeps_latest_30_stable_knowledge_items_per_category(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    agent = Agent(session)
-
-    agent.apply_response(
-        {
-            "actions": [
-                {
-                    "type": "stable_knowledge",
-                    "items": [{"category": "workflow", "text": "stable fact " + str(index)} for index in range(31)],
-                }
-            ]
-        }
-    )
-
-    assert len(agent.blackboard.stable_knowledge["workflow"]) == 30
-    assert agent.blackboard.stable_knowledge["workflow"][0] == "stable fact 1"
-    assert agent.blackboard.stable_knowledge["workflow"][-1] == "stable fact 30"
-
-
 def test_main_agent_applies_user_rule_and_saves(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
@@ -1902,7 +2140,7 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
             return {
                 "actions": [
@@ -1931,7 +2169,7 @@ def test_main_agent_state_updates_show_in_debug(tmp_path):
     agent = Agent(_session(tmp_path, debug=True))
 
     class FakeModelClient:
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return {"actions": [{"type": "user_rule", "text": "Prompt-only changes do not need tests.", "message": "记住了。"}]}
 
     agent.model_client = FakeModelClient()
@@ -1939,7 +2177,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     agent.run("记住：prompt 改动不用测试", on_message=messages.append)
 
-    assert any(message.startswith("State Updated") for message in messages)
+    assert "User Rules Updated\n  updated" in messages
 
 
 def test_main_agent_state_updates_are_compact_without_debug(tmp_path):
@@ -1948,10 +2186,10 @@ def test_main_agent_state_updates_are_compact_without_debug(tmp_path):
     agent.apply_response(
         {
             "actions": [
+                {"type": "goal", "text": "inspect project", "complete": False},
                 {
-                    "type": "start",
-                    "goal": "inspect project",
-                    "plan": [
+                    "type": "plan",
+                    "items": [
                         {"id": "p1", "text": "List files", "status": "done"},
                         {"id": "p2", "text": "Read config", "status": "done"},
                         {"id": "p3", "text": "Update code", "status": "doing"},
@@ -1964,23 +2202,23 @@ def test_main_agent_state_updates_are_compact_without_debug(tmp_path):
     )
 
     report = agent.state_updater.compact_report()
-    assert report.startswith("Plan + Known Updated")
+    assert report.startswith("Goal + Plan + Facts Updated")
+    assert "\nGoal\n  inspect project\n" in report
     assert "\nPlan\n" in report
     assert "  ... 1 older\n  2. [✓ done] Read config\n  3. [◔ doing] Update code\n  4. [○ todo] Run tests" in report
-    assert "\nKnown\n" in report
+    assert "\nFacts\n" in report
     assert "  ... 1 older\n  2. fact two\n  3. fact three\n  4. fact four" in report
-    assert "inspect project" not in report
     assert "State Updated" not in report
 
 
-def test_main_agent_compact_report_labels_combined_hypotheses_and_known(tmp_path):
+def test_main_agent_compact_report_labels_combined_leads_and_facts(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
 
     agent.apply_response(
         {
             "actions": [
                 {
-                    "type": "hypothesis",
+                    "type": "lead",
                     "items": [{"id": "h1", "text": "admin selector starves history mode", "status": "active", "source": ["tr.2"]}],
                 },
                 {"type": "known", "items": [{"fact": "feed SSE request path is shared by admin and normal users", "source": ["tr.3"]}]},
@@ -1991,10 +2229,10 @@ def test_main_agent_compact_report_labels_combined_hypotheses_and_known(tmp_path
     report = agent.state_updater.compact_report()
     assert report == "\n".join(
         [
-            "Hypotheses + Known Updated",
-            "Hypotheses",
+            "Leads + Facts Updated",
+            "Leads",
             "  1. [active] h1: admin selector starves history mode [tr.2]",
-            "Known",
+            "Facts",
             "  1. [tr.3] feed SSE request path is shared by admin and normal users",
         ]
     )
@@ -2066,10 +2304,9 @@ def test_agent_state_report_only_includes_real_plan_and_known_changes(tmp_path):
 
     agent.apply_response(response)
 
-    assert "State Updated | VERIFY:idle" in agent.state_updater.latest_report
     assert "  Plan\n" in agent.state_updater.latest_report
     assert "    1. [○ todo] Inspect file" in agent.state_updater.latest_report
-    assert "  Known\n" in agent.state_updater.latest_report
+    assert "  Facts\n" in agent.state_updater.latest_report
     assert "    1. Search uses rg." in agent.state_updater.latest_report
 
     agent.apply_response(response)
@@ -2088,7 +2325,7 @@ def test_agent_ignores_empty_plan_replace(tmp_path):
     assert agent.state_updater.latest_report == ""
 
 
-def test_agent_treats_plan_without_mode_as_replace(tmp_path):
+def test_agent_patches_existing_plan_ids_without_mode(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
     agent.blackboard.plan = [
@@ -2100,6 +2337,43 @@ def test_agent_treats_plan_without_mode_as_replace(tmp_path):
     assert agent._build_response_context(response).has_fresh_plan_action is True
     agent.apply_response(response)
 
+    assert [item.text for item in agent.blackboard.plan] == ["Inspect new file", "Edit old file"]
+    assert agent.blackboard.plan[0].status == nanocode.PlanStatus.DOING
+
+
+def test_agent_explicit_plan_replace_discards_old_items(tmp_path):
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    agent.blackboard.plan = [
+        nanocode.PlanItem(id="p1", text="Inspect old file", status=nanocode.PlanStatus.DONE),
+        nanocode.PlanItem(id="p2", text="Edit old file", status=nanocode.PlanStatus.TODO),
+    ]
+
+    agent.apply_response({"actions": [{"type": "plan", "mode": "replace", "items": [{"id": "p1", "text": "Inspect new file", "status": "doing"}]}]})
+
+    assert [item.text for item in agent.blackboard.plan] == ["Inspect new file"]
+    assert agent.blackboard.plan[0].status == nanocode.PlanStatus.DOING
+
+
+def test_agent_replaces_plan_by_default_when_goal_changes(tmp_path):
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    agent.blackboard.goal = "old task"
+    agent.blackboard.plan = [
+        nanocode.PlanItem(id="p1", text="Inspect old file", status=nanocode.PlanStatus.DONE),
+        nanocode.PlanItem(id="p2", text="Edit old file", status=nanocode.PlanStatus.TODO),
+    ]
+
+    agent.apply_response(
+        {
+            "actions": [
+                {"type": "goal", "text": "new task", "complete": False},
+                {"type": "plan", "items": [{"id": "p1", "text": "Inspect new file", "status": "doing"}]},
+            ]
+        }
+    )
+
+    assert agent.blackboard.goal == "new task"
     assert [item.text for item in agent.blackboard.plan] == ["Inspect new file"]
     assert agent.blackboard.plan[0].status == nanocode.PlanStatus.DOING
 
@@ -2118,17 +2392,140 @@ def test_agent_applies_partial_plan_patch(tmp_path):
     ]
 
 
-def test_agent_applies_start_action_to_goal_and_plan(tmp_path):
+def test_agent_plan_items_track_followup_statuses(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    agent.apply_response(
+        {
+            "actions": [
+                {
+                    "type": "plan",
+                    "items": [
+                        {
+                            "id": "p1",
+                            "text": "Update dependency declaration",
+                            "status": "done",
+                            "context": "pyproject updated",
+                            "followup_action": {"status": "needed", "reason": "dependency change may require sync"},
+                            "followup_check": {"status": "done", "reason": "tests passed after edit"},
+                        }
+                    ],
+                }
+            ]
+        }
+    )
+
+    assert agent.blackboard.plan == [
+        nanocode.PlanItem(
+            id="p1",
+            text="Update dependency declaration",
+            status=nanocode.PlanStatus.DONE,
+            context="pyproject updated",
+            followup_action=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.NEEDED, "dependency change may require sync"),
+            followup_check=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.DONE, "tests passed after edit"),
+        )
+    ]
+    assert "followup_action: needed: dependency change may require sync" in agent.build_user_prompt()
+    assert "followup_check: done: tests passed after edit" in agent.build_user_prompt()
+    assert "followup_action: needed: dependency change may require sync" in agent.state_updater.latest_report
+    assert "followup_check: done: tests passed after edit" in agent.state_updater.latest_report
+
+
+def test_agent_completion_after_edit_requires_plan_followup_status(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
+    agent.blackboard.goal = "change sample"
+    agent.blackboard.goal_reached = True
+    agent.blackboard.plan = [nanocode.PlanItem(id="p1", text="edit sample", status=nanocode.PlanStatus.DONE, context="edited")]
+    agent.recent_edits = ["- sample.txt: edit sample"]
+    ctx = agent._build_response_context({"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]})
+
+    result = agent._finish_or_continue(ctx, None)
+
+    assert result.done is False
+    assert any("plan follow-up status missing" in error for error in agent.agent_feedback_errors)
+
+
+def test_agent_completion_after_edit_blocks_needed_plan_followup(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
+    agent.blackboard.goal = "change sample"
+    agent.blackboard.goal_reached = True
+    agent.blackboard.plan = [
+        nanocode.PlanItem(
+            id="p1",
+            text="edit sample",
+            status=nanocode.PlanStatus.DONE,
+            context="edited",
+            followup_action=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.NEEDED, "edit requires another file update"),
+            followup_check=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.DONE, "unit test passed"),
+        )
+    ]
+    agent.recent_edits = ["- sample.txt: edit sample"]
+    ctx = agent._build_response_context({"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]})
+
+    result = agent._finish_or_continue(ctx, None)
+
+    assert result.done is False
+    assert any("plan follow-up still needed" in error for error in agent.agent_feedback_errors)
+
+
+def test_agent_completion_after_edit_requires_plan_followup_reason(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
+    agent.blackboard.goal = "change sample"
+    agent.blackboard.goal_reached = True
+    agent.blackboard.plan = [
+        nanocode.PlanItem(
+            id="p1",
+            text="edit sample",
+            status=nanocode.PlanStatus.DONE,
+            context="edited",
+            followup_action=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.NONE),
+            followup_check=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.DONE),
+        )
+    ]
+    agent.recent_edits = ["- sample.txt: edit sample"]
+    ctx = agent._build_response_context({"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]})
+
+    result = agent._finish_or_continue(ctx, None)
+
+    assert result.done is False
+    assert any("plan follow-up reason missing" in error for error in agent.agent_feedback_errors)
+
+
+def test_agent_completion_after_edit_allows_resolved_plan_followup(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
+    agent.blackboard.goal = "change sample"
+    agent.blackboard.goal_reached = True
+    agent.blackboard.plan = [
+        nanocode.PlanItem(
+            id="p1",
+            text="edit sample",
+            status=nanocode.PlanStatus.DONE,
+            context="edited",
+            followup_action=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.NONE, "edit has no generated follow-up"),
+            followup_check=nanocode.PlanFollowup(nanocode.PlanFollowupStatus.DONE, "smoke test passed"),
+        )
+    ]
+    agent.recent_edits = ["- sample.txt: edit sample"]
+    ctx = agent._build_response_context({"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]})
+    messages = []
+
+    result = agent._finish_or_continue(ctx, messages.append)
+
+    assert result.done is True
+    assert messages == ["done"]
+
+
+def test_agent_applies_goal_and_plan_actions(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
     agent.apply_response(
         {
             "actions": [
+                {"type": "goal", "text": "change map", "complete": False},
                 {
-                    "type": "start",
-                    "goal": "change map",
-                    "plan": [
+                    "type": "plan",
+                    "items": [
                         {"id": "p1", "text": "Find map code", "status": "doing", "context": "need location"},
                         {"id": "p2", "text": "Edit map size", "status": "todo"},
                     ],
@@ -2146,7 +2543,21 @@ def test_agent_applies_start_action_to_goal_and_plan(tmp_path):
     assert "  Plan\n" in agent.state_updater.latest_report
 
 
-def test_start_new_goal_clears_task_local_kept_results_only(tmp_path):
+def test_agent_accepts_goal_without_plan_for_new_task(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
+    agent.blackboard.task_code = nanocode.TaskCode.NEW
+    messages = []
+
+    result = agent.handle_response({"actions": [{"type": "goal", "text": "change map", "complete": False}]}, on_message=messages.append)
+
+    assert result.done is False
+    assert agent.blackboard.goal == "change map"
+    assert agent.blackboard.task_code == nanocode.TaskCode.WORKING
+    assert agent.blackboard.plan == []
+    assert messages == ["Goal Updated\n  change map"]
+
+
+def test_new_goal_clears_task_local_kept_results_only(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     agent.blackboard.goal = "old goal"
     agent.tool_context.kept_results = ['- ok tool=Read args=["old.py"] key=tr.1\n  output:\nselected result']
@@ -2156,11 +2567,11 @@ def test_start_new_goal_clears_task_local_kept_results_only(tmp_path):
     agent.apply_response(
         {
             "actions": [
+                {"type": "goal", "text": "new goal", "complete": False},
                 {
-                    "type": "start",
-                    "goal": "new goal",
-                    "plan": [{"id": "p1", "text": "Inspect new target", "status": "doing"}],
-                }
+                    "type": "plan",
+                    "items": [{"id": "p1", "text": "Inspect new target", "status": "doing"}],
+                },
             ]
         }
     )
@@ -2171,7 +2582,7 @@ def test_start_new_goal_clears_task_local_kept_results_only(tmp_path):
     assert "recent.py" in _blocks_text(agent.tool_context.recent)
 
 
-def test_start_same_goal_keeps_task_local_tool_results(tmp_path):
+def test_same_goal_keeps_task_local_tool_results(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     agent.blackboard.goal = "same goal"
     agent.tool_context.kept_results = ['- ok tool=Read args=["old.py"] key=tr.1\n  output:\nselected result']
@@ -2180,11 +2591,11 @@ def test_start_same_goal_keeps_task_local_tool_results(tmp_path):
     agent.apply_response(
         {
             "actions": [
+                {"type": "goal", "text": "same goal", "complete": False},
                 {
-                    "type": "start",
-                    "goal": "same goal",
-                    "plan": [{"id": "p1", "text": "Continue current target", "status": "doing"}],
-                }
+                    "type": "plan",
+                    "items": [{"id": "p1", "text": "Continue current target", "status": "doing"}],
+                },
             ]
         }
     )
@@ -2202,11 +2613,11 @@ def test_agent_state_report_does_not_repeat_goal_for_restarted_task_when_text_ma
     agent.apply_response(
         {
             "actions": [
+                {"type": "goal", "text": "change map", "complete": False},
                 {
-                    "type": "start",
-                    "goal": "change map",
-                    "plan": [{"id": "p1", "text": "Find map code", "status": "doing"}],
-                }
+                    "type": "plan",
+                    "items": [{"id": "p1", "text": "Find map code", "status": "doing"}],
+                },
             ]
         }
     )
@@ -2215,55 +2626,44 @@ def test_agent_state_report_does_not_repeat_goal_for_restarted_task_when_text_ma
     assert "  Plan\n" in agent.state_updater.latest_report
 
 
-def test_agent_resets_verification_when_goal_changes(tmp_path):
+def test_agent_resets_checks_when_goal_changes(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
     agent.blackboard.goal = "old goal"
-    agent.blackboard.verification.goal = "old goal"
-    agent.blackboard.verification.status = VerificationStatus.DONE
-    agent.blackboard.verification.kind = "test"
-    agent.blackboard.verification.method = "old check"
-    agent.blackboard.verification.criteria = ["old criterion"]
-    agent.blackboard.verification.context = "old context"
+    agent.blackboard.checks.status = CheckStatus.PASSED
+    agent.blackboard.checks.method = "old check"
+    agent.blackboard.checks.context = "old context"
 
     agent.apply_response({"actions": [{"type": "goal", "text": "new goal", "complete": False}]})
 
     assert agent.blackboard.goal_reached is False
-    assert agent.blackboard.verification.goal == ""
-    assert agent.blackboard.verification.status == VerificationStatus.IDLE
-    assert agent.blackboard.verification.kind == ""
-    assert agent.blackboard.verification.method == ""
-    assert agent.blackboard.verification.criteria == []
-    assert agent.blackboard.verification.context == ""
+    assert agent.blackboard.checks.status == CheckStatus.IDLE
+    assert agent.blackboard.checks.method == ""
+    assert agent.blackboard.checks.context == ""
 
-    agent.apply_response(
-        {"actions": [{"type": "verify", "kind": "test", "method": "run tests", "criteria": ["tests pass"], "status": "passed", "context": "tests pass"}]}
-    )
+    agent.apply_response({"actions": [{"type": "verify", "method": "run tests", "status": "passed", "context": "tests pass"}]})
 
-    assert agent.blackboard.verification.goal == "new goal"
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
-    assert agent.blackboard.verification.kind == "test"
-    assert agent.blackboard.verification.method == "run tests"
-    assert agent.blackboard.verification.criteria == ["tests pass"]
-    assert agent.blackboard.verification.context == "tests pass"
+    assert agent.blackboard.checks.status == CheckStatus.PASSED
+    assert agent.blackboard.checks.method == "run tests"
+    assert agent.blackboard.checks.context == "tests pass"
 
     agent.apply_response({"actions": [{"type": "goal", "text": "new goal", "complete": True}]})
 
     assert agent.blackboard.goal_reached is True
 
 
-def test_agent_task_code_returns_to_working_after_verification_result(tmp_path):
+def test_agent_task_code_returns_to_working_after_checks_result(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
-    agent.blackboard.task_code = nanocode.TaskCode.VERIFYING
+    agent.blackboard.task_code = nanocode.TaskCode.CHECKING
 
     agent.apply_response({"actions": [{"type": "verify", "status": "passed", "context": "checked"}]})
 
     assert agent.blackboard.task_code == nanocode.TaskCode.WORKING
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
+    assert agent.blackboard.checks.status == CheckStatus.PASSED
 
 
-def test_agent_accepts_combined_verification_kind_and_ignores_pending(tmp_path):
+def test_agent_accepts_checks_result_and_ignores_pending(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
 
     agent.apply_response(
@@ -2271,27 +2671,23 @@ def test_agent_accepts_combined_verification_kind_and_ignores_pending(tmp_path):
             "actions": [
                 {
                     "type": "verify",
-                    "kind": "syntax_check+test",
                     "method": "check edit",
-                    "criteria": ["syntax passes", "tests pass"],
                     "status": "passed",
                 }
             ]
         }
     )
 
-    assert agent.blackboard.verification.kind == "syntax_check+test"
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
+    assert agent.blackboard.checks.status == CheckStatus.PASSED
+    assert agent.blackboard.checks.method == "check edit"
 
-    agent.blackboard.verification.reset()
+    agent.blackboard.checks.reset()
     result = agent.handle_response(
         {
             "actions": [
                 {
                     "type": "verify",
-                    "kind": "syntax_check+test",
                     "method": "check edit",
-                    "criteria": ["syntax passes", "tests pass"],
                     "status": "pending",
                 }
             ]
@@ -2299,8 +2695,7 @@ def test_agent_accepts_combined_verification_kind_and_ignores_pending(tmp_path):
     )
 
     assert result.done is False
-    assert agent.blackboard.verification.status == VerificationStatus.IDLE
-    assert agent.blackboard.verification.kind == ""
+    assert agent.blackboard.checks.status == CheckStatus.IDLE
     assert any('ignored verify status="pending"' in error for error in agent.agent_feedback_errors)
 
 
@@ -2310,14 +2705,15 @@ def test_agent_execute_tool_calls_requests_confirmation_for_edit_tools(tmp_path)
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
     confirmations = []
+    anchor = _read_anchors(session, "sample.txt")[0]
 
     latest = agent.execute_tool_calls(
-        [{"name": "Edit", "intention": "edit sample", "args": ["sample.txt", "old", "new"]}],
+        [{"name": "Edit", "intention": "edit sample", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]}],
         confirm=lambda call, tool: confirmations.append((call.executed, tool.preview())) or False,
     )
 
     assert confirmations
-    assert confirmations[0][0] == 'Edit("sample.txt", "old", "new")'
+    assert confirmations[0][0].startswith('Edit("sample.txt", ')
     assert "-old" in confirmations[0][1]
     assert "+new" in confirmations[0][1]
     assert "Cancelled: user refused" in latest
@@ -2329,9 +2725,10 @@ def test_agent_execute_tool_calls_records_refusal_reason(tmp_path):
     path.write_text("old\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
+    anchor = _read_anchors(session, "sample.txt")[0]
 
     latest = agent.execute_tool_calls(
-        [{"name": "Edit", "intention": "edit sample", "args": ["sample.txt", "old", "new"]}],
+        [{"name": "Edit", "intention": "edit sample", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]}],
         confirm=lambda call, tool: "please inspect tests first",
     )
 
@@ -2339,6 +2736,8 @@ def test_agent_execute_tool_calls_records_refusal_reason(tmp_path):
     assert path.read_text(encoding="utf-8") == "old\n"
     assert session.state.conversation == []
     assert os.path.isdir(session.tool_results_dir())
+    assert any("please inspect tests first" in error for error in agent.agent_feedback_errors)
+    assert "please inspect tests first" in agent.build_user_prompt()
 
 
 def test_agent_execute_tool_calls_stops_batch_after_refusal(tmp_path):
@@ -2346,10 +2745,11 @@ def test_agent_execute_tool_calls_stops_batch_after_refusal(tmp_path):
     path.write_text("old\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
+    anchor = _read_anchors(session, "sample.txt")[0]
 
     latest = agent.execute_tool_calls(
         [
-            {"name": "Edit", "intention": "edit sample", "args": ["sample.txt", "old", "new"]},
+            {"name": "Edit", "intention": "edit sample", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]},
             {"name": "Bash", "intention": "should not run", "args": ["touch should-not-exist"]},
         ],
         confirm=lambda call, tool: "use English question",
@@ -2395,12 +2795,12 @@ def test_agent_execute_tool_calls_rejects_failed_preview_before_confirmation(tmp
     confirmations = []
 
     latest = agent.execute_tool_calls(
-        [{"name": "ReplaceRange", "intention": "edit stale range", "args": ["sample.txt", "0", "1", "bad", "", "", "new"]}],
+        [{"name": "Edit", "intention": "edit stale range", "args": ["sample.txt", [{"op": "replace", "start": "0:abcdef", "end": "0:abcdef", "content": "new\n"}]]}],
         confirm=lambda call, tool: confirmations.append((call.executed, tool.preview())) or True,
     )
 
     assert confirmations == []
-    assert "ToolCallError: preview unavailable: fingerprint mismatch" in latest
+    assert "ToolCallError: preview unavailable: stale anchor" in latest
     assert path.read_text(encoding="utf-8") == "old\n"
 
 
@@ -2422,30 +2822,43 @@ def test_agent_execute_tool_calls_records_arg_errors_in_feedback(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
-    latest = agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": ["sample.txt", "bad", "1"]}])
+    latest = agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": ["sample.txt", "bad,1"]}])
 
-    assert "ToolCallError: invalid start: should be an integer" in latest
-    assert agent.agent_feedback_errors == [
-        'Error: tool call args invalid: tool=Read args=["sample.txt","bad","1"] -> ToolCallError: invalid start: should be an integer. Rule: use the tool signature exactly.'
-    ]
+    assert "ToolCallError: Read args error: invalid range token" in latest
+    assert len(agent.agent_feedback_errors) == 1
+    assert 'tool=Read args=["sample.txt","bad,1"]' in agent.agent_feedback_errors[0]
+    assert "invalid range token" in agent.agent_feedback_errors[0]
 
 
 def test_agent_execute_tool_calls_reports_arg_count_details(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
-    latest = agent.execute_tool_calls([{"name": "ReplaceRange", "intention": "bad edit", "args": ["sample.txt", "0", "1", "abc", "", ""]}])
+    latest = agent.execute_tool_calls([{"name": "Edit", "intention": "bad edit", "args": ["sample.txt", "0", "1"]}])
+
+    assert "ToolCallError: requires args: filepath, edits" in latest
+    assert "got 3 args, expected 2, extra: 1" in agent.agent_feedback_errors[0]
+    assert "use Edit(filepath, edits) with visible line anchors" in agent.agent_feedback_errors[0]
+
+
+def test_agent_drops_old_feedback_after_successful_tool_progress(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.agent_feedback_errors = [
+        "Error blocked: Invalid function-tool response: invalid tool arguments.",
+        "Error blocked: tool call args invalid: old bad call.",
+        "Warning blocked: state update-only turn; include frontier tool.",
+    ]
 
-    assert "ToolCallError: requires exactly 7 args" in latest
-    assert "got 6 args, expected 7, missing: content" in agent.agent_feedback_errors[0]
-    assert "use ReplaceRange for read ranges" in agent.agent_feedback_errors[0]
+    agent.handle_response({"actions": [{"type": "tool", "name": "List", "intention": "inspect root", "args": ["."]}]})
+
+    assert agent.agent_feedback_errors == []
 
 
 def test_tool_arg_error_does_not_force_observe(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
-    agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": ["sample.txt", "bad", "1"]}])
+    agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": ["sample.txt", "bad,1"]}])
 
     assert agent.mode == nanocode.AgentMode.ACT
     assert agent.agent_feedback_errors
@@ -2468,7 +2881,7 @@ def test_agent_blocks_repeated_identical_failed_tool_call(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
     _seed_plan(agent, "read sample")
-    action = {"type": "tool", "name": "Read", "intention": "bad range", "args": ["sample.txt", "bad", "1"]}
+    action = {"type": "tool", "name": "Read", "intention": "bad range", "args": ["sample.txt", "bad,1"]}
 
     agent.handle_response({"actions": [action]})
     agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "failed read has no useful result"}]})
@@ -2480,13 +2893,13 @@ def test_agent_blocks_repeated_identical_failed_tool_call(tmp_path):
     assert any("repeated failed tool call" in error for error in agent.agent_feedback_errors)
 
 
-def test_agent_execute_bash_does_not_require_verification(tmp_path):
+def test_agent_execute_bash_does_not_require_checks(tmp_path):
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
     agent.execute_tool_calls([{"name": "Bash", "intention": "run command", "args": ["true"]}], confirm=lambda call, tool: True)
 
-    assert agent.blackboard.verification_required is False
+    assert agent.blackboard.checks_required is False
 
 
 def test_agent_marks_nonzero_bash_exit_as_failed_tool_call(tmp_path):
@@ -2504,7 +2917,7 @@ def test_agent_execute_tool_calls_does_not_record_runtime_errors_in_feedback(tmp
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
 
-    latest = agent.execute_tool_calls([{"name": "Read", "intention": "missing file", "args": ["missing.txt", "0", "1"]}])
+    latest = agent.execute_tool_calls([{"name": "Read", "intention": "missing file", "args": ["missing.txt", "0,1"]}])
 
     assert "ToolCallError: " in latest
     assert agent.agent_feedback_errors == []
@@ -2528,23 +2941,24 @@ def test_agent_execute_tool_calls_shows_auto_approval_in_yolo_mode(tmp_path):
     agent = Agent(session)
     confirmations = []
     auto_approvals = []
+    anchor = _read_anchors(session, "sample.txt")[0]
 
     latest = agent.execute_tool_calls(
-        [{"name": "Edit", "intention": "edit sample", "args": ["sample.txt", "old", "new"]}],
+        [{"name": "Edit", "intention": "edit sample", "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]]}],
         confirm=lambda call, tool: confirmations.append(call.executed) or False,
         on_auto_approve=lambda call, tool: auto_approvals.append((call.executed, tool.preview())),
     )
 
     assert confirmations == []
     assert auto_approvals
-    assert auto_approvals[0][0] == 'Edit("sample.txt", "old", "new")'
+    assert auto_approvals[0][0].startswith('Edit("sample.txt", ')
     assert "-old" in auto_approvals[0][1]
     assert "+new" in auto_approvals[0][1]
     assert latest.startswith("- ok")
     assert path.read_text(encoding="utf-8") == "new\n"
-    assert agent.blackboard.verification_required is True
-    assert agent.blackboard.task_code == nanocode.TaskCode.VERIFYING
-    assert agent.runtime.recent_edits == ["- sample.txt: edit sample"]
+    assert agent.blackboard.checks_required is True
+    assert agent.blackboard.task_code == nanocode.TaskCode.CHECKING
+    assert agent.recent_edits == ["- sample.txt: edit sample"]
 
 
 def test_agent_run_loops_tool_results_into_next_model_prompt(tmp_path):
@@ -2555,7 +2969,7 @@ def __init__(self):
             self.user_prompts = []
             self.responses = [
                 {
-                    "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}]
+                    "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]
                 },
                 {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]},
                 {
@@ -2572,7 +2986,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -2586,7 +3000,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     response = agent.run("read sample", on_message=messages.append)
 
     assert response["actions"][-1]["message_for_complete"] == "done"
-    assert messages[0].startswith("[success] Read sample.txt 0:1 -> tr.1")
+    assert messages[0].startswith("[success] Read sample.txt 0,1 -> tr.1")
     assert "why:" not in messages[0]
     assert "log: .nanocode/sessions/" not in messages[0]
     assert messages[-1] == "done"
@@ -2595,93 +3009,146 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert "alpha" in fake_client.user_prompts[2]
     assert "Kept Tool Results:" in fake_client.user_prompts[2]
     assert "<ReadToolResult>" in fake_client.user_prompts[2]
-    assert 'tool=Read args=["sample.txt","0","1"]' in _blocks_text(agent.tool_context.latest)
+    assert 'tool=Read args=["sample.txt","0,1"]' in _blocks_text(agent.tool_context.latest)
     assert agent.tool_context.recent == []
     assert agent.blackboard.known == ["Read sample.txt and found alpha."]
     assert agent.blackboard.user_input == "read sample"
     assert agent.blackboard.goal == "read sample"
     assert agent.blackboard.plan == [nanocode.PlanItem(text="test plan", status=nanocode.PlanStatus.DONE, context="seeded")]
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
+    assert agent.blackboard.checks.status == CheckStatus.PASSED
     assert agent.blackboard.goal_reached is False
-    assert agent.blackboard.verification_required is False
+    assert agent.blackboard.checks_required is False
+
+
+def test_agent_run_ingests_queued_user_input_before_next_model_call(tmp_path):
+    class FakeModelClient:
+        def __init__(self):
+            self.user_prompts = []
+            self.responses = [
+                {"actions": [{"type": "goal", "text": "initial task"}]},
+                {"actions": [{"type": "known", "items": ["queued feedback was visible"]}]},
+                {"actions": [{"type": "goal", "complete": True, "message_for_complete": "done"}]},
+            ]
+
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            self.user_prompts.append(user_prompt)
+            return self.responses.pop(0)
 
+    queued_inputs = [None, "use chinese", None]
+    messages = []
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.model_client = FakeModelClient()
 
-def test_agent_plan_mode_tool_gate_allows_only_readonly_tools(tmp_path):
-    agent = Agent(_session(tmp_path, plan_mode=True))
+    response = agent.run("initial task", on_message=messages.append, poll_user_input=lambda: queued_inputs.pop(0) if queued_inputs else None)
 
-    assert agent._plan_mode_tool_error([{"type": "tool", "name": "Read", "args": ["sample.txt"]}]) == ""
-    assert agent._plan_mode_tool_error([{"type": "tool", "name": "Git", "args": ["status", "--short"]}]) == ""
-    assert "blocked tool=Bash" in agent._plan_mode_tool_error([{"type": "tool", "name": "Bash", "args": ["echo hi"]}])
-    assert "blocked tool=Edit" in agent._plan_mode_tool_error([{"type": "tool", "name": "Edit", "args": ["sample.txt", "old", "new"]}])
-    assert "blocked tool=Git" in agent._plan_mode_tool_error([{"type": "tool", "name": "Git", "args": ["commit", "-m", "x"]}])
-    assert "blocked tool=Lsp" in agent._plan_mode_tool_error([{"type": "tool", "name": "Lsp", "args": ["symbols"]}])
+    assert response["actions"][0]["message_for_complete"] == "done"
+    assert messages == ["Goal Updated\n  initial task", "sent: use chinese", "Facts Updated\n  1. queued feedback was visible", "done"]
+    assert [item.content for item in agent.session.state.conversation if isinstance(item, nanocode.UserMessage)] == ["initial task", "use chinese"]
+    assert agent.blackboard.user_input == "use chinese"
+    assert "use chinese" not in agent.model_client.user_prompts[0]
+    assert "use chinese" in agent.model_client.user_prompts[1]
+    assert "Pending User Feedback:\nuse chinese" in agent.model_client.user_prompts[1]
+    assert "Pending User Feedback:\n(empty)" in agent.model_client.user_prompts[2]
+    assert "Latest User Request:" in agent.model_client.user_prompts[1]
 
 
-def test_agent_plan_mode_rejects_mutating_tool_before_execution(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("old\n", encoding="utf-8")
-    agent = Agent(_session(tmp_path, plan_mode=True, debug=True))
-    _seed_plan(agent, "plan change")
+def test_agent_rejects_invalid_action_instead_of_completing(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
     messages = []
 
-    result = agent.handle_response(
-        {"actions": [{"type": "tool", "name": "Edit", "intention": "change sample", "args": ["sample.txt", "old", "new"]}]},
-        confirm=lambda call, tool: True,
-        on_message=messages.append,
-    )
+    result = agent.handle_response({"actions": [{"type": "invalid", "text": "done"}]}, on_message=messages.append)
 
     assert result.done is False
-    assert path.read_text(encoding="utf-8") == "old\n"
-    assert agent.tool_runner.latest_executions == []
-    assert messages == ['PlanMode_Gate: plan mode allows readonly discovery only; blocked tool=Edit args=["sample.txt","old","new"].']
+    assert agent.session.state.conversation == []
+    assert messages == ["Protocol_Gate: invalid action type(s): invalid."]
+
+
+def test_agent_normalizes_protocol_action_type_case(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+
+    ctx = agent._build_response_context(
+        {
+            "actions": [
+                {"type": "Goal", "text": "change map", "complete": False},
+                {"type": "PLAN", "items": ["inspect files"]},
+                {"type": "Known", "items": ["fact"]},
+                {"type": "LEAD", "items": ["branch"]},
+                {"type": "Verify", "status": "passed", "context": "checked"},
+                {"type": "USER_RULE", "text": "prefer concise", "message": "saved"},
+                {"type": "FORGET", "source": ["tr.1"], "reason": "old"},
+                {"type": "KEEP", "source": ["tr.2"], "reason": "useful"},
+                {"type": "Tool", "name": "search", "intention": "find", "args": ["needle"]},
+            ]
+        }
+    )
+
+    assert [action["type"] for action in ctx.actions] == ["goal", "plan", "known", "lead", "verify", "user_rule", "forget", "keep", "tool"]
 
 
-def test_agent_plan_mode_rejects_chat_instead_of_completing(tmp_path):
-    agent = Agent(_session(tmp_path, plan_mode=True, debug=True))
+def test_agent_accepts_capitalized_goal_action_type(tmp_path):
+    agent = Agent(_session(tmp_path, debug=True))
     messages = []
 
-    result = agent.handle_response({"actions": [{"type": "chat", "text": "done"}]}, on_message=messages.append)
+    result = agent.handle_response({"actions": [{"type": "Goal", "text": "change map", "complete": False}]}, on_message=messages.append)
 
     assert result.done is False
-    assert agent.session.state.conversation == []
-    assert messages == ["ActionType_Gate: invalid action type(s): chat."]
+    assert agent.blackboard.goal == "change map"
+    assert not any("Protocol_Gate" in message for message in messages)
 
 
-def test_agent_plan_mode_stores_proposed_plan_completion(tmp_path):
-    agent = Agent(_session(tmp_path, plan_mode=True))
-    _seed_plan(agent, "plan change")
-    message = "<proposed_plan>\n1. Inspect target.\n2. Patch code.\n3. Run tests.\n</proposed_plan>"
+def test_agent_normalizes_direct_repo_tool_action_type(tmp_path):
+    path = tmp_path / "sample.txt"
+    path.write_text("old\n", encoding="utf-8")
+    agent = Agent(_session(tmp_path, debug=True))
+    _seed_plan(agent, "change sample")
+    messages = []
+    anchor = _read_anchors(agent.session, "sample.txt")[0]
 
-    result = agent.handle_response({"actions": [{"type": "goal", "text": "plan change", "complete": True, "message_for_complete": message}]})
+    result = agent.handle_response(
+        {
+            "actions": [
+                {
+                    "type": "Edit",
+                    "intention": "change sample",
+                    "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]],
+                }
+            ]
+        },
+        confirm=lambda call, tool: True,
+        on_message=messages.append,
+    )
 
-    assert result.done is True
-    assert isinstance(agent.session.state.conversation[-1], nanocode.AssistantMessage)
-    assert agent.session.state.conversation[-1].content == message
+    assert result.done is False
+    assert path.read_text(encoding="utf-8") == "new\n"
+    assert agent.tool_runner.latest_executions[0].call.name == "Edit"
+    assert not any("Protocol_Gate" in message for message in messages)
 
 
-def test_agent_plan_mode_requires_proposed_plan_completion_block(tmp_path):
-    agent = Agent(_session(tmp_path, plan_mode=True, debug=True))
-    _seed_plan(agent, "plan change")
+def test_agent_normalizes_lowercase_repo_tool_names(tmp_path):
+    path = tmp_path / "sample.txt"
+    path.write_text("needle\n", encoding="utf-8")
+    agent = Agent(_session(tmp_path, debug=True))
+    _seed_plan(agent, "find sample")
     messages = []
 
     result = agent.handle_response(
-        {"actions": [{"type": "goal", "text": "plan change", "complete": True, "message_for_complete": "plain plan"}]},
+        {"actions": [{"type": "search", "intention": "find sample", "args": ["needle", "sample.txt"]}]},
         on_message=messages.append,
     )
 
     assert result.done is False
-    assert not agent.session.state.conversation
-    assert messages == ["PlanMode_Gate: final plan must be wrapped in <proposed_plan>...</proposed_plan>."]
+    assert agent.tool_runner.latest_executions[0].call.name == "Search"
+    assert not any("Protocol_Gate" in message for message in messages)
 
 
-def test_agent_run_allows_readonly_answer_without_verification(tmp_path):
+def test_agent_run_allows_readonly_answer_without_checks(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
 
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
-                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}]},
+                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]},
                 {
                     "actions": [
                         {"type": "goal", "text": "answer sample", "complete": True, "message_for_complete": "sample contains alpha"},
@@ -2689,7 +3156,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -2700,12 +3167,14 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     response = agent.run("answer sample", on_message=messages.append)
 
     assert response["actions"][-1]["message_for_complete"] == "sample contains alpha"
-    assert "Retrying: verification must pass before completion." not in messages
+    assert "Retrying: checks must pass before completion." not in messages
     assert messages[-1] == "sample contains alpha"
 
 
-def test_agent_run_executes_edit_tool_and_requires_verification(tmp_path):
+def test_agent_run_executes_edit_tool_and_requires_checks(tmp_path):
     (tmp_path / "sample.txt").write_text("old\n", encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+    anchor = _read_anchors(session, "sample.txt")[0]
 
     class FakeModelClient:
         def __init__(self):
@@ -2718,27 +3187,35 @@ def __init__(self):
                             "type": "tool",
                             "name": "Edit",
                             "intention": "change sample text",
-                            "args": ["sample.txt", "old", "new"],
+                            "args": ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "new\n"}]],
                         },
                     ]
                 },
                 {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]},
                 {"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]},
-                {"actions": [{"type": "tool", "name": "Read", "intention": "inspect changed sample", "args": ["sample.txt", "0", "1"]}]},
-                {"actions": [{"type": "keep", "source": ["tr.2"], "reason": "keep useful result"}]},
                 {
                     "actions": [
-                        {"type": "verify", "kind": "change_check", "method": "Read sample.txt", "criteria": ["sample text is new"], "status": "passed", "context": "sample.txt contains new"},
+                        {
+                            "type": "plan",
+                            "items": [
+                                {
+                                    "text": "test plan",
+                                    "status": "done",
+                                    "context": "seeded",
+                                    "followup_action": {"status": "none", "reason": "seeded plan has no follow-up action"},
+                                    "followup_check": {"status": "done", "reason": "seeded plan check complete"},
+                                }
+                            ],
+                        },
                         {"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"},
                     ]
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
-    session = Session(cwd=str(tmp_path))
     agent = Agent(session)
     _seed_plan(agent, "change sample")
     agent.model_client = FakeModelClient()
@@ -2747,50 +3224,68 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     response = agent.run("change sample", confirm=lambda call, tool: True, on_message=messages.append)
 
     assert response["actions"][-1]["message_for_complete"] == "done"
-    assert any(message.startswith("[success] Edit sample.txt") for message in messages)
-    assert any(message.startswith("[success] Read sample.txt") for message in messages)
+    assert any(message.startswith("[success] Edit sample.txt 1 edits") for message in messages)
     assert not any(message.startswith("State Updated") for message in messages)
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
-    assert agent.blackboard.verification.context == "sample.txt contains new"
+    assert any("edited files need Checks before completion" in error for error in agent.agent_feedback_errors)
     assert (tmp_path / "sample.txt").read_text(encoding="utf-8") == "new\n"
     assert messages[-1] == "done"
 
 
-def test_agent_reports_edit_verification_gate_in_debug(tmp_path):
+def test_agent_warns_but_allows_completion_when_checks_required(tmp_path):
     agent = Agent(_session(tmp_path, debug=True))
     _seed_plan(agent, "change sample")
     agent.blackboard.goal_reached = True
-    agent.blackboard.verification_required = True
-    agent.blackboard.verification.status = VerificationStatus.REQUIRED
+    agent.blackboard.checks_required = True
+    agent.blackboard.checks.status = CheckStatus.REQUIRED
     ctx = agent._build_response_context({"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]})
     messages = []
 
     result = agent._finish_or_continue(ctx, messages.append)
 
-    assert result.done is False
-    assert messages == ["Verification_Gate: edit completion requires verification."]
+    assert result.done is True
+    assert messages == ["done"]
+    assert agent.agent_feedback_errors == [
+        'Warning blocked: edited files need Checks before completion. Next: run checks, then report verify status="passed"|"failed"|"blocked".'
+    ]
+
+
+def test_agent_plain_text_can_finish_without_active_task_when_checks_required(tmp_path):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.checks_required = True
+    agent.blackboard.checks.status = CheckStatus.REQUIRED
+    agent.blackboard.task_code = nanocode.TaskCode.NEW
+    ctx = agent._build_response_context({"actions": [], "_assistant_text": "Done."})
+    messages = []
+
+    result = agent._handle_text_response(ctx, messages.append)
+
+    assert result is not None
+    assert result.done is True
+    assert agent.blackboard.task_code == nanocode.TaskCode.DONE
+    assert agent.agent_feedback_errors == []
+    assert messages == ["Done."]
 
 
-def test_agent_run_keeps_tool_results_when_format_retry_happens(tmp_path):
+def test_agent_run_keeps_tool_results_when_format_retry_happens(tmp_path, monkeypatch):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
 
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
-                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}]},
-                {"_format_error": "Invalid model output: plain answer", "actions": []},
+                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]},
+                {"_format_error": "Invalid function-tool response: plain answer", "actions": []},
                 {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]},
                 {"actions": _final_actions("read sample")},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
-    agent.OBSERVE_AFTER_PENDING_RESULT_COUNT = 1
+    _set_context_budget(monkeypatch, agent, observe_after_results=1)
     _seed_plan(agent, "read sample")
     agent.model_client = FakeModelClient()
 
@@ -2802,7 +3297,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert "<ReadToolResult>" in agent.model_client.user_prompts[2]
     assert "Kept Tool Results:" in agent.model_client.user_prompts[3]
     assert "<ReadToolResult>" in agent.model_client.user_prompts[3]
-    assert 'tool=Read args=["sample.txt","0","1"]' in _blocks_text(agent.tool_context.latest)
+    assert 'tool=Read args=["sample.txt","0,1"]' in _blocks_text(agent.tool_context.latest)
     assert agent.tool_context.recent == []
 
 
@@ -2815,7 +3310,7 @@ def __init__(self):
             self.responses = [
                 {
                     "actions": [
-                        {"type": "tool", "name": "Read", "intention": f"read {index}", "args": [f"sample-{index}.txt", "0", "1"]}
+                        {"type": "tool", "name": "Read", "intention": f"read {index}", "args": [f"sample-{index}.txt", "0,1"]}
                         for index in range(51)
                     ]
                 },
@@ -2823,7 +3318,7 @@ def __init__(self):
                 {"actions": _final_actions("read samples")},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -2831,7 +3326,6 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     agent.blackboard.goal = "read samples"
     agent.blackboard.plan = [nanocode.PlanItem(text="try answer", status=nanocode.PlanStatus.DONE, context="seeded")]
     agent.blackboard.known = ["keep this fact"]
-    agent.blackboard.stable_knowledge = {"workflow": ["Project test command is make test."]}
     agent.tool_context.latest = ["old tool call"]
     agent.model_client = FakeModelClient()
 
@@ -2840,7 +3334,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert len(session.state.tool_result_store) == 51
     assert list(session.state.tool_result_store)[0] == "tr.1"
 
-    agent.model_client.responses = [{"actions": [{"type": "chat", "text": "ok"}]}]
+    agent.model_client.responses = [{"actions": [], "_assistant_text": "ok"}]
     agent.run("next task")
 
     assert len(session.state.tool_result_store) == 50
@@ -2850,8 +3344,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.blackboard.goal == "read samples"
     assert agent.blackboard.plan == [nanocode.PlanItem(text="try answer", status=nanocode.PlanStatus.DONE, context="seeded")]
     assert agent.blackboard.known == ["keep this fact"]
-    assert agent.blackboard.stable_knowledge == {"workflow": ["Project test command is make test."]}
-    assert agent.blackboard.verification.status == VerificationStatus.IDLE
+    assert agent.blackboard.checks.status == CheckStatus.IDLE
     assert agent.blackboard.goal_reached is False
 
 
@@ -2862,12 +3355,12 @@ class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
-                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}]},
+                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]},
                 {"actions": [{"type": "forget", "source": ["tr.1"], "reason": "sample content is not needed"}]},
                 {"actions": _final_actions("read sample", "done too early")},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -2896,7 +3389,7 @@ def __init__(self):
                 {
                     "actions": [
                         {"type": "goal", "text": "read sample", "complete": False},
-                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]},
+                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]},
                     ]
                 },
                 {
@@ -2907,7 +3400,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -2924,7 +3417,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert [item.text for item in agent.blackboard.plan] == ["Read sample"]
 
 
-def test_agent_run_requires_fresh_plan_when_goal_changes(tmp_path):
+def test_agent_run_allows_readonly_discovery_when_goal_changes_before_plan(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
 
     class FakeModelClient:
@@ -2933,23 +3426,23 @@ def __init__(self):
                 {
                     "actions": [
                         {"type": "goal", "text": "new goal", "complete": False},
-                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]},
+                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]},
                     ]
                 },
                 {
                     "actions": [
                         {"type": "goal", "text": "new goal", "complete": False},
-                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]},
+                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]},
                     ]
                 },
                 {
                     "actions": [
+                        {"type": "goal", "text": "new goal", "complete": False},
                         {
-                            "type": "start",
-                            "goal": "new goal",
-                            "plan": [{"id": "p1", "text": "Read sample", "status": "doing"}],
+                            "type": "plan",
+                            "items": [{"id": "p1", "text": "Read sample", "status": "doing"}],
                         },
-                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]},
+                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]},
                     ]
                 },
                 {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]},
@@ -2961,7 +3454,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -2977,7 +3470,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert "Retrying: new goal requires a fresh plan." not in messages
     assert agent.blackboard.goal == "new goal"
     assert [item.text for item in agent.blackboard.plan] == ["Read sample"]
-    assert len(session.state.tool_result_store) == 1
+    assert len(session.state.tool_result_store) == 3
 
 
 def test_agent_run_requires_task_alignment_before_work_with_old_context(tmp_path):
@@ -2986,15 +3479,15 @@ def test_agent_run_requires_task_alignment_before_work_with_old_context(tmp_path
     class FakeModelClient:
         def __init__(self):
             self.responses = [
-                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}]},
+                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]},
                 {
                     "actions": [
+                        {"type": "goal", "text": "run lint", "complete": False},
                         {
-                            "type": "start",
-                            "goal": "run lint",
-                            "plan": [{"id": "p1", "text": "Read sample", "status": "doing"}],
+                            "type": "plan",
+                            "items": [{"id": "p1", "text": "Read sample", "status": "doing"}],
                         },
-                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]},
+                        {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]},
                     ]
                 },
                 {
@@ -3005,7 +3498,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -3021,10 +3514,9 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.blackboard.goal == "run lint"
     assert [item.text for item in agent.blackboard.plan] == ["Read sample"]
     assert "previous task context is still present" in " ".join(agent.agent_feedback_errors)
-    assert not any("repeated start is invalid" in error for error in agent.agent_feedback_errors)
 
 
-def test_agent_run_rejects_repeated_start_after_task_is_working(tmp_path):
+def test_agent_run_warns_on_goal_rewrite_after_task_is_working(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
 
     class FakeModelClient:
@@ -3033,33 +3525,25 @@ def __init__(self):
             self.responses = [
                 {
                     "actions": [
+                        {"type": "goal", "text": "read sample", "complete": False},
                         {
-                            "type": "start",
-                            "goal": "read sample",
-                            "plan": [{"id": "p1", "text": "Read sample", "status": "doing"}],
-                        }
+                            "type": "plan",
+                            "items": [{"id": "p1", "text": "Read sample", "status": "doing"}],
+                        },
                     ]
                 },
+                {"actions": [{"type": "goal", "text": "read sample again", "complete": False}]},
+                {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]},
+                {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]},
                 {
                     "actions": [
-                        {
-                            "type": "start",
-                            "goal": "read sample again",
-                            "plan": [{"id": "p1", "text": "Read sample again", "status": "doing"}],
-                        }
-                    ]
-                    },
-                    {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}]},
-                    {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]},
-                    {
-                        "actions": [
-                            {"type": "plan", "items": [{"id": "p1", "text": "Read sample", "status": "done", "context": "read sample.txt"}]},
+                        {"type": "plan", "items": [{"id": "p1", "text": "Read sample", "status": "done", "context": "read sample.txt"}]},
                         *_final_actions("read sample"),
                     ]
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3069,10 +3553,9 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     response = agent.run("read sample")
 
     assert response["actions"][-1]["message_for_complete"] == "done"
-    assert agent.blackboard.goal == "read sample"
     assert [item.text for item in agent.blackboard.plan] == ["Read sample"]
     assert len(agent.tool_runner.latest_executions) == 1
-    assert "ignored repeated start" in " ".join(agent.agent_feedback_errors)
+    assert "rewrote Goal after the task was active" in " ".join(agent.agent_feedback_errors)
 
 
 def test_agent_allows_plan_with_multiple_doing_items(tmp_path):
@@ -3083,10 +3566,10 @@ def test_agent_allows_plan_with_multiple_doing_items(tmp_path):
     result = agent.handle_response(
         {
             "actions": [
+                {"type": "goal", "text": "answer", "complete": False},
                 {
-                    "type": "start",
-                    "goal": "answer",
-                    "plan": [
+                    "type": "plan",
+                    "items": [
                         {"id": "p1", "text": "first", "status": "doing"},
                         {"id": "p2", "text": "second", "status": "doing"},
                     ],
@@ -3101,7 +3584,7 @@ def test_agent_allows_plan_with_multiple_doing_items(tmp_path):
     assert agent.agent_feedback_errors == []
 
 
-def test_agent_rejects_goal_rewrite_after_task_is_working(tmp_path):
+def test_agent_warns_but_keeps_goal_rewrite_after_task_is_working(tmp_path):
     agent = Agent(Session(cwd=str(tmp_path)))
     agent.blackboard.task_code = nanocode.TaskCode.WORKING
     agent.blackboard.goal = "read sample"
@@ -3110,9 +3593,9 @@ def test_agent_rejects_goal_rewrite_after_task_is_working(tmp_path):
     result = agent.handle_response({"actions": [{"type": "goal", "text": "read sample again", "complete": False}]})
 
     assert result.done is False
-    assert agent.blackboard.goal == "read sample"
-    assert [item.text for item in agent.blackboard.plan] == ["Read sample"]
-    assert "cannot rewrite Goal" in " ".join(agent.agent_feedback_errors)
+    assert agent.blackboard.goal == "read sample again"
+    assert agent.blackboard.plan == []
+    assert "rewrote Goal after the task was active" in " ".join(agent.agent_feedback_errors)
 
 
 def test_agent_run_continues_when_no_tool_calls_and_goal_not_reached(tmp_path):
@@ -3124,7 +3607,7 @@ def __init__(self):
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3142,14 +3625,14 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert not any(message.startswith("State Updated") for message in messages)
 
 
-def test_agent_run_stops_after_chat_action(tmp_path):
+def test_agent_run_stops_after_assistant_text(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
-            return {"actions": [{"type": "chat", "text": "你好"}]}
+            return {"actions": [], "_assistant_text": "你好"}
 
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
@@ -3159,7 +3642,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     response = agent.run("你好", on_message=messages.append)
 
-    assert response["actions"] == [{"type": "chat", "text": "你好"}]
+    assert response == {"actions": [], "_assistant_text": "你好"}
     assert messages == ["你好"]
     assert len(agent.model_client.user_prompts) == 1
     assert agent.blackboard.task_code == nanocode.TaskCode.DONE
@@ -3173,7 +3656,7 @@ def __init__(self):
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -3196,7 +3679,7 @@ def __init__(self):
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -3211,6 +3694,17 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert any("state update-only turn" in error for error in agent.agent_feedback_errors)
 
 
+def test_agent_warns_when_discovery_runs_long_without_plan(tmp_path, monkeypatch):
+    agent = Agent(Session(cwd=str(tmp_path)))
+    agent.blackboard.goal = "investigate"
+    _set_context_budget(monkeypatch, agent, planless_discovery_tool_calls=2)
+
+    agent.handle_response({"actions": [{"type": "tool", "name": "List", "intention": "inspect root", "args": ["."]}]})
+    agent.handle_response({"actions": [{"type": "tool", "name": "List", "intention": "inspect root again", "args": ["."]}]})
+
+    assert any("Plan is empty after discovery" in error for error in agent.agent_feedback_errors)
+
+
 def test_agent_run_reports_continuation_only_when_no_actions(tmp_path):
     class FakeModelClient:
         def __init__(self):
@@ -3219,7 +3713,7 @@ def __init__(self):
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -3234,7 +3728,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert "Continuing: assistant must set current task's goal." not in messages
 
 
-def test_agent_run_retries_when_verification_done_without_goal_complete(tmp_path):
+def test_agent_run_retries_when_checks_done_without_goal_complete(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
@@ -3249,7 +3743,7 @@ def __init__(self):
                 {"actions": _final_actions("change file")},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3263,11 +3757,11 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     assert response["actions"][-1]["message_for_complete"] == "done"
     assert len(agent.model_client.user_prompts) == 3
-    assert "Retrying: verification is done but goal is not complete." not in messages
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
+    assert "Retrying: checks is done but goal is not complete." not in messages
+    assert agent.blackboard.checks.status == CheckStatus.PASSED
 
 
-def test_agent_run_retries_when_plan_complete_without_verification(tmp_path):
+def test_agent_run_retries_when_plan_complete_without_checks(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
@@ -3284,9 +3778,7 @@ def __init__(self):
                     "actions": [
                         {
                             "type": "verify",
-                            "kind": "test",
                             "method": "pytest",
-                            "criteria": ["tests pass"],
                             "status": "passed",
                             "context": "tests passed",
                         }
@@ -3295,7 +3787,7 @@ def __init__(self):
                 {"actions": _final_actions("change file")},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3309,56 +3801,22 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     assert response["actions"][-1]["message_for_complete"] == "done"
     assert len(agent.model_client.user_prompts) == 3
-    assert any("Plan is complete but verification is not recorded" in error for error in agent.agent_feedback_errors)
-    assert agent.blackboard.verification.status == VerificationStatus.DONE
-
-
-def test_agent_run_retries_noop_state_only_response(tmp_path):
-    class FakeModelClient:
-        def __init__(self):
-            self.user_prompts = []
-            self.responses = [
-                {"actions": [{"type": "plan", "mode": "patch", "items": [{"id": "p1", "status": "doing"}]}]},
-                {"actions": [{"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0", "1"]}]},
-                {"actions": [{"type": "forget", "source": ["tr.1"], "reason": "read result is not needed"}]},
-                {
-                    "actions": [
-                        {"type": "plan", "mode": "patch", "items": [{"id": "p1", "status": "done", "context": "sample inspected"}]},
-                        {"type": "verify", "status": "passed", "context": "no code change"},
-                        {"type": "goal", "text": "inspect sample", "complete": True, "message_for_complete": "done"},
-                    ]
-                },
-            ]
-
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
-            self.user_prompts.append(user_prompt)
-            return self.responses.pop(0)
-
-    (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    agent = Agent(session)
-    agent.blackboard.goal = "inspect sample"
-    agent.blackboard.plan = [nanocode.PlanItem(id="p1", text="inspect sample", status=nanocode.PlanStatus.DOING)]
-    agent.model_client = FakeModelClient()
-
-    response = agent.run("inspect sample")
-
-    assert response["actions"][-1]["message_for_complete"] == "done"
-    assert any("response made no effective state change" in error for error in agent.agent_feedback_errors)
+    assert any("Plan is complete but Checks are not recorded" in error for error in agent.agent_feedback_errors)
+    assert agent.blackboard.checks.status == CheckStatus.PASSED
 
 
-def test_agent_allows_tool_after_completed_plan_and_verification(tmp_path):
+def test_agent_allows_tool_after_completed_plan_and_checks(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(_session(tmp_path, debug=True))
     _seed_plan(agent, "inspect")
-    agent.blackboard.verification.status = VerificationStatus.DONE
-    agent.blackboard.verification.context = "syntax check passed"
+    agent.blackboard.checks.status = CheckStatus.PASSED
+    agent.blackboard.checks.context = "syntax check passed"
     messages = []
 
     result = agent.handle_response(
         {
             "actions": [
-                {"type": "tool", "name": "Read", "intention": "inspect again", "args": ["sample.txt", "0", "1"]}
+                {"type": "tool", "name": "Read", "intention": "inspect again", "args": ["sample.txt", "0,1"]}
             ]
         },
         on_message=messages.append,
@@ -3367,16 +3825,16 @@ def test_agent_allows_tool_after_completed_plan_and_verification(tmp_path):
     assert result.done is False
     assert len(agent.tool_runner.latest_executions) == 1
     assert agent.tool_runner.latest_executions[0].outcome == "success"
-    assert not any("Completion_Gate: completed plan and verification" in message for message in messages)
-    assert any("Plan and verification are complete" in error for error in agent.agent_feedback_errors)
+    assert not any("Completion_Gate: completed plan and Checks" in message for message in messages)
+    assert any("Plan and Checks are complete" in error for error in agent.agent_feedback_errors)
 
 
 def test_agent_allows_tool_after_reopening_completed_plan_with_context(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(Session(cwd=str(tmp_path)))
     _seed_plan(agent, "inspect")
-    agent.blackboard.verification.status = VerificationStatus.DONE
-    agent.blackboard.verification.context = "syntax check passed"
+    agent.blackboard.checks.status = CheckStatus.PASSED
+    agent.blackboard.checks.context = "syntax check passed"
 
     result = agent.handle_response(
         {
@@ -3393,7 +3851,7 @@ def test_agent_allows_tool_after_reopening_completed_plan_with_context(tmp_path)
                         }
                     ],
                 },
-                {"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0", "1"]},
+                {"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0,1"]},
             ]
         }
     )
@@ -3413,8 +3871,8 @@ def test_agent_allows_tool_after_reopening_completed_plan_without_context(tmp_pa
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     agent = Agent(_session(tmp_path, debug=True))
     _seed_plan(agent, "inspect")
-    agent.blackboard.verification.status = VerificationStatus.DONE
-    agent.blackboard.verification.context = "syntax check passed"
+    agent.blackboard.checks.status = CheckStatus.PASSED
+    agent.blackboard.checks.context = "syntax check passed"
     messages = []
 
     result = agent.handle_response(
@@ -3425,7 +3883,7 @@ def test_agent_allows_tool_after_reopening_completed_plan_without_context(tmp_pa
                     "mode": "patch",
                     "items": [{"id": "p2", "text": "Inspect the remaining issue", "status": "doing"}],
                 },
-                {"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0", "1"]},
+                {"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0,1"]},
             ]
         },
         on_message=messages.append,
@@ -3438,7 +3896,7 @@ def test_agent_allows_tool_after_reopening_completed_plan_without_context(tmp_pa
     assert any("Continuing tools after completed Plan" in error for error in agent.agent_feedback_errors)
 
 
-def test_agent_blocks_verify_blocked_completion_without_manual_context(tmp_path):
+def test_agent_warns_on_verify_blocked_completion_without_manual_context(tmp_path):
     agent = Agent(_session(tmp_path, debug=True))
     _seed_plan(agent, "verify")
     messages = []
@@ -3453,9 +3911,9 @@ def test_agent_blocks_verify_blocked_completion_without_manual_context(tmp_path)
         on_message=messages.append,
     )
 
-    assert result.done is False
-    assert messages[-1] == "Verification_Gate: verify blocked requires blocker=user before completion."
-    assert not agent.session.state.conversation
+    assert result.done is True
+    assert any("verify blocked requires blocker=user before completion" in error for error in agent.agent_feedback_errors)
+    assert messages[-1] == "done"
 
 
 def test_agent_allows_verify_blocked_completion_with_user_blocker(tmp_path):
@@ -3479,7 +3937,7 @@ def test_agent_allows_verify_blocked_completion_with_user_blocker(tmp_path):
     )
 
     assert result.done is True
-    assert agent.blackboard.verification.blocker == nanocode.VerificationBlocker.USER
+    assert agent.blackboard.checks.blocker == nanocode.CheckBlocker.USER
     assert messages[-1] == "done"
 
 
@@ -3491,7 +3949,7 @@ def __init__(self):
                 {"actions": [{"type": "goal", "text": "answer", "complete": True}]},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3511,6 +3969,82 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.blackboard.goal_reached is False
 
 
+def test_agent_run_allows_assistant_text_without_task_context(tmp_path):
+    class FakeModelClient:
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            return {"actions": [], "_assistant_text": "hello"}
+
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    agent.model_client = FakeModelClient()
+    messages = []
+
+    response = agent.run("hi", on_message=messages.append)
+
+    assert response == {"actions": [], "_assistant_text": "hello"}
+    assert messages == ["hello"]
+    assert session.state.conversation[-1].content == "hello"
+
+
+def test_agent_run_allows_assistant_text_after_one_shot_tool_without_goal(tmp_path):
+    class FakeModelClient:
+        def __init__(self):
+            self.responses = [
+                {"actions": [{"type": "tool", "name": "List", "intention": "list current directory", "args": []}]},
+                {"actions": [], "_assistant_text": "listed files"},
+            ]
+
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            return self.responses.pop(0)
+
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    agent.model_client = FakeModelClient()
+    messages = []
+
+    response = agent.run("ls", on_message=messages.append)
+
+    assert response == {"actions": [], "_assistant_text": "listed files"}
+    assert messages[-1] == "listed files"
+    assert agent.blackboard.task_code == nanocode.TaskCode.DONE
+
+
+def test_agent_run_treats_assistant_text_as_progress_with_unfinished_task_context(tmp_path):
+    class FakeModelClient:
+        def __init__(self):
+            self.user_prompts = []
+            self.responses = [
+                {"actions": [], "_assistant_text": "done too early"},
+                {
+                    "actions": [
+                        {"type": "plan", "mode": "patch", "items": [{"id": "p1", "status": "done", "context": "answered"}]},
+                        *_final_actions("answer"),
+                    ]
+                },
+            ]
+
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            self.user_prompts.append(user_prompt)
+            return self.responses.pop(0)
+
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    agent.blackboard.goal = "answer"
+    agent.blackboard.task_code = nanocode.TaskCode.WORKING
+    agent.blackboard.plan = [nanocode.PlanItem(id="p1", text="answer", status=nanocode.PlanStatus.DOING)]
+    agent.model_client = FakeModelClient()
+    messages = []
+
+    response = agent.run("answer", on_message=messages.append)
+
+    assert response["actions"][-1]["message_for_complete"] == "done"
+    assert messages[-1] == "done"
+    assert "done too early" in messages
+    assert len(agent.model_client.user_prompts) == 2
+    assert "done too early" in [item.content for item in session.state.conversation]
+    assert not any("assistant text cannot finish an active task" in error for error in agent.agent_feedback_errors)
+
+
 def test_agent_run_retries_goal_complete_with_unfinished_plan(tmp_path):
     class FakeModelClient:
         def __init__(self):
@@ -3528,7 +4062,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3547,10 +4081,10 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.blackboard.plan == [nanocode.PlanItem(id="p1", text="answer", status=nanocode.PlanStatus.DONE, context="answered")]
 
 
-def test_investigate_completion_requires_root_cause_hypothesis(tmp_path):
+def test_investigate_completion_without_confirmed_lead_warns(tmp_path):
     agent = Agent(_session(tmp_path, debug=True))
     _seed_plan(agent, "find bug")
-    agent.blackboard.work_mode = nanocode.WorkMode.INVESTIGATE
+    agent.blackboard.leads = [nanocode.Lead(id="h1", text="bad admin filter", status=nanocode.LeadStatus.ACTIVE, source=("tr.1",))]
     messages = []
 
     result = agent.handle_response(
@@ -3563,16 +4097,16 @@ def test_investigate_completion_requires_root_cause_hypothesis(tmp_path):
         on_message=messages.append,
     )
 
-    assert result.done is False
+    assert result.done is True
     assert agent.blackboard.goal_reached is False
-    assert any("confirmed hypothesis" in error for error in agent.agent_feedback_errors)
-    assert messages[-1] == "Completion_Gate: investigate completion requires a confirmed hypothesis."
+    assert any("confirmed lead" in error for error in agent.agent_feedback_errors)
+    assert messages[-1] == "done"
 
     result = agent.handle_response(
         {
             "actions": [
                 {
-                    "type": "hypothesis",
+                    "type": "lead",
                     "items": [{"id": "h1", "text": "bad admin filter", "status": "confirmed", "source": ["tr.1"]}],
                 },
                 _verify_passed_action(),
@@ -3583,26 +4117,25 @@ def test_investigate_completion_requires_root_cause_hypothesis(tmp_path):
     )
 
     assert result.done is True
-    assert agent.blackboard.hypotheses[0].status == nanocode.HypothesisStatus.CONFIRMED
+    assert agent.blackboard.leads[0].status == nanocode.LeadStatus.CONFIRMED
     assert messages[-1] == "done"
 
 
-def test_start_declares_investigate_work_mode(tmp_path):
+def test_investigation_state_keeps_empty_leads_out_of_initial_prompt(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return {
                 "actions": [
+                    {"type": "goal", "text": "find bug", "complete": False},
                     {
-                        "type": "start",
-                        "goal": "find bug",
-                        "work_mode": "investigate",
-                        "plan": [{"id": "p1", "text": "identify root cause", "status": "done", "context": "reasoned"}],
+                        "type": "plan",
+                        "items": [{"id": "p1", "text": "identify root cause", "status": "done", "context": "reasoned"}],
                     },
-                    {"type": "hypothesis", "items": [{"id": "h1", "text": "bad filter", "status": "confirmed", "source": ["tr.1"]}]},
+                    {"type": "lead", "items": [{"id": "h1", "text": "bad filter", "status": "confirmed", "source": ["tr.1"]}]},
                     _verify_passed_action(),
                     {"type": "goal", "text": "find bug", "complete": True, "message_for_complete": "done"},
                 ]
@@ -3614,8 +4147,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     result = agent.run("为什么 admin history 不出现")
 
     assert result["actions"][-1]["message_for_complete"] == "done"
-    assert agent.blackboard.work_mode == nanocode.WorkMode.INVESTIGATE
-    assert "Work Mode:\nnormal" in agent.model_client.user_prompts[0]
+    assert "Leads:" not in agent.model_client.user_prompts[0]
 
 
 def test_agent_run_retries_goal_complete_when_plan_done_without_context(tmp_path):
@@ -3639,7 +4171,7 @@ def __init__(self):
                 },
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -3653,6 +4185,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     assert response["actions"][-1]["message_for_complete"] == "done"
     assert any("before Plan was complete" in error for error in agent.agent_feedback_errors)
+    assert any("plan items missing context" in error for error in agent.agent_feedback_errors)
     assert agent.agent_feedback_errors
     assert agent.blackboard.plan == [nanocode.PlanItem(id="p1", text="answer", status=nanocode.PlanStatus.DONE, context="answered")]
 
@@ -3662,12 +4195,12 @@ class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
-                {"_format_error": "Invalid model output: plain answer", "actions": []},
-                {"_format_error": "Invalid model output: plain answer", "actions": []},
+                {"_format_error": "Invalid function-tool response: plain answer", "actions": []},
+                {"_format_error": "Invalid function-tool response: plain answer", "actions": []},
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3680,21 +4213,46 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     assert response["actions"][-1]["message_for_complete"] == "done"
     assert len(agent.model_client.user_prompts) == 3
-    assert "Retrying: model returned invalid output: plain answer" not in messages
+    assert "Retrying: invalid function/tool response: plain answer" not in messages
     assert messages[-1] == "done"
 
 
+def test_agent_run_retries_action_level_format_error(tmp_path):
+    class FakeModelClient:
+        def __init__(self):
+            self.user_prompts = []
+            self.responses = [
+                {"actions": [{"type": "goal", "_format_error": "invalid tool arguments: bad json"}]},
+                {"actions": _final_actions()},
+            ]
+
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            self.user_prompts.append(user_prompt)
+            return self.responses.pop(0)
+
+    session = Session(cwd=str(tmp_path))
+    agent = Agent(session)
+    agent.model_client = FakeModelClient()
+    messages = []
+
+    response = agent.run("answer", on_message=messages.append)
+
+    assert response["actions"][-1]["message_for_complete"] == "done"
+    assert len(agent.model_client.user_prompts) == 2
+    assert agent.agent_feedback_errors
+
+
 def test_agent_feedback_survives_goal_complete_until_next_run(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
-                {"_format_error": "Invalid model output: plain answer", "actions": []},
+                {"_format_error": "Invalid function-tool response: plain answer", "actions": []},
                 {"actions": [{"type": "goal", "text": "answer", "complete": False}]},
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3709,14 +4267,14 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.agent_feedback_errors
 
     class ChatModelClient:
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
-            return {"actions": [{"type": "chat", "text": "ok"}]}
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            return {"actions": [], "_assistant_text": "ok"}
 
     agent.model_client = ChatModelClient()
     agent.run("next task")
 
     assert agent.agent_feedback_errors == []
-    assert agent.blackboard.verification.status == VerificationStatus.IDLE
+    assert agent.blackboard.checks.status == CheckStatus.IDLE
 
 
 def test_agent_allows_progress_message_before_goal_complete(tmp_path):
@@ -3724,11 +4282,22 @@ class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
-                {"actions": [{"type": "progress", "text": "progress"}]},
+                {
+                    "actions": [
+                        {
+                            "type": "verify",
+                            "method": "check",
+                            "status": "passed",
+                            "blocker": None,
+                            "context": "progress context",
+                        }
+                    ],
+                    "_assistant_text": "progress",
+                },
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3741,7 +4310,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     response = agent.run("answer", on_message=messages.append)
 
     assert response["actions"][-1]["message_for_complete"] == "done"
-    assert messages[0] == "progress"
+    assert "progress" in messages
     assert messages[-1] == "done"
     assert "progress" not in [item.content for item in session.state.conversation]
     assert agent.agent_feedback_errors == []
@@ -3756,15 +4325,15 @@ def __init__(self):
             self.responses = [
                 {
                     "actions": [
-                        {"type": "progress", "text": "reading sample"},
                         {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt"]},
-                    ]
+                    ],
+                    "_assistant_text": "reading sample",
                 },
                 {"actions": [{"type": "forget", "source": ["tr.1"], "reason": "progress-only read result is not needed"}]},
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = Session(cwd=str(tmp_path))
@@ -3784,11 +4353,11 @@ def test_agent_feedback_survives_keyboard_interrupt_until_next_run(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.responses = [
-                {"_format_error": "Invalid model output: plain answer", "actions": []},
+                {"_format_error": "Invalid function-tool response: plain answer", "actions": []},
                 KeyboardInterrupt(),
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             response = self.responses.pop(0)
             if isinstance(response, KeyboardInterrupt):
                 raise response
@@ -3799,7 +4368,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     agent.blackboard.goal = "answer"
     agent.blackboard.plan = [nanocode.PlanItem(text="try answer")]
     agent.blackboard.known = ["keep this fact"]
-    agent.blackboard.verification.status = VerificationStatus.REQUIRED
+    agent.blackboard.checks.status = CheckStatus.REQUIRED
     agent.tool_context.latest = ["old tool call"]
     agent.model_client = FakeModelClient()
 
@@ -3816,12 +4385,12 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.blackboard.goal == "answer"
     assert agent.blackboard.plan == [nanocode.PlanItem(text="try answer")]
     assert agent.blackboard.known == ["keep this fact"]
-    assert agent.blackboard.verification.status == VerificationStatus.IDLE
+    assert agent.blackboard.checks.status == CheckStatus.IDLE
     assert agent.blackboard.goal_reached is False
 
     class ChatModelClient:
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
-            return {"actions": [{"type": "chat", "text": "ok"}]}
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
+            return {"actions": [], "_assistant_text": "ok"}
 
     agent.model_client = ChatModelClient()
     agent.run("next task")
@@ -3838,7 +4407,7 @@ def __init__(self):
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3852,49 +4421,15 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert len(agent.model_client.user_prompts) == 2
 
 
-def test_agent_run_only_shows_ignored_action_frame_errors_in_debug(tmp_path):
-    class FakeModelClient:
-        def __init__(self):
-            self.responses = [
-                {
-                    "actions": _final_actions(),
-                    "_format_frame_errors": ["frame 1: expected JSON object action"],
-                }
-            ]
-
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
-            return self.responses.pop(0)
-
-    session = Session(cwd=str(tmp_path))
-    agent = Agent(session)
-    agent.model_client = FakeModelClient()
-    messages = []
-
-    agent.run("answer", on_message=messages.append)
-
-    assert "Format_Warning:" not in "\n".join(messages)
-    assert messages[-1] == "done"
-
-    debug_session = _session(tmp_path, debug=True)
-    debug_agent = Agent(debug_session)
-    debug_agent.model_client = FakeModelClient()
-    debug_messages = []
-
-    debug_agent.run("answer", on_message=debug_messages.append)
-
-    assert debug_messages[0] == "Format_Warning: ignored invalid action frame(s).\n- frame 1: expected JSON object action"
-    assert debug_messages[-1] == "done"
-
-
 def test_agent_run_shows_debug_gate_details_when_debug_enabled(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.responses = [
-                {"_format_error": "Invalid model output: plain answer", "_format_bad_output": "plain answer", "actions": []},
+                {"_format_error": "Invalid function-tool response: plain answer", "_format_bad_output": "plain answer", "actions": []},
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             return self.responses.pop(0)
 
     session = _session(tmp_path, debug=True)
@@ -3904,7 +4439,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     agent.run("answer", on_message=messages.append)
 
-    assert messages[0] == "Format_Gate: retrying model response. Invalid model output: plain answer\nFull bad output:\nplain answer"
+    assert messages[0] == "Format_Gate: retrying function/tool response. Invalid function-tool response: plain answer\nFull bad output:\nplain answer"
 
 
 def test_agent_run_stops_after_repeated_format_errors(tmp_path):
@@ -3912,9 +4447,9 @@ class FakeModelClient:
         def __init__(self):
             self.calls = 0
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.calls += 1
-            return {"_format_error": "Invalid model output: missing content", "actions": []}
+            return {"_format_error": "Invalid function-tool response: missing content", "actions": []}
 
     session = Session(cwd=str(tmp_path))
     agent = Agent(session)
@@ -3929,8 +4464,8 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
         raise AssertionError("expected LLMError")
 
     assert agent.model_client.calls == Agent.MAX_CONSECUTIVE_FORMAT_ERRORS
-    assert "model returned invalid output 3 times in a row" in message
-    assert messages[-1] == "Stopped: model returned invalid output 3 times in a row."
+    assert "invalid function/tool response 3 times in a row" in message
+    assert messages[-1] == "Stopped: invalid function/tool response 3 times in a row."
 
 
 def test_agent_run_no_retry_when_goal_complete_has_message_for_complete(tmp_path):
@@ -3942,7 +4477,7 @@ def __init__(self):
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3966,7 +4501,7 @@ def __init__(self):
                 {"actions": [{"type": "goal", "text": "answer", "complete": True, "message_for_complete": ""}]},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -3984,26 +4519,26 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     assert agent.agent_feedback_errors
 
 
-def test_agent_run_uses_message_for_complete_even_when_progress_actions_exist(tmp_path):
+def test_agent_run_uses_message_for_complete_even_when_assistant_text_exists(tmp_path):
     class FakeModelClient:
         def __init__(self):
             self.user_prompts = []
             self.responses = [
                 {
                     "actions": [
-                        {"type": "progress", "text": "explicit progress"},
                         {
                             "type": "goal",
                             "text": "answer",
                             "complete": True,
                             "message_for_complete": "fallback message",
                         },
-                    ]
+                    ],
+                    "_assistant_text": "explicit progress",
                 },
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -4014,8 +4549,8 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
 
     response = agent.run("answer", on_message=messages.append)
 
-    assert response["actions"][1]["message_for_complete"] == "fallback message"
-    assert "explicit progress" in messages
+    assert response["actions"][0]["message_for_complete"] == "fallback message"
+    assert "explicit progress" not in messages
     assert messages[-1] == "fallback message"
     assert len(agent.model_client.user_prompts) == 1
     assert "explicit progress" not in [item.content for item in session.state.conversation]
@@ -4028,11 +4563,10 @@ def __init__(self):
             self.user_prompts = []
             self.responses = [
                 {"actions": [{"type": "goal", "text": "answer", "complete": False, "message_for_complete": "should be ignored"}]},
-                {"actions": [{"type": "progress", "text": "done without goal"}]},
                 {"actions": _final_actions()},
             ]
 
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
+        def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
             self.user_prompts.append(user_prompt)
             return self.responses.pop(0)
 
@@ -4044,6 +4578,6 @@ def request(self, system_prompt, user_prompt, *, activity="agent"):
     response = agent.run("answer", on_message=messages.append)
 
     assert response["actions"][-1]["message_for_complete"] == "done"
-    assert len(agent.model_client.user_prompts) == 3
+    assert len(agent.model_client.user_prompts) == 2
     assert "should be ignored" not in messages
-    assert agent.agent_feedback_errors == []
+    assert not agent.agent_feedback_errors
diff --git a/tests/test_nanocode_bash_tool.py b/tests/test_nanocode_bash_tool.py
index f45e6fc..15f3dbe 100644
--- a/tests/test_nanocode_bash_tool.py
+++ b/tests/test_nanocode_bash_tool.py
@@ -34,6 +34,21 @@ def test_bash_tool_returns_nonzero_exit_and_stderr(tmp_path):
     assert "<stderr>\nnope\n</stderr>" in result
 
 
+def test_bash_tool_streams_live_output_while_collecting_result(tmp_path):
+    session = Session(cwd=str(tmp_path))
+    tool = BashTool.make(session, ["printf out; printf err >&2"])
+    chunks = []
+    tool.live_output = lambda stream, text: chunks.append((stream, text))
+
+    result = tool.call()
+
+    assert "".join(text for stream, text in chunks if stream == "stdout") == "out"
+    assert "".join(text for stream, text in chunks if stream == "stderr") == "err"
+    assert chunks[-1] == ("", "")
+    assert "<stdout>\nout\n</stdout>" in result
+    assert "<stderr>\nerr\n</stderr>" in result
+
+
 def test_bash_tool_times_out_and_reports_timeout(tmp_path):
     session = Session(cwd=str(tmp_path), settings=RuntimeSettings(shell_timeout=0))
 
@@ -43,17 +58,22 @@ def test_bash_tool_times_out_and_reports_timeout(tmp_path):
     assert "timeout" in result
 
 
-def test_bash_tool_kills_process_group_on_interrupt(tmp_path):
+def test_bash_tool_kills_process_group_on_interrupt(tmp_path, monkeypatch):
     session = Session(cwd=str(tmp_path), settings=RuntimeSettings(shell_timeout=30))
     pid_file = tmp_path / "pid"
     tool = BashTool.make(session, [f"echo $$ > {pid_file}; printf started; sleep 30"])
+    original_read_chunk = BashTool._read_stream_chunk
 
-    def interrupt_on_output(chunk: str) -> None:
-        if "started" in chunk:
+    def interrupt_on_output(selector, key, stdout_parts, stderr_parts, live_output=None):
+        result = original_read_chunk(selector, key, stdout_parts, stderr_parts, live_output)
+        if "started" in "".join(stdout_parts):
             raise KeyboardInterrupt()
+        return result
+
+    monkeypatch.setattr(BashTool, "_read_stream_chunk", staticmethod(interrupt_on_output))
 
     try:
-        result = tool.call_live(interrupt_on_output)
+        result = tool.call()
         assert "* exit_code: -1" in result
         assert "* interrupted: true" in result
         assert "* reason: user_ctrl_c" in result
diff --git a/tests/test_nanocode_code_index_tools.py b/tests/test_nanocode_code_index_tools.py
new file mode 100644
index 0000000..e8c89f0
--- /dev/null
+++ b/tests/test_nanocode_code_index_tools.py
@@ -0,0 +1,282 @@
+from types import SimpleNamespace
+
+import nanocode
+import pytest
+
+from nanocode import Agent, InspectCodeTool, Session, ToolCallArgError, ToolCallError
+
+
+class FakeRepository:
+    events = []
+    status = "ready"
+    refresh_status = None
+
+    def __init__(self, root, *, db_path=None, create_index=False):
+        self.root = root
+        self.db_path = db_path
+        self.create_index = create_index
+        self.events.append(("repo", root, db_path, create_index))
+
+    def refresh(self, *, progress=None):
+        self.events.append(("refresh", self.root, self.db_path, progress is not None))
+        if progress is not None:
+            progress("scan")
+            progress("start", done=0, total=2)
+            progress("file", done=1, total=2, path="code.py")
+        if self.refresh_status is not None:
+            type(self).status = self.refresh_status
+        return self
+
+    def update(self, paths=None, *, progress=None):
+        self.events.append(("update", tuple(paths or ()), self.root, self.db_path, progress is not None))
+        if progress is not None:
+            progress("scan")
+            progress("finish", done=1, total=1)
+        return self
+
+    def search_text(self, query, *, kind=None, path=None, exact_only=False, limit=20):
+        self.events.append(("search_text", query, kind, path, exact_only, limit, self.root, self.db_path))
+        return "query: " + query + "\ncount: 1\nsymbol Tool nanocode.py:10:20"
+
+    def inspect_text(self, symbol, *, kind=None, path=None, exact_only=False, anchors=False):
+        self.events.append(("inspect_text", symbol, kind, path, exact_only, anchors, self.root, self.db_path))
+        return "symbol:\n  name: " + symbol + "\nsource:\n  status: full"
+
+    def outline_text(self, filepath, *, symbol=None):
+        self.events.append(("outline_text", filepath, symbol, self.root, self.db_path))
+        return "file: " + filepath + "\noutline:\n  class Tool 0:2 class Tool:"
+
+
+def fake_code_index_module(status="ready", *, refresh_status=None, pending_changes=None, pending_files=()):
+    FakeRepository.status = status
+    FakeRepository.refresh_status = refresh_status
+
+    def status_fn(root, *, db_path=None, check=False, max_pending_files=50, format="object"):
+        status = FakeRepository.status
+        FakeRepository.events.append(("status", root, db_path, check, max_pending_files, format))
+        files = tuple(pending_files[:max_pending_files])
+        return SimpleNamespace(
+            status=status,
+            reason="index not initialized" if status == "missing" else "",
+            message="",
+            pending_changes=len(pending_files) if pending_changes is None else pending_changes,
+            pending_files=files,
+        )
+
+    def refresh_async(root, *, db_path=None, progress=None, **kwargs):
+        FakeRepository.events.append(("refresh_async", root, db_path, progress is not None, kwargs))
+        if progress is not None:
+            progress("scan")
+            progress("finish", done=1, total=1)
+        return SimpleNamespace()
+
+    return SimpleNamespace(Repository=FakeRepository, refresh_async=refresh_async, status=status_fn)
+
+
+@pytest.fixture(autouse=True)
+def reset_fake_repository():
+    FakeRepository.events = []
+    FakeRepository.status = "ready"
+    FakeRepository.refresh_status = None
+
+
+def test_inspect_code_requires_code_index(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: None)
+
+    with pytest.raises(ToolCallError, match="code index is not available"):
+        InspectCodeTool.make(Session(cwd=str(tmp_path)), ["inspect", "Tool"])
+
+
+def test_code_index_schema_accepts_expected_args():
+    args_schema = InspectCodeTool.tool_schema()["function"]["parameters"]["properties"]["args"]
+    assert args_schema["minItems"] == 2
+    assert args_schema["maxItems"] == 3
+
+
+def test_inspect_code_rejects_natural_language(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+
+    with pytest.raises(ToolCallArgError, match="do not pass natural language"):
+        InspectCodeTool.make(Session(cwd=str(tmp_path)), ["inspect", "Tool class callers"])
+    with pytest.raises(ToolCallArgError, match="do not pass natural language"):
+        InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool class"])
+
+
+def test_inspect_code_rejects_invalid_mode_and_options(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+
+    with pytest.raises(ToolCallArgError, match="mode must be find, inspect, or outline"):
+        InspectCodeTool.make(Session(cwd=str(tmp_path)), ["search", "Tool"])
+    with pytest.raises(ToolCallArgError, match="options must be an object"):
+        InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool", "limit=10"])
+
+
+def test_code_index_missing_is_not_initialized_implicitly(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("missing"))
+
+    with pytest.raises(ToolCallError, match="code index is not available"):
+        InspectCodeTool.make(session, ["find", "Tool"])
+
+    assert not [event for event in FakeRepository.events if event[0] in {"repo", "refresh"}]
+
+
+def test_code_index_status_formats_checked_pending_files(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+
+    def status_fn(root, *, db_path=None, check=False, max_pending_files=50, format="object"):
+        return SimpleNamespace(status="stale", reason="", message="", pending_changes=5, pending_files=("a.py", "b.py", "c.py", "d.py"))
+
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: SimpleNamespace(status=status_fn))
+
+    assert nanocode._code_index_status(session, check=True) == ("stale", "pending 5 (a.py, b.py, c.py...)")
+
+
+def test_code_index_sync_initializes_missing_index_in_project_data(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    module = fake_code_index_module("missing", refresh_status="ready")
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: module)
+
+    result = nanocode._code_index_sync(session)
+
+    db_path = str(tmp_path / "data" / "projects" / session.project_key() / "code-symbol-index" / "index.sqlite")
+    assert ("repo", str(tmp_path), db_path, True) in FakeRepository.events
+    assert ("refresh", str(tmp_path), db_path, True) in FakeRepository.events
+    assert session.state.status_notice == "index:done"
+    assert result == "code_index: initialized\nstatus: ready\npath: " + db_path
+
+
+def test_code_index_force_rebuild_removes_project_index_dir(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    module = fake_code_index_module("ready")
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: module)
+    index_dir = tmp_path / "data" / "projects" / session.project_key() / "code-symbol-index"
+    index_dir.mkdir(parents=True)
+    (index_dir / "old.sqlite").write_text("old", encoding="utf-8")
+
+    result = nanocode._code_index_sync(session, force=True)
+
+    assert not (index_dir / "old.sqlite").exists()
+    assert ("repo", str(tmp_path), nanocode._code_index_db_path(session), True) in FakeRepository.events
+    assert ("refresh", str(tmp_path), nanocode._code_index_db_path(session), True) in FakeRepository.events
+    assert result == "code_index: rebuilt\nstatus: ready\npath: " + nanocode._code_index_db_path(session)
+
+
+def test_code_index_refresh_existing_async_starts_for_ready_index(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("ready"))
+
+    assert nanocode._code_index_refresh_existing_async(session) is True
+
+    assert ("refresh_async", str(tmp_path), nanocode._code_index_db_path(session), True, {}) in FakeRepository.events
+    assert session.code_index_repository is None
+    assert session.state.status_notice == "index:done 1/1"
+    assert session.state.code_index_refreshing is False
+    assert session.state.code_index_reload_needed is True
+
+    nanocode._code_index_reload_if_ready(session)
+
+    assert isinstance(session.code_index_repository, FakeRepository)
+    assert session.state.code_index_reload_needed is False
+
+
+def test_code_index_update_pending_updates_small_stale_file_set(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("stale", pending_files=("a.py", "pkg/b.py")))
+
+    nanocode._code_index_update_pending(session, limit=3)
+
+    assert ("status", str(tmp_path), nanocode._code_index_db_path(session), True, 4, "object") in FakeRepository.events
+    assert ("update", (str(tmp_path / "a.py"), str(tmp_path / "pkg" / "b.py")), str(tmp_path), nanocode._code_index_db_path(session), False) in FakeRepository.events
+
+
+def test_code_index_update_pending_skips_large_stale_file_set(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("stale", pending_changes=4, pending_files=("a.py", "b.py", "c.py")))
+
+    nanocode._code_index_update_pending(session, limit=3)
+
+    assert not [event for event in FakeRepository.events if event[0] == "update"]
+
+
+def test_inspect_code_find_uses_search_text(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data")))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+
+    result = InspectCodeTool.make(session, ["find", "Tool", {"limit": 12, "kind": "class", "path": "nanocode.py", "exact_only": True}]).call()
+
+    db_path = str(tmp_path / "data" / "projects" / session.project_key() / "code-symbol-index" / "index.sqlite")
+    assert ("search_text", "Tool", "class", "nanocode.py", True, 12, str(tmp_path), db_path) in FakeRepository.events
+    assert result == "<InspectCodeToolResult>\nmode: find\nquery: Tool\ncount: 1\nsymbol Tool nanocode.py:10:20\n</InspectCodeToolResult>"
+
+
+def test_inspect_code_find_clamps_limit(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+    assert InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool", {"limit": 999}]).limit == 80
+    assert InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool", {"limit": 0}]).limit == 1
+    with pytest.raises(ToolCallArgError, match="limit must be an integer"):
+        InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool", {"limit": "many"}])
+
+
+def test_inspect_code_symbol_rejects_files_directories_and_dotted_module_paths(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+    (tmp_path / "orion" / "biz" / "handlers" / "syftpp").mkdir(parents=True)
+    (tmp_path / "code.py").write_text("class Tool:\n", encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+
+    with pytest.raises(ToolCallArgError, match="file or directory"):
+        InspectCodeTool.make(session, ["inspect", "code.py"])
+    with pytest.raises(ToolCallArgError, match="file or directory"):
+        InspectCodeTool.make(session, ["inspect", "orion.biz.handlers.syftpp"])
+    with pytest.raises(ToolCallArgError, match="module path"):
+        InspectCodeTool.make(session, ["inspect", "pkg.module.symbol"])
+
+
+def test_inspect_code_inspect_uses_inspect_text(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+
+    result = InspectCodeTool.make(session, ["inspect", "Tool", {"path": "nanocode.py", "exact_only": True}]).call()
+
+    assert ("inspect_text", "Tool", None, "nanocode.py", True, True, str(tmp_path), nanocode._code_index_db_path(session)) in FakeRepository.events
+    assert result == "<InspectCodeToolResult>\nmode: inspect\nsymbol:\n  name: Tool\nsource:\n  status: full\n</InspectCodeToolResult>"
+
+
+def test_agent_tool_call_preserves_code_index_options_object(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path))
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+
+    Agent(session).execute_tool_calls(
+        [
+            {
+                "name": "InspectCode",
+                "intention": "inspect exact symbol",
+                "args": ["inspect", "Tool", {"path": "nanocode.py", "exact_only": True}],
+            }
+        ]
+    )
+
+    assert ("inspect_text", "Tool", None, "nanocode.py", True, True, str(tmp_path), nanocode._code_index_db_path(session)) in FakeRepository.events
+
+
+def test_inspect_code_outline_uses_outline_text(tmp_path, monkeypatch):
+    session = Session(cwd=str(tmp_path))
+    filepath = tmp_path / "code.py"
+    filepath.write_text("class Tool:\n    pass\n", encoding="utf-8")
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+
+    result = InspectCodeTool.make(session, ["outline", "code.py", {"symbol": "Tool"}]).call()
+
+    assert ("outline_text", str(filepath), "Tool", str(tmp_path), nanocode._code_index_db_path(session)) in FakeRepository.events
+    assert result == "<InspectCodeToolResult>\nmode: outline\nfile: " + str(filepath) + "\noutline:\n  class Tool 0:2 class Tool:\n</InspectCodeToolResult>"
+
+
+def test_outline_code_file_rejects_directories_and_symbols(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module())
+    (tmp_path / "pkg").mkdir()
+    session = Session(cwd=str(tmp_path))
+
+    with pytest.raises(ToolCallArgError, match="existing file"):
+        InspectCodeTool.make(session, ["outline", "pkg"])
+    with pytest.raises(ToolCallArgError, match="existing file"):
+        InspectCodeTool.make(session, ["outline", "Tool"])
diff --git a/tests/test_nanocode_commands.py b/tests/test_nanocode_commands.py
index f5742ab..e61322f 100644
--- a/tests/test_nanocode_commands.py
+++ b/tests/test_nanocode_commands.py
@@ -1,9 +1,8 @@
 import os
-import shutil
 import time
 
 import nanocode
-from nanocode import Config, Agent, CommandDispatcher, CommandStatus, ModelUsage, RuntimeSettings, Session, SessionLock, SessionLogCleaner, UserMessage
+from nanocode import Config, Agent, CommandDispatcher, CommandStatus, ModelUsage, RuntimeSettings, Session, SessionLock, UserMessage, clean_sessions
 
 
 class FakeModelClient:
@@ -11,11 +10,30 @@ def __init__(self, summary="LLM compact summary"):
         self.summary = summary
         self.requests = []
 
-    def request(self, system_prompt, user_prompt, *, activity="agent"):
+    def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
         self.requests.append((system_prompt, user_prompt, activity))
         return {"summary": self.summary}
 
 
+def patch_openai_models(monkeypatch, models=None, error: Exception | None = None):
+    seen = {}
+
+    class FakeModels:
+        def list(self, **kwargs):
+            seen["list_kwargs"] = kwargs
+            if error is not None:
+                raise error
+            return type("ModelList", (), {"data": [type("Model", (), {"id": model})() for model in (models or ())]})()
+
+    class FakeOpenAI:
+        def __init__(self, **kwargs):
+            seen["client_kwargs"] = kwargs
+            self.models = FakeModels()
+
+    monkeypatch.setattr(nanocode, "OpenAI", FakeOpenAI)
+    return seen
+
+
 def make_session(tmp_path, *, model: str = "", stream: bool | None = None, compact_at: int = 50) -> Session:
     provider: dict[str, object] = {"model": model}
     if stream is not None:
@@ -37,20 +55,24 @@ def test_command_dispatcher_updates_config_and_auto_compacts(tmp_path):
     session.state.conversation = [UserMessage(content="one"), UserMessage(content="two"), UserMessage(content="three")]
 
     model_result = dispatcher.dispatch("/set provider.model new-model")
-    effort_result = dispatcher.dispatch("/set provider.effort high")
-    reason_result = dispatcher.dispatch("/set provider.reasoning off")
+    cache_result = dispatcher.dispatch("/set provider.prompt_cache_key off")
+    reason_result = dispatcher.dispatch("/set provider.reasoning high")
+    chat_reasoning_result = dispatcher.dispatch("/set provider.chat_reasoning reasoning")
     stream_result = dispatcher.dispatch("/set provider.stream off")
     first_token_result = dispatcher.dispatch("/set provider.first_token_timeout 6")
     yolo_result = dispatcher.dispatch("/set runtime.yolo on")
     compact_result = dispatcher.dispatch("/set runtime.compact_at 2")
+    context_result = dispatcher.dispatch("/set runtime.context_budget low")
     exit_result = dispatcher.dispatch("/exit")
 
     assert model_result.status == CommandStatus.HANDLED
     assert session.config.provider.model == "new-model"
-    assert effort_result.message == "Set provider.effort = high"
-    assert session.config.provider.reasoning_effort == "high"
-    assert reason_result.message == "Set provider.reasoning = off"
-    assert session.config.provider.reasoning is False
+    assert cache_result.message == "Set provider.prompt_cache_key = off"
+    assert session.config.provider.prompt_cache_key == "off"
+    assert reason_result.message == "Set provider.reasoning = high"
+    assert session.config.provider.reasoning == "high"
+    assert chat_reasoning_result.message == "Set provider.chat_reasoning = reasoning"
+    assert session.config.provider.chat_reasoning == "reasoning"
     assert stream_result.message == "Set provider.stream = off"
     assert session.config.provider.stream is False
     assert first_token_result.message == "Set provider.first_token_timeout = 6"
@@ -59,32 +81,57 @@ def test_command_dispatcher_updates_config_and_auto_compacts(tmp_path):
     assert session.settings.yolo is True
     assert compact_result.message == "Set runtime.compact_at = 2"
     assert session.settings.compact_at == 2
+    assert context_result.message == "Set runtime.context_budget = low"
+    assert session.settings.context_budget == "low"
     assert len(session.state.conversation) == 3
     assert fake_client.requests == []
     assert exit_result.status == CommandStatus.EXIT
 
 
-def test_status_reports_tokens_in_human_readable_format(tmp_path):
+def test_status_reports_tokens_in_human_readable_format(tmp_path, monkeypatch):
+    monkeypatch.setattr(nanocode, "_code_index_status", lambda session, *, check=False: ("unavailable", ""))
     session = make_session(tmp_path, model="model")
     session.state.last_total_tokens = 1200
+    session.state.last_cached_prompt_tokens = 400
     session.state.session_total_tokens = 2_345_678
-    session.state.model_usage["model"] = ModelUsage(calls=2, total_tokens=2_345_678)
+    session.state.session_prompt_tokens = 1000
+    session.state.session_cached_prompt_tokens = 400
+    session.state.model_usage["model"] = ModelUsage(calls=2, total_tokens=2_345_678, cached_prompt_tokens=400)
     dispatcher = CommandDispatcher(Agent(session))
 
     result = dispatcher.dispatch("/status")
 
     assert result.status == CommandStatus.HANDLED
     assert "tokens: last=1k session=2m" in result.message
-    assert "model: model reasoning=medium stream=on" in result.message
+    assert "cache: last=400 session=400 rate=40%" in result.message
+    assert "model: model api=chat(auto) reasoning=medium(off) stream=on" in result.message
     assert "session: " + session.session_id in result.message
-    assert "runtime: yolo=off plan=off compact_at=50" in result.message
+    assert "runtime: yolo=off compact_at=50" in result.message
     assert "models:" in result.message
-    assert "model: calls=2 tokens=2m" in result.message
+    assert "model: calls=2 tokens=2m cached=400" in result.message
     assert "tool_calls: turn=0 session=0" in result.message
-    assert "task: done" in result.message
+    assert "tools: code_index=unavailable" in result.message
+    assert "task:" not in result.message
+    assert "checks: idle" in result.message
     assert "blackboard" not in result.message
 
 
+def test_index_command_syncs_code_index(tmp_path, monkeypatch):
+    calls = []
+    monkeypatch.setattr(nanocode, "_code_index_sync", lambda session, *, force=False: calls.append(force) or "code_index: synced")
+    dispatcher = CommandDispatcher(Agent(make_session(tmp_path)))
+
+    result = dispatcher.dispatch("/index")
+    force_result = dispatcher.dispatch("/index force")
+    usage_result = dispatcher.dispatch("/index extra")
+
+    assert result.status == CommandStatus.HANDLED
+    assert result.message == "code_index: synced"
+    assert force_result.message == "code_index: synced"
+    assert calls == [False, True]
+    assert usage_result.message == "Usage: /index [force]"
+
+
 def test_set_command_shows_and_validates_runtime_config(tmp_path):
     session = make_session(tmp_path, stream=True)
     dispatcher = CommandDispatcher(Agent(session))
@@ -125,6 +172,7 @@ def test_config_command_reports_resolved_provider_config(tmp_path):
     assert "config: " in result.message
     assert "provider.active: default" in result.message
     assert "provider.model: config-model" in result.message
+    assert "provider.prompt_cache_key: auto" in result.message
     assert "provider.available_models: config-model, other-model" in result.message
     assert "provider.first_token_timeout: 90" in result.message
     assert "paths.data_dir: " + str(tmp_path / ".nanocode") in result.message
@@ -132,53 +180,51 @@ def test_config_command_reports_resolved_provider_config(tmp_path):
     assert "paths.session_dir: " in result.message
     assert "paths.history: " + str(tmp_path / ".nanocode" / "history") in result.message
     assert "runtime.max_agent_steps: 100" in result.message
-    assert "runtime.plan_timeout: 360" in result.message
-    assert "runtime.plan_first_token_timeout: 180" in result.message
-    assert "runtime.auto_clean_recent: 3d" in result.message
-    assert "runtime.plan_mode: off" in result.message
+    assert "runtime.context_budget: medium" in result.message
+    assert "runtime.auto_clean_recent: 1d" in result.message
+    assert "runtime.plan" not in result.message
 
 
-def test_set_command_updates_plan_timeouts(tmp_path):
+def test_plan_runtime_config_keys_are_removed(tmp_path):
     session = make_session(tmp_path)
     dispatcher = CommandDispatcher(Agent(session))
 
     timeout_result = dispatcher.dispatch("/set runtime.plan_timeout 240")
     first_token_result = dispatcher.dispatch("/set runtime.plan_first_token_timeout 80")
+    mode_result = dispatcher.dispatch("/set runtime.plan_mode on")
 
-    assert timeout_result.message == "Set runtime.plan_timeout = 240"
-    assert first_token_result.message == "Set runtime.plan_first_token_timeout = 80"
-    assert session.settings.plan_timeout == 240
-    assert session.settings.plan_first_token_timeout == 80
+    assert timeout_result.message == "Unknown config key: runtime.plan_timeout"
+    assert first_token_result.message == "Unknown config key: runtime.plan_first_token_timeout"
+    assert mode_result.message == "Unknown config key: runtime.plan_mode"
 
 
-def test_plan_command_toggles_plan_mode(tmp_path):
+def test_context_command_shows_and_sets_budget(tmp_path):
     session = make_session(tmp_path)
-    dispatcher = CommandDispatcher(Agent(session))
+    agent = Agent(session)
+    agent.tool_context.kept_results = ['- ok tool=Read args=["large.py"] key=tr.1\n  output:\n' + ("x" * 10_000)]
+    dispatcher = CommandDispatcher(agent)
 
-    on_result = dispatcher.dispatch("/plan")
-    off_result = dispatcher.dispatch("/plan off")
-    unknown_set_result = dispatcher.dispatch("/set runtime.plan_mode on")
+    show_result = dispatcher.dispatch("/context")
+    set_result = dispatcher.dispatch("/context low")
+    alias_result = dispatcher.dispatch("/context_budget high")
+    invalid_result = dispatcher.dispatch("/context tiny")
 
-    assert on_result.message == "Set plan mode = on"
-    assert off_result.message == "Set plan mode = off"
-    assert unknown_set_result.message == "Unknown config key: runtime.plan_mode"
-    assert session.settings.plan_mode is False
+    assert "context_budget: medium" in show_result.message
+    assert "observe_after_results: 10" in show_result.message
+    assert set_result.message.startswith("Set runtime.context_budget = low\ncontext_budget: low")
+    assert session.settings.context_budget == "high"
+    assert len(agent.tool_context.kept_results[0]) <= agent.context_budget().kept_block_chars
+    assert alias_result.message.startswith("Set runtime.context_budget = high\ncontext_budget: high")
+    assert invalid_result.message == "Usage: /context [low|medium|high]"
 
 
-def test_plan_command_runs_one_shot_plan_question(tmp_path):
-    prompts = []
+def test_plan_command_is_removed(tmp_path):
     session = make_session(tmp_path)
-
-    def run_agent(prompt):
-        prompts.append((prompt, session.settings.plan_mode))
-
-    dispatcher = CommandDispatcher(Agent(session), run_agent=run_agent)
+    dispatcher = CommandDispatcher(Agent(session))
 
     result = dispatcher.dispatch("/plan how should lsp tools work?")
 
-    assert result.message == ""
-    assert prompts == [("how should lsp tools work?", True)]
-    assert session.settings.plan_mode is False
+    assert result.message == "Unknown command: /plan"
 
 
 def test_provider_command_switches_current_provider(tmp_path):
@@ -231,10 +277,27 @@ def test_model_command_can_select_reasoning_effort(tmp_path):
 
     result = dispatcher.dispatch("/model new-model")
 
-    assert result.message == "Set provider.model = new-model\nSet provider.reasoning = on\nSet provider.effort = high"
+    assert result.message == "Set provider.model = new-model\nSet provider.reasoning = high"
     assert session.config.provider.model == "new-model"
-    assert session.config.provider.reasoning is True
-    assert session.config.provider.reasoning_effort == "high"
+    assert session.config.provider.reasoning == "high"
+
+
+def test_api_command_shows_and_sets_provider_api(tmp_path):
+    session = make_session(tmp_path, model="model")
+    dispatcher = CommandDispatcher(Agent(session))
+
+    show_result = dispatcher.dispatch("/api")
+    responses_result = dispatcher.dispatch("/api responses")
+    chat_result = dispatcher.dispatch("/api chat")
+    auto_result = dispatcher.dispatch("/api auto")
+    bad_result = dispatcher.dispatch("/api invalid")
+
+    assert show_result.message == "provider.api: auto (chat)\nUsage: /api [auto|chat|responses]"
+    assert responses_result.message == "Set provider.api = responses"
+    assert chat_result.message == "Set provider.api = chat"
+    assert auto_result.message == "Set provider.api = auto"
+    assert bad_result.message == "Usage: /api [auto|chat|responses]"
+    assert session.config.provider.api == "auto"
 
 
 def test_model_command_can_disable_reasoning(tmp_path):
@@ -245,7 +308,7 @@ def test_model_command_can_disable_reasoning(tmp_path):
 
     assert result.message == "Set provider.model = new-model\nSet provider.reasoning = off"
     assert session.config.provider.model == "new-model"
-    assert session.config.provider.reasoning is False
+    assert session.config.provider.reasoning == "off"
 
 
 def test_model_command_reasoning_back_cancels_direct_model_change(tmp_path):
@@ -271,10 +334,9 @@ def test_model_command_reasoning_back_returns_to_model_selection(tmp_path):
 
     result = dispatcher.dispatch("/model")
 
-    assert result.message == "Set provider.model = second\nSet provider.reasoning = on\nSet provider.effort = high"
+    assert result.message == "Set provider.model = second\nSet provider.reasoning = high"
     assert session.config.provider.model == "second"
-    assert session.config.provider.reasoning is True
-    assert session.config.provider.reasoning_effort == "high"
+    assert session.config.provider.reasoning == "high"
 
 
 def test_reason_command_selects_reasoning_effort(tmp_path):
@@ -284,10 +346,9 @@ def test_reason_command_selects_reasoning_effort(tmp_path):
     result = dispatcher.dispatch("/reason")
     usage_result = dispatcher.dispatch("/reason high")
 
-    assert result.message == "Set provider.reasoning = on\nSet provider.effort = high"
+    assert result.message == "Set provider.reasoning = high"
     assert usage_result.message == "Usage: /reason"
-    assert session.config.provider.reasoning is True
-    assert session.config.provider.reasoning_effort == "high"
+    assert session.config.provider.reasoning == "high"
 
 
 def test_reason_command_back_keeps_current_reasoning(tmp_path):
@@ -297,8 +358,31 @@ def test_reason_command_back_keeps_current_reasoning(tmp_path):
     result = dispatcher.dispatch("/reason")
 
     assert result.message == "No change"
-    assert session.config.provider.reasoning is True
-    assert session.config.provider.reasoning_effort == "medium"
+    assert session.config.provider.reasoning == "medium"
+
+
+def test_reason_payload_command_shows_and_sets_chat_payload(tmp_path):
+    session = make_session(tmp_path, model="old")
+    dispatcher = CommandDispatcher(Agent(session))
+
+    show_result = dispatcher.dispatch("/reason-payload")
+    off_result = dispatcher.dispatch("/reason-payload off")
+    reasoning_result = dispatcher.dispatch("/reason-payload reasoning")
+    auto_result = dispatcher.dispatch("/reason-payload auto")
+    bad_result = dispatcher.dispatch("/reason-payload bad")
+
+    assert show_result.message == "\n".join(
+        [
+            "provider.chat_reasoning: auto",
+            "provider.resolved_chat_reasoning: off",
+            "Usage: /reason-payload [auto|off|reasoning|reasoning_effort|thinking|enable_thinking]",
+        ]
+    )
+    assert off_result.message == "Set provider.chat_reasoning = off"
+    assert reasoning_result.message == "Set provider.chat_reasoning = reasoning"
+    assert auto_result.message == "Set provider.chat_reasoning = auto"
+    assert bad_result.message == "Usage: /reason-payload [auto|off|reasoning|reasoning_effort|thinking|enable_thinking]"
+    assert session.config.provider.chat_reasoning == "auto"
 
 
 def test_model_command_selects_from_available_models(tmp_path):
@@ -317,37 +401,26 @@ def test_model_command_lists_configured_models_before_remote_models(tmp_path, mo
     session.config.provider.url = "https://provider.example/v1"
     session.config.provider.key = "key"
     session.config.provider.available_models = ("old", "manual")
-    seen = {}
-
-    def fake_urlopen(request, timeout):
-        assert request.full_url == "https://provider.example/v1/models"
-        seen["auth"] = request.headers["Authorization"]
-
-        class Response:
-            def __enter__(self):
-                return self
-
-            def __exit__(self, exc_type, exc, tb):
-                return False
-
-            @staticmethod
-            def read():
-                return b'{"data":[{"id":"remote-b"},{"id":"manual"},{"id":"remote-a"}]}'
-
-        return Response()
+    seen = patch_openai_models(monkeypatch, ("remote-b", "manual", "remote-a"))
 
     def select_model(models, current):
         seen["models"] = models
         seen["current"] = current
         return "remote-a"
 
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", fake_urlopen)
     dispatcher = CommandDispatcher(Agent(session), select_model=select_model)
 
     result = dispatcher.dispatch("/model")
 
     assert seen == {
-        "auth": "Bearer key",
+        "client_kwargs": {
+            "api_key": "key",
+            "base_url": "https://provider.example/v1",
+            "timeout": 3,
+            "max_retries": 0,
+            "default_headers": {"User-Agent": "nanocode/" + nanocode.__version__},
+        },
+        "list_kwargs": {"timeout": 3},
         "models": (
             CommandDispatcher.MODEL_CONFIGURED_LABEL,
             "old",
@@ -373,7 +446,7 @@ def select_model(models, current):
         seen["models"] = models
         return "manual"
 
-    monkeypatch.setattr(nanocode.urllib.request, "urlopen", lambda request, timeout: (_ for _ in ()).throw(OSError("offline")))
+    patch_openai_models(monkeypatch, error=OSError("offline"))
     dispatcher = CommandDispatcher(Agent(session), select_model=select_model)
 
     result = dispatcher.dispatch("/model")
@@ -402,31 +475,6 @@ def test_rules_command_shows_rules_content(tmp_path):
     assert result.message == "# User Rules\n\n- Prompt-only changes do not need tests."
 
 
-def test_knowledge_command_shows_stable_knowledge(tmp_path):
-    agent = Agent(Session(cwd=str(tmp_path)))
-    dispatcher = CommandDispatcher(agent)
-
-    empty_result = dispatcher.dispatch("/knowledge")
-    usage_result = dispatcher.dispatch("/knowledge extra")
-    agent.blackboard.stable_knowledge = {
-        "workflow": ["Project test command is make test."],
-        "structure": ["Main runtime lives in nanocode.py."],
-    }
-    result = dispatcher.dispatch("/knowledge")
-
-    assert empty_result.message == "No stable knowledge stored."
-    assert usage_result.message == "Usage: /knowledge"
-    assert result.status == CommandStatus.HANDLED
-    assert result.message == "\n".join(
-        [
-            "Stable knowledge:",
-            "structure:",
-            "- Main runtime lives in nanocode.py.",
-            "workflow:",
-            "- Project test command is make test.",
-        ]
-    )
-
 def test_command_dispatcher_auto_compacts_only_when_history_exceeds_keep_recent(tmp_path):
     session = make_session(tmp_path, compact_at=2)
     agent = Agent(session)
@@ -547,157 +595,43 @@ def test_help_question_runs_agent_with_source_aware_prompt(tmp_path):
     assert len(prompts) == 1
 
 
-def test_clean_command_removes_all_session_log_files(tmp_path):
+def test_clean_sessions_removes_old_inactive_session_directories(tmp_path):
     session = Session(cwd=str(tmp_path))
-    tool_results_dir = session.tool_results_dir()
-    other_tool_results_dir = session.data_path("sessions", "other-session", "tool_results")
-    os.makedirs(tool_results_dir, exist_ok=True)
-    os.makedirs(other_tool_results_dir, exist_ok=True)
-
-    # Create some log files and a non-log file
-    log1 = os.path.join(tool_results_dir, "test1.log")
-    log2 = os.path.join(tool_results_dir, "test2.log")
-    log3 = os.path.join(other_tool_results_dir, "test3.log")
-    other = os.path.join(tool_results_dir, "other.txt")
-    with open(log1, "w"):
-        pass
-    with open(log2, "w"):
-        pass
-    with open(log3, "w"):
-        pass
-    with open(other, "w"):
-        pass
-
-    dispatcher = CommandDispatcher(Agent(session))
-    result = dispatcher.dispatch("/clean")
-
-    assert result.status == CommandStatus.HANDLED
-    assert "Cleaned 3 log file(s)" in result.message
-    assert not os.path.exists(log1)
-    assert not os.path.exists(log2)
-    assert not os.path.exists(log3)
-    assert os.path.exists(other)
-
-
-def test_clean_command_skips_active_sessions(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    active_tool_results_dir = session.tool_results_dir()
-    stale_tool_results_dir = session.data_path("sessions", "stale-session", "tool_results")
-    os.makedirs(active_tool_results_dir, exist_ok=True)
-    os.makedirs(stale_tool_results_dir, exist_ok=True)
-
-    active_log = os.path.join(active_tool_results_dir, "active.log")
-    stale_log = os.path.join(stale_tool_results_dir, "stale.log")
-    with open(active_log, "w"):
-        pass
-    with open(stale_log, "w"):
-        pass
-
-    with SessionLock(session.lock_path()):
-        dispatcher = CommandDispatcher(Agent(session))
-        result = dispatcher.dispatch("/clean")
-
-    assert result.status == CommandStatus.HANDLED
-    assert "Cleaned 1 log file(s)" in result.message
-    assert "1 active session(s) skipped" in result.message
-    assert os.path.exists(active_log)
-    assert not os.path.exists(stale_log)
-
-
-def test_session_log_cleaner_removes_only_old_logs_from_inactive_sessions(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    old_dir = session.data_path("sessions", "old-session", "tool_results")
-    recent_dir = session.data_path("sessions", "recent-session", "tool_results")
-    active_dir = session.tool_results_dir()
-    os.makedirs(old_dir, exist_ok=True)
-    os.makedirs(recent_dir, exist_ok=True)
-    os.makedirs(active_dir, exist_ok=True)
-
-    old_log = os.path.join(old_dir, "old.log")
-    recent_log = os.path.join(recent_dir, "recent.log")
-    active_old_log = os.path.join(active_dir, "active-old.log")
-    for path in (old_log, recent_log, active_old_log):
-        with open(path, "w"):
-            pass
+    current_dir = session.session_dir()
+    old_dir = session.data_path("sessions", "old-session")
+    recent_dir = session.data_path("sessions", "recent-session")
+    for path in (current_dir, old_dir, recent_dir):
+        os.makedirs(path, exist_ok=True)
     old_time = time.time() - 10 * 86400
-    os.utime(old_log, (old_time, old_time))
-    os.utime(active_old_log, (old_time, old_time))
+    os.utime(old_dir, (old_time, old_time))
 
     with SessionLock(session.lock_path()):
-        result = SessionLogCleaner(session).clean(older_than_seconds=3 * 86400)
+        clean_sessions(session, older_than_seconds=3 * 86400)
 
-    assert result.cleaned == 1
-    assert result.skipped == 1
-    assert not os.path.exists(old_log)
-    assert os.path.exists(recent_log)
-    assert os.path.exists(active_old_log)
+    assert os.path.exists(current_dir)
+    assert not os.path.exists(old_dir)
+    assert os.path.exists(recent_dir)
 
 
-def test_clean_command_no_directory(tmp_path):
+def test_clean_sessions_skips_locked_sessions(tmp_path):
     session = Session(cwd=str(tmp_path))
-    sessions_dir = session.data_path("sessions")
-    if os.path.exists(sessions_dir):
-        shutil.rmtree(sessions_dir)
-
-    dispatcher = CommandDispatcher(Agent(session))
-    result = dispatcher.dispatch("/clean")
-
-    assert result.status == CommandStatus.HANDLED
-    assert "No session logs directory found" in result.message
-
-
-def test_clean_command_empty_directory(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    tool_results_dir = session.tool_results_dir()
-    os.makedirs(tool_results_dir, exist_ok=True)
-
-    dispatcher = CommandDispatcher(Agent(session))
-    result = dispatcher.dispatch("/clean")
-
-    assert result.status == CommandStatus.HANDLED
-    assert "Cleaned 0 log file(s)" in result.message
-
-
-def test_clean_command_with_args_returns_usage(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    tool_results_dir = session.tool_results_dir()
-    os.makedirs(tool_results_dir, exist_ok=True)
+    active_dir = session.data_path("sessions", "active-session")
+    stale_dir = session.data_path("sessions", "stale-session")
+    os.makedirs(active_dir, exist_ok=True)
+    os.makedirs(stale_dir, exist_ok=True)
+    old_time = time.time() - 2 * 86400
 
-    dispatcher = CommandDispatcher(Agent(session))
-    result = dispatcher.dispatch("/clean extra-arg")
+    with SessionLock(os.path.join(active_dir, "session.lock")):
+        os.utime(active_dir, (old_time, old_time))
+        os.utime(stale_dir, (old_time, old_time))
+        clean_sessions(session, older_than_seconds=86400)
 
-    assert result.status == CommandStatus.HANDLED
-    assert result.message == "Usage: /clean"
+    assert os.path.exists(active_dir)
+    assert not os.path.exists(stale_dir)
 
 
-def test_clean_command_reports_failed_deletions(tmp_path):
+def test_session_lock_removes_lock_file_on_release(tmp_path):
     session = Session(cwd=str(tmp_path))
-    tool_results_dir = session.tool_results_dir()
-    os.makedirs(tool_results_dir, exist_ok=True)
-
-    # Create two log files
-    log1 = os.path.join(tool_results_dir, "good.log")
-    log2 = os.path.join(tool_results_dir, "fail.log")
-    with open(log1, "w"):
-        pass
-    with open(log2, "w"):
-        pass
-
-    # Mock os.remove to fail on the second file
-    original_remove = os.remove
-    call_count = [0]
-
-    def mock_remove(path):
-        call_count[0] += 1
-        if call_count[0] == 2:
-            raise OSError("Permission denied")
-        original_remove(path)
-
-    import unittest.mock
-    with unittest.mock.patch("os.remove", side_effect=mock_remove):
-        dispatcher = CommandDispatcher(Agent(session))
-        result = dispatcher.dispatch("/clean")
-
-    assert result.status == CommandStatus.HANDLED
-    assert "Cleaned 1 log file(s)" in result.message
-    assert "1 failed" in result.message
+    with SessionLock(session.lock_path()):
+        assert os.path.exists(session.lock_path())
+    assert not os.path.exists(session.lock_path())
diff --git a/tests/test_nanocode_compactor.py b/tests/test_nanocode_compactor.py
index 9c32cf8..02cbcbe 100644
--- a/tests/test_nanocode_compactor.py
+++ b/tests/test_nanocode_compactor.py
@@ -8,7 +8,7 @@ def __init__(self, summary="LLM compact summary", known=None):
         self.known = known
         self.requests = []
 
-    def request(self, system_prompt, user_prompt, *, activity="agent"):
+    def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs):
         self.requests.append((system_prompt, user_prompt, activity))
         response = {"summary": self.summary}
         if self.known is not None:
diff --git a/tests/test_nanocode_context_tool.py b/tests/test_nanocode_context_tool.py
index 251e7a6..6a17dfd 100644
--- a/tests/test_nanocode_context_tool.py
+++ b/tests/test_nanocode_context_tool.py
@@ -17,7 +17,7 @@ def test_tool_result_tool_gets_multiple_keys(tmp_path):
         )
     }
 
-    assert ToolResultTool.name() == "Recall"
+    assert ToolResultTool.NAME == "Recall"
     result = ToolResultTool.make(session, ["tr.1", "missing"]).call()
 
     assert result.startswith("RecallToolResult:")
diff --git a/tests/test_nanocode_create_file_tool.py b/tests/test_nanocode_create_file_tool.py
index 1fb95c1..003340f 100644
--- a/tests/test_nanocode_create_file_tool.py
+++ b/tests/test_nanocode_create_file_tool.py
@@ -37,6 +37,28 @@ def test_create_file_tool_rejects_existing_file(tmp_path):
     assert path.read_text(encoding="utf-8") == "existing\n"
 
 
+def test_create_file_tool_creates_missing_parent_inside_cwd(tmp_path):
+    path = tmp_path / "nested" / "created.txt"
+    session = Session(cwd=str(tmp_path))
+
+    tool = CreateFileTool.make(session, ["nested/created.txt", "alpha\n"])
+    result = tool.call()
+
+    assert path.read_text(encoding="utf-8") == "alpha\n"
+    assert "* path: nested/created.txt" in result
+
+
+def test_create_file_tool_rejects_missing_parent_outside_cwd(tmp_path):
+    outside = tmp_path.parent / (tmp_path.name + "-outside") / "created.txt"
+    session = Session(cwd=str(tmp_path))
+
+    tool = CreateFileTool.make(session, [str(outside), "alpha\n"])
+
+    with pytest.raises(ToolCallError, match="No such file or directory"):
+        tool.call()
+    assert not outside.exists()
+
+
 def test_main_agent_can_execute_create_file_tool(tmp_path):
     path = tmp_path / "created.txt"
     session = Session(cwd=str(tmp_path))
@@ -49,4 +71,4 @@ def test_main_agent_can_execute_create_file_tool(tmp_path):
 
     assert path.read_text(encoding="utf-8") == "alpha\n"
     assert "<CreateFileToolResult>" in latest
-    assert agent.blackboard.verification_required is True
+    assert agent.blackboard.checks_required is True
diff --git a/tests/test_nanocode_edit_tool.py b/tests/test_nanocode_edit_tool.py
index 1aef327..5db5bd4 100644
--- a/tests/test_nanocode_edit_tool.py
+++ b/tests/test_nanocode_edit_tool.py
@@ -1,14 +1,26 @@
+import re
+
 import pytest
 
-from nanocode import EditTool, Session, ToolCallError
+from nanocode import Agent, EditTool, ReadTool, Session, ToolCallError
+
+
+def _anchors(read_result: str) -> list[str]:
+    return re.findall(r"^(\d+:[0-9a-f]{6})\|", read_result, re.MULTILINE)
+
+
+def _read_anchors(session: Session, filepath: str, range_token: str = "0,0") -> list[str]:
+    args = [filepath] if range_token == "0,0" else [filepath, range_token]
+    return _anchors(ReadTool.make(session, args).call())
 
 
-def test_edit_tool_replaces_unique_exact_match(tmp_path):
+def test_edit_file_replaces_range_from_read_anchors(tmp_path):
     path = tmp_path / "sample.txt"
     path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
+    anchors = _read_anchors(session, "sample.txt")
 
-    tool = EditTool.make(session, ["sample.txt", "beta", "BETA"])
+    tool = EditTool.make(session, ["sample.txt", [{"op": "replace", "start": anchors[1], "end": anchors[1], "content": "BETA\n"}]])
     display = tool.preview()
     result = tool.call()
 
@@ -20,81 +32,175 @@ def test_edit_tool_replaces_unique_exact_match(tmp_path):
         [
             "<EditToolResult>",
             "* path: sample.txt",
-            "* replacements: 1",
+            "* edits: 1",
+            "* range[1]: 1:2",
             "</EditToolResult>",
         ]
     )
 
 
-def test_edit_tool_rejects_repeated_find_text(tmp_path):
+def test_edit_file_accepts_full_hashline_anchor(tmp_path):
     path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\nbeta\n", encoding="utf-8")
+    path.write_text("alpha\nbeta\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
+    read_result = ReadTool.make(session, ["sample.txt"]).call()
+    full_hashline = next(line for line in read_result.splitlines() if line.endswith("|beta"))
 
-    tool = EditTool.make(session, ["sample.txt", "beta", "BETA"])
+    EditTool.make(session, ["sample.txt", [{"op": "replace", "start": full_hashline, "end": full_hashline, "content": "BETA\n"}]]).call()
 
-    assert "matched multiple times" in tool.preview()
-    with pytest.raises(ToolCallError, match="matched multiple times"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "alpha\nbeta\nbeta\n"
+    assert path.read_text(encoding="utf-8") == "alpha\nBETA\n"
 
 
-def test_edit_tool_raises_when_find_text_is_missing(tmp_path):
+def test_edit_file_inserts_and_deletes_atomically(tmp_path):
     path = tmp_path / "sample.txt"
-    path.write_text("alpha\n", encoding="utf-8")
+    path.write_text("alpha\nbeta\ngamma\ndelta\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
+    anchors = _read_anchors(session, "sample.txt")
 
-    tool = EditTool.make(session, ["sample.txt", "missing", "replacement"])
+    result = EditTool.make(
+        session,
+        [
+            "sample.txt",
+            [
+                {"op": "insert_after", "start": anchors[0], "content": "inserted\n"},
+                {"op": "delete", "start": anchors[2], "end": anchors[2], "content": ""},
+                {"op": "replace", "start": anchors[3], "end": anchors[3], "content": "DELTA\n"},
+            ],
+        ],
+    ).call()
 
-    with pytest.raises(ToolCallError, match="target `find` text not found"):
-        tool.call()
+    assert "* edits: 3" in result
+    assert path.read_text(encoding="utf-8") == "alpha\ninserted\nbeta\nDELTA\n"
 
 
-def test_edit_tool_creates_missing_file_with_empty_find(tmp_path):
-    path = tmp_path / "created.txt"
+def test_edit_file_replace_all_literal_text_without_anchors(tmp_path):
+    path = tmp_path / "sample.txt"
+    path.write_text("OldName alpha\nOldName beta\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    tool = EditTool.make(session, ["created.txt", "", "alpha\n"])
+    tool = EditTool.make(session, ["sample.txt", [{"op": "replace_all", "old": "OldName", "new": "NewName"}]])
     display = tool.preview()
     result = tool.call()
 
-    assert "+alpha\n" in display
-    assert path.read_text(encoding="utf-8") == "alpha\n"
-    assert result == "\n".join(
-        [
-            "<EditToolResult>",
-            "* path: created.txt",
-            "* created: true",
-            "</EditToolResult>",
-        ]
-    )
+    assert "-OldName alpha\n" in display
+    assert "+NewName alpha\n" in display
+    assert path.read_text(encoding="utf-8") == "NewName alpha\nNewName beta\n"
+    assert "* edits: 1" in result
+    assert "* replace_all[1]: 2 replacements" in result
 
 
-def test_edit_tool_rejects_wrong_arg_count_with_actionable_error(tmp_path):
+def test_edit_file_replace_all_rejects_no_match_or_mixed_edits(tmp_path):
+    path = tmp_path / "sample.txt"
+    path.write_text("alpha\nbeta\n", encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+    anchors = _read_anchors(session, "sample.txt")
+
+    with pytest.raises(ToolCallError, match="old text not found"):
+        EditTool.make(session, ["sample.txt", [{"op": "replace_all", "old": "missing", "new": "x"}]]).call()
+    with pytest.raises(ToolCallError, match="cannot be mixed"):
+        EditTool.make(
+            session,
+            [
+                "sample.txt",
+                [
+                    {"op": "replace_all", "old": "alpha", "new": "ALPHA"},
+                    {"op": "replace", "start": anchors[1], "end": anchors[1], "content": "BETA\n"},
+                ],
+            ],
+        ).call()
+    assert path.read_text(encoding="utf-8") == "alpha\nbeta\n"
+
+
+def test_edit_file_rejects_stale_anchor_without_writing(tmp_path):
+    path = tmp_path / "sample.txt"
+    path.write_text("alpha\nbeta\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
+    anchors = _read_anchors(session, "sample.txt")
+    path.write_text("alpha\nchanged\n", encoding="utf-8")
 
-    with pytest.raises(ToolCallError, match=r'Edit args error: got 0 args; expected \["filepath", "find", "replace"\]'):
-        EditTool.make(session, [])
+    tool = EditTool.make(session, ["sample.txt", [{"op": "replace", "start": anchors[1], "end": anchors[1], "content": "BETA\n"}]])
 
+    assert "stale anchor" in tool.preview()
+    with pytest.raises(ToolCallError, match="stale anchor"):
+        tool.call()
+    assert path.read_text(encoding="utf-8") == "alpha\nchanged\n"
 
-def test_edit_tool_rejects_empty_find_text_for_existing_file(tmp_path):
+
+def test_edit_file_rejects_overlapping_edits_without_writing(tmp_path):
     path = tmp_path / "sample.txt"
-    path.write_text("alpha\n", encoding="utf-8")
+    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
+    anchors = _read_anchors(session, "sample.txt")
+
+    tool = EditTool.make(
+        session,
+        [
+            "sample.txt",
+            [
+                {"op": "replace", "start": anchors[0], "end": anchors[1], "content": "AB\n"},
+                {"op": "replace", "start": anchors[1], "end": anchors[2], "content": "BG\n"},
+            ],
+        ],
+    )
+
+    with pytest.raises(ToolCallError, match="overlap"):
+        tool.call()
+    assert path.read_text(encoding="utf-8") == "alpha\nbeta\ngamma\n"
 
-    tool = EditTool.make(session, ["sample.txt", "", "replacement"])
 
-    assert "empty find creates missing files only" in tool.preview()
-    with pytest.raises(ToolCallError, match="empty find creates missing files only"):
+def test_edit_file_rejects_missing_files(tmp_path):
+    session = Session(cwd=str(tmp_path))
+    tool = EditTool.make(session, ["missing.txt", [{"op": "insert_after", "start": "0:abcdef", "content": "alpha\n"}]])
+
+    assert "use CreateFile" in tool.preview()
+    with pytest.raises(ToolCallError, match="use CreateFile"):
         tool.call()
-    assert path.read_text(encoding="utf-8") == "alpha\n"
 
 
-def test_edit_tool_display_falls_back_when_find_text_is_missing(tmp_path):
+def test_edit_file_rejects_wrong_arg_shape(tmp_path):
+    session = Session(cwd=str(tmp_path))
+
+    with pytest.raises(ToolCallError, match="requires args: filepath, edits"):
+        EditTool.make(session, [])
+    with pytest.raises(ToolCallError, match="edits cannot be empty"):
+        EditTool.make(session, ["sample.txt", []])
+    with pytest.raises(ToolCallError, match="edit op must be"):
+        EditTool.make(session, ["sample.txt", [{"op": "move", "start": "0:abcdef"}]])
+    with pytest.raises(ToolCallError, match="replace_all requires old and new"):
+        EditTool.make(session, ["sample.txt", [{"op": "replace_all", "old": "alpha"}]])
+    with pytest.raises(ToolCallError, match="replace_all old cannot be empty"):
+        EditTool.make(session, ["sample.txt", [{"op": "replace_all", "old": "", "new": "beta"}]])
+
+
+def test_edit_file_schema_describes_two_structured_args():
+    args_schema = EditTool.tool_schema()["function"]["parameters"]["properties"]["args"]
+
+    assert args_schema["minItems"] == 2
+    assert args_schema["maxItems"] == 2
+    assert "Do not pass edits as a JSON string" in args_schema["description"]
+    edit_schemas = args_schema["items"]["anyOf"][1]["items"]["anyOf"]
+    assert edit_schemas[0]["properties"]["op"]["enum"] == ["replace", "delete", "insert_before", "insert_after"]
+    assert edit_schemas[1]["properties"]["op"]["enum"] == ["replace_all"]
+
+
+def test_agent_executes_edit_file_with_structured_args(tmp_path):
     path = tmp_path / "sample.txt"
-    path.write_text("alpha\n", encoding="utf-8")
+    path.write_text("alpha\nbeta\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
+    anchors = _read_anchors(session, "sample.txt")
+    agent = Agent(session)
 
-    tool = EditTool.make(session, ["sample.txt", "missing", "replacement"])
+    latest = agent.execute_tool_calls(
+        [
+            {
+                "name": "Edit",
+                "intention": "replace beta",
+                "args": ["sample.txt", [{"op": "replace", "start": anchors[1], "end": anchors[1], "content": "BETA\n"}]],
+            }
+        ],
+        confirm=lambda call, tool: True,
+    )
 
-    assert tool.preview() == f'Edit({path}, find="missing")'
+    assert path.read_text(encoding="utf-8") == "alpha\nBETA\n"
+    assert "<EditToolResult>" in latest
+    assert agent.blackboard.checks_required is True
diff --git a/tests/test_nanocode_list_dir_tool.py b/tests/test_nanocode_list_tool.py
similarity index 75%
rename from tests/test_nanocode_list_dir_tool.py
rename to tests/test_nanocode_list_tool.py
index 89591c4..07a317d 100644
--- a/tests/test_nanocode_list_dir_tool.py
+++ b/tests/test_nanocode_list_tool.py
@@ -1,6 +1,6 @@
 import pytest
 
-from nanocode import ListDirTool, Session, ToolCallError
+from nanocode import ListTool, Session, ToolCallError
 
 
 def test_list_dir_tool_lists_filtered_entries_relative_to_cwd(tmp_path):
@@ -10,14 +10,14 @@ def test_list_dir_tool_lists_filtered_entries_relative_to_cwd(tmp_path):
     (src / "notes.md").write_text("notes\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    tool = ListDirTool.make(session, ["src", "*.py"])
+    tool = ListTool.make(session, ["src", "*.py"])
 
     assert tool.requires_confirmation(session) is False
     assert tool.call() == "\n".join(
         [
-            "<ListDirToolResult>",
+            "<ListToolResult>",
             "* (file): src/app.py",
-            "</ListDirToolResult>",
+            "</ListToolResult>",
         ]
     )
 
@@ -29,16 +29,16 @@ def test_list_dir_tool_sorts_dirs_before_files_then_by_name(tmp_path):
     (tmp_path / "a_dir").mkdir()
     session = Session(cwd=str(tmp_path))
 
-    result = ListDirTool.make(session, ["."]).call()
+    result = ListTool.make(session, ["."]).call()
 
     assert result == "\n".join(
         [
-            "<ListDirToolResult>",
+            "<ListToolResult>",
             "* (dir): a_dir",
             "* (dir): z_dir",
             "* (file): a.txt",
             "* (file): b.txt",
-            "</ListDirToolResult>",
+            "</ListToolResult>",
         ]
     )
 
@@ -47,13 +47,13 @@ def test_list_dir_tool_defaults_to_cwd(tmp_path):
     (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    result = ListDirTool.make(session, []).call()
+    result = ListTool.make(session, []).call()
 
     assert result == "\n".join(
         [
-            "<ListDirToolResult>",
+            "<ListToolResult>",
             "* (file): sample.txt",
-            "</ListDirToolResult>",
+            "</ListToolResult>",
         ]
     )
 
@@ -63,7 +63,7 @@ def test_list_dir_tool_rejects_non_directory(tmp_path):
     path.write_text("alpha\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    tool = ListDirTool.make(session, ["sample.txt"])
+    tool = ListTool.make(session, ["sample.txt"])
 
     with pytest.raises(ToolCallError, match="not a directory"):
         tool.call()
diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py
index b6a0138..d9fbb40 100644
--- a/tests/test_nanocode_loop.py
+++ b/tests/test_nanocode_loop.py
@@ -1,18 +1,22 @@
-from prompt_toolkit.completion import CompleteEvent, WordCompleter
+from prompt_toolkit.completion import CompleteEvent
 from prompt_toolkit.document import Document
 import time
 
 import nanocode
-from nanocode import AgentLoop, CommandLexer, Config, ConfigFile, Blackboard, ParsedToolCall, ReferenceFileCompleter, RuntimeSettings, Session, StatusBar, ToolCallDisplayFormatter
+from nanocode import AgentLoop, CommandLexer, Config, ConfigFile, Blackboard, ParsedToolCall, RuntimeSettings, Session, StatusBar, ToolCallDisplayFormatter
 
 
-def make_session(tmp_path, *, model: str = "", compact_at: int = 50, yolo: bool = False, plan_mode: bool = False) -> Session:
+def make_session(tmp_path, *, model: str = "", compact_at: int = 50, yolo: bool = False) -> Session:
     data = {
         "provider": {"active": "default", "default": {"model": model}},
         "paths": {"data_dir": str(tmp_path / ".nanocode")},
         "runtime": {"compact_at": compact_at},
     }
-    return Session(cwd=str(tmp_path), config=Config.from_dict(data), settings=RuntimeSettings.from_dict(data, yolo=yolo, plan_mode=plan_mode))
+    return Session(cwd=str(tmp_path), config=Config.from_dict(data), settings=RuntimeSettings.from_dict(data, yolo=yolo))
+
+
+def _status_text(bar: StatusBar) -> str:  # Status bar text only: fragment styles dropped, sweep and elapsed disabled.
+    return "".join(text for _, text in bar._fragments(0.0, now=time.monotonic(), show_sweep=False, show_elapsed=False))
 
 
 def test_session_reports_missing_required_config(tmp_path):
@@ -45,13 +49,13 @@ def test_session_loads_user_rules_from_project_file(tmp_path, monkeypatch):
     assert session.state.user_rules.format() == "# User Rules\n\n- Prompt-only changes do not need tests."
 
 
-def test_runtime_settings_loads_modes_from_config():
-    data = {"runtime": {"yolo": True, "plan_mode": True}}
+def test_runtime_settings_loads_yolo_from_config():
+    data = {"runtime": {"yolo": True}}
 
     settings = RuntimeSettings.from_dict(data)
 
     assert settings.yolo is True
-    assert settings.plan_mode is True
+    assert not hasattr(settings, "plan_mode")
 
 
 def test_runtime_settings_loads_auto_clean_recent():
@@ -79,15 +83,17 @@ def test_init_config_file_writes_default_toml(tmp_path):
     assert config["provider"]["default"]["url"] == ""
     assert "available_models" not in config["provider"]["default"]
     assert "temperature" not in config["provider"]["default"]
-    assert "reasoning_payload" not in config["provider"]["default"]
+    assert config["provider"]["default"]["reasoning"] == "medium"
+    assert "chat_reasoning" not in config["provider"]["default"]
     assert config["provider"]["default"]["timeout"] == 180
     assert config["provider"]["default"]["first_token_timeout"] == 90
     assert config["runtime"]["compact_at"] == 50
-    assert config["runtime"]["plan_timeout"] == 360
-    assert config["runtime"]["plan_first_token_timeout"] == 180
-    assert config["runtime"]["auto_clean_recent"] == "3d"
+    assert config["runtime"]["context_budget"] == "medium"
+    assert config["runtime"]["auto_clean_recent"] == "1d"
     assert config["runtime"]["yolo"] is False
-    assert config["runtime"]["plan_mode"] is False
+    assert "plan_timeout" not in config["runtime"]
+    assert "plan_first_token_timeout" not in config["runtime"]
+    assert "plan_mode" not in config["runtime"]
 
 
 def test_main_init_config_uses_config_argument(tmp_path, capsys):
@@ -101,6 +107,18 @@ def test_main_init_config_uses_config_argument(tmp_path, capsys):
     assert "Created config: " + str(config_path) in output.out
 
 
+def test_main_rejects_plan_argument(capsys):
+    """`--plan` must be rejected: main() exits with status 2 and stderr names the unrecognized argument."""
+    exit_code = None
+    try:
+        nanocode.main(["--plan"])
+    except SystemExit as error:
+        exit_code = error.code
+
+    assert exit_code == 2, "--plan should be rejected by argparse"
+    assert "unrecognized arguments: --plan" in capsys.readouterr().err
+
+
 def test_main_loads_config_argument(tmp_path, monkeypatch):
     config_path = tmp_path / "custom.toml"
     config_path.write_text(
@@ -127,7 +145,7 @@ def fake_run(self):
 
     monkeypatch.setattr(nanocode.AgentLoop, "run", fake_run)
 
-    result = nanocode.main(["--config", str(config_path), "--plan"])
+    result = nanocode.main(["--config", str(config_path)])
 
     assert result == 0
     assert sessions[0].config.provider.url == "https://example.test/v1"
@@ -135,7 +153,7 @@ def fake_run(self):
     assert sessions[0].config.provider.model == "custom-model"
     assert sessions[0].config.provider.available_models == ("custom-model", "other-model")
     assert sessions[0].config.data_dir == ".custom-nanocode"
-    assert sessions[0].settings.plan_mode is True
+    assert not hasattr(sessions[0].settings, "plan_mode")
 
 
 def test_status_bar_text_has_visible_sweep_marker(tmp_path):
@@ -156,7 +174,7 @@ def test_status_bar_text_has_visible_sweep_marker(tmp_path):
     assert "turn:1.2s" in text
     assert all(style.startswith("#") for style, _ in fragments)
     assert len({style for style, _ in fragments}) > 3
-    snapshot = bar.snapshot()
+    snapshot = _status_text(bar)
     assert snapshot == "model (medium) | ctx:0/9 | tool:3 | tok:last:42 sess:1k"
     assert ">" not in snapshot
 
@@ -189,10 +207,10 @@ def test_status_bar_shows_current_model_call_number(tmp_path):
 
 
 def test_status_bar_shows_active_modes(tmp_path):
-    session = make_session(tmp_path, model="provider/model", yolo=True, plan_mode=True)
+    session = make_session(tmp_path, model="provider/model", yolo=True)
     bar = StatusBar(session)
 
-    assert bar.snapshot() == "model (medium) | yolo | plan | ctx:0/50 | tool:0 | tok:last:- sess:-"
+    assert _status_text(bar) == "model (medium) | yolo | ctx:0/50 | tool:0 | tok:last:- sess:-"
 
 
 def test_status_bar_shows_recent_status_notice(tmp_path):
@@ -201,11 +219,11 @@ def test_status_bar_shows_recent_status_notice(tmp_path):
     session.state.status_notice_until = time.monotonic() + 5
     bar = StatusBar(session)
 
-    assert bar.snapshot().endswith(" | err:format")
+    assert "model (medium) | err:format | ctx:" in _status_text(bar)
 
     session.state.status_notice_until = 0
 
-    assert "err:format" not in bar.snapshot()
+    assert "err:format" not in _status_text(bar)
 
 
 def test_agent_loop_highlights_only_diff_previews(tmp_path):
@@ -269,18 +287,6 @@ def __init__(self):
     assert captured == ["  Read sample.txt 0:1"]
 
 
-def test_agent_loop_live_preview_interrupt_hint_latches(tmp_path):
-    class FakeAgent:
-        def __init__(self):
-            self.session = make_session(tmp_path, model="model")
-
-    loop = AgentLoop(FakeAgent(), output_fn=lambda message: None)
-    loop._live_preview_started_at = time.monotonic() - loop.LIVE_PREVIEW_INTERRUPT_HINT_AFTER - 0.1
-
-    assert loop._live_preview_interrupt_hint(time.monotonic()) is True
-    assert loop._live_preview_interrupt_hint(time.monotonic()) is True
-
-
 def test_agent_loop_renders_tool_result_context_as_weak_status(tmp_path):
     class FakeAgent:
         def __init__(self):
@@ -314,11 +320,11 @@ def __init__(self):
 
     loop = AgentLoop(FakeAgent(), output_fn=lambda message: None)
 
-    segments = loop._compact_state_segments("Hypotheses + Known Updated\nHypotheses\n  1. h1\nKnown\n  1. fact")
+    segments = loop._compact_state_segments("Leads + Facts Updated\nLeads\n  1. h1\nFacts\n  1. fact")
 
-    assert ("bold ansicyan", "Hypotheses + Known Updated\n") in segments
-    assert ("ansicyan", "Hypotheses\n") in segments
-    assert ("ansicyan", "Known\n") in segments
+    assert ("bold ansicyan", "Leads + Facts Updated\n") in segments
+    assert ("ansicyan", "Leads\n") in segments
+    assert ("ansicyan", "Facts\n") in segments
 
 
 def test_agent_loop_cancelled_message_mentions_context_is_kept(tmp_path):
@@ -353,20 +359,19 @@ def __init__(self):
             self.session = make_session(tmp_path, model="model", yolo=True)
 
     class FakeTool:
+        EFFECT = nanocode.ToolEffect.EDIT
+
         def preview(self):
             return "preview"
 
-        def effect(self):
-            return nanocode.ToolEffect.EDIT
-
     outputs = []
     loop = AgentLoop(FakeAgent(), output_fn=outputs.append)
-    call = ParsedToolCall(name="Edit", intention="edit sample", args=["sample.txt", "old", "new"])
+    call = ParsedToolCall(name="Edit", intention="edit sample", args=["sample.txt", [{"op": "replace", "start": "0:abcdef", "end": "0:abcdef", "content": "new\n"}]])
 
     loop._show_auto_tool_call(call, FakeTool())
 
     assert any("Auto Tool Call | auto approved" in output for output in outputs)
-    assert any('Run     Edit("sample.txt", "old", "new")' in output for output in outputs)
+    assert any('Run     Edit("sample.txt", ' in output for output in outputs)
     assert any("Why     edit sample" in output for output in outputs)
     assert any("Preview\npreview" in output for output in outputs)
 
@@ -377,31 +382,33 @@ def test_agent_loop_command_completer_matches_slash_commands():
     slash_completions = list(completer.get_completions(Document("/"), CompleteEvent(completion_requested=True)))
     config_completions = list(completer.get_completions(Document("/con"), CompleteEvent(completion_requested=True)))
     set_key_completions = list(completer.get_completions(Document("/set provider."), CompleteEvent(completion_requested=True)))
-    set_bool_completions = list(completer.get_completions(Document("/set provider.reasoning "), CompleteEvent(completion_requested=True)))
-    set_effort_completions = list(completer.get_completions(Document("/set provider.effort h"), CompleteEvent(completion_requested=True)))
-    set_plan_timeout_completions = list(completer.get_completions(Document("/set runtime.plan_"), CompleteEvent(completion_requested=True)))
+    set_reasoning_completions = list(completer.get_completions(Document("/set provider.reasoning h"), CompleteEvent(completion_requested=True)))
+    set_chat_reasoning_completions = list(completer.get_completions(Document("/set provider.chat_reasoning rea"), CompleteEvent(completion_requested=True)))
     model_completions = list(nanocode.CommandCompleter(models=["qwen3", "deepseek"]).get_completions(Document("/model q"), CompleteEvent(completion_requested=True)))
-    plan_completions = list(completer.get_completions(Document("/plan "), CompleteEvent(completion_requested=True)))
+    api_completions = list(completer.get_completions(Document("/api r"), CompleteEvent(completion_requested=True)))
+    reason_payload_completions = list(completer.get_completions(Document("/reason-payload rea"), CompleteEvent(completion_requested=True)))
 
     assert "/help" in [completion.text for completion in slash_completions]
-    assert "/plan" in [completion.text for completion in slash_completions]
+    assert "/api" in [completion.text for completion in slash_completions]
+    assert "/reason-payload" in [completion.text for completion in slash_completions]
+    assert "/plan" not in [completion.text for completion in slash_completions]
     assert "/config" in [completion.text for completion in config_completions]
     assert "provider.reasoning" in [completion.text for completion in set_key_completions]
-    assert [completion.text for completion in set_bool_completions] == ["on", "off"]
-    assert [completion.text for completion in set_effort_completions] == ["high"]
-    assert {completion.text for completion in set_plan_timeout_completions} == {"runtime.plan_timeout", "runtime.plan_first_token_timeout"}
+    assert [completion.text for completion in set_reasoning_completions] == ["high"]
+    assert [completion.text for completion in set_chat_reasoning_completions] == ["reasoning", "reasoning_effort"]
     assert [completion.text for completion in model_completions] == ["qwen3"]
-    assert [completion.text for completion in plan_completions] == ["on", "off"]
+    assert [completion.text for completion in api_completions] == ["responses"]
+    assert [completion.text for completion in reason_payload_completions] == ["reasoning", "reasoning_effort"]
 
 
 def test_command_lexer_highlights_known_command_prefix_only():
     lexer = CommandLexer()
 
-    known = lexer.lex_document(Document("/plan how?"))(0)
+    removed = lexer.lex_document(Document("/plan how?"))(0)
     unknown = lexer.lex_document(Document("/somecommand"))(0)
     spaced = lexer.lex_document(Document(" /plan how?"))(0)
 
-    assert known == [("class:command-input", "/plan"), ("", " how?")]
+    assert removed == [("", "/plan how?")]
     assert unknown == [("", "/somecommand")]
     assert spaced == [("", " /plan how?")]
 
@@ -419,25 +426,6 @@ def test_agent_loop_command_completer_completes_provider_names():
     assert {c.text for c in all_completions} == {"qwen", "openai"}
 
 
-def test_reference_file_completer_completes_at_paths_and_keeps_command_fallback(tmp_path):
-    (tmp_path / "README.md").write_text("hello", encoding="utf-8")
-    (tmp_path / "src").mkdir()
-    (tmp_path / "src" / "main.py").write_text("print('hello')", encoding="utf-8")
-
-    completer = ReferenceFileCompleter(str(tmp_path), WordCompleter(["/help"], WORD=True))
-    event = CompleteEvent(completion_requested=True)
-
-    file_completions = list(completer.get_completions(Document("see @READ"), event))
-    dir_completions = list(completer.get_completions(Document("see @sr"), event))
-    nested_completions = list(completer.get_completions(Document("see @src/ma"), event))
-    command_completions = list(completer.get_completions(Document("/he"), event))
-
-    assert "README.md" in [completion.text for completion in file_completions]
-    assert "src/" in [completion.text for completion in dir_completions]
-    assert "src/main.py" in [completion.text for completion in nested_completions]
-    assert "/help" in [completion.text for completion in command_completions]
-
-
 def test_agent_loop_confirmation_accepts_refusal_reason(tmp_path):
     class FakeAgent:
         def __init__(self):
@@ -487,6 +475,26 @@ def tcflush(fd, queue):
     assert outputs == ["Answer: yes"]
 
 
+def test_model_retry_shortcut_signal_only_retries_active_model_request(tmp_path):
+    """The signal handler leaves the retry flag alone until a model call start time is set, then interrupts."""
+    session = make_session(tmp_path, model="model")
+    shortcut = nanocode.ModelRetryShortcut(session)
+
+    # current_model_call_started_at has not been set here, so no retry may be requested.
+    shortcut._handle_signal(0, None)
+    assert session.state.manual_model_retry_requested is False
+
+    session.state.current_model_call_started_at = 1.0
+    interrupted = False
+    try:
+        shortcut._handle_signal(0, None)
+    except KeyboardInterrupt:
+        interrupted = True
+
+    assert interrupted is True
+    assert session.state.manual_model_retry_requested is True
+
+
 def test_agent_loop_dispatches_commands_and_user_input(tmp_path):
     class FakeAgent:
         def __init__(self):
@@ -494,11 +502,11 @@ def __init__(self):
             self.blackboard = Blackboard()
             self.runs = []
 
-        def run(self, user_input, *, confirm=None, on_auto_approve=None, on_message=None):
+        def run(self, user_input, *, confirm=None, on_auto_approve=None, on_message=None, poll_user_input=None):
             self.runs.append(user_input)
             if on_message is not None:
                 on_message("assistant response")
-            return {"actions": [{"type": "chat", "text": "assistant response"}]}
+            return {"actions": [], "_assistant_text": "assistant response"}
 
     inputs = iter(["/status", "hello", "/exit"])
     outputs = []
@@ -508,11 +516,266 @@ def run(self, user_input, *, confirm=None, on_auto_approve=None, on_message=None
 
     assert result == 0
     assert any("nanocode - AI coding assistant" in output for output in outputs)
-    assert any("model: model reasoning=medium stream=on" in output for output in outputs)
+    assert any("model: model api=chat(auto) reasoning=medium(off) stream=on" in output for output in outputs)
     assert "assistant response" in outputs
     assert loop.agent.runs == ["hello"]
 
 
+def test_agent_loop_welcome_suggests_index_when_missing(tmp_path, monkeypatch):
+    """With the code index status stubbed to "missing", the welcome output includes the /index tip."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+
+    monkeypatch.setattr(nanocode, "_code_index_status", lambda session: ("missing", ""))
+    printed = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=printed.append)
+    loop._print_welcome()
+    assert any("tip: /index initializes indexed code tools" in line for line in printed)
+
+
+def test_agent_loop_starts_existing_index_refresh_async(tmp_path, monkeypatch):
+    """run() invokes the stubbed index refresh once with a progress callback; its report lands in status_notice."""
+    progress_flags = []
+
+    def refresh_existing(session, *, progress=None):
+        progress_flags.append(progress is not None)
+        if progress is not None:
+            progress("file", done=1, total=2)
+        return True
+
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+            self.blackboard = Blackboard()
+
+    monkeypatch.setattr(nanocode, "_code_index_refresh_existing_async", refresh_existing)
+    printed = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "/exit", output_fn=printed.append)
+    assert loop.run() == 0
+    assert progress_flags == [True]
+    assert loop.agent.session.state.status_notice == "index:parse 1/2"
+
+
+def test_agent_loop_consumes_queued_input_before_prompt(tmp_path):
+    """Input queued ahead of run() reaches the agent whitespace-stripped and is echoed as `sent: ...`."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+            self.blackboard = Blackboard()
+            self.runs = []
+
+        def run(self, user_input, **kwargs):
+            self.runs.append(user_input)
+
+    prompt_replies = iter(["/exit"])
+    printed = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: next(prompt_replies), output_fn=printed.append)
+    loop._append_queued_input(" queued message ")
+
+    assert loop.run() == 0
+    assert loop.agent.runs == ["queued message"]
+    assert "sent: queued message" in printed
+
+
+def test_agent_loop_run_agent_uses_runtime_ui_without_status_thread(tmp_path, monkeypatch):
+    """_run_agent brackets the agent run with the runtime UI hooks and passes its queue popper as poll_user_input."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+            self.blackboard = Blackboard()
+            self.runs = []
+            self.poll_user_input = None
+
+        def run(self, user_input, **kwargs):
+            self.runs.append(user_input)
+            self.poll_user_input = kwargs["poll_user_input"]
+
+    events = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=lambda message: None)
+    monkeypatch.setattr(nanocode, "_code_index_update_pending", lambda session: events.append("index"))
+    monkeypatch.setattr(loop.status_bar, "reset_timer", lambda: events.append("reset"))
+    monkeypatch.setattr(loop.status_bar, "resume", lambda: events.append("resume"))
+    monkeypatch.setattr(loop.status_bar, "pause", lambda: events.append("pause"))
+    monkeypatch.setattr(loop, "_start_runtime_ui", lambda: events.append("start-ui") or True)
+    monkeypatch.setattr(loop, "_stop_runtime_ui", lambda: events.append("stop-ui") or True)
+    loop._run_agent("hello")
+
+    assert events == ["reset", "start-ui", "stop-ui", "index", "pause"]
+    assert loop.agent.runs == ["hello"]
+    assert loop.agent.poll_user_input.__self__ is loop
+    assert loop.agent.poll_user_input.__func__ is AgentLoop._pop_queued_input
+
+
+def test_agent_loop_clears_queued_input_on_cancel(tmp_path, monkeypatch):
+    """A KeyboardInterrupt raised by the agent run drops queued input and reports how many entries were cleared."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+            self.blackboard = Blackboard()
+
+        def run(self, user_input, **kwargs):
+            raise KeyboardInterrupt
+
+        def cancel_current_goal(self):
+            return None
+
+    printed = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=printed.append)
+    monkeypatch.setattr(loop, "_start_runtime_ui", lambda: False)
+    loop._append_queued_input("queued message")
+    loop._run_agent("hello")
+
+    assert "queued cleared: 1" in printed
+    assert loop._pop_queued_input() is None
+
+
+def test_agent_loop_runtime_ui_empty_enter_only_refreshes(tmp_path, monkeypatch):
+    """Firing the Ctrl-M (Enter) binding with nothing typed only invalidates the app: nothing is queued or scheduled."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+
+    class FakePromptApp:
+        def __init__(self):
+            self.invalidated = 0
+            self.background_tasks = []
+
+        def invalidate(self):
+            self.invalidated += 1
+
+        def create_background_task(self, task):
+            self.background_tasks.append(task)
+
+    class FakeEvent:
+        def __init__(self, app):
+            self.app = app
+
+    def find_handler(bindings, key):
+        return next(binding.handler for binding in bindings.bindings if binding.keys == (key,))
+
+    prompt_app = FakePromptApp()
+
+    class FakeApplication:
+        def __init__(self, **kwargs):
+            self.bindings = kwargs["key_bindings"]
+
+        def run(self, handle_sigint=False):
+            find_handler(self.bindings, nanocode.Keys.ControlM)(FakeEvent(prompt_app))
+
+    run_in_terminal_calls = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=lambda message: None)
+    monkeypatch.setattr(nanocode, "Application", FakeApplication)
+    monkeypatch.setattr(nanocode, "run_in_terminal", lambda *args, **kwargs: run_in_terminal_calls.append((args, kwargs)))
+    loop._run_runtime_ui()
+
+    assert loop._pop_queued_input() is None
+    assert prompt_app.invalidated == 1
+    assert prompt_app.background_tasks == []
+    assert run_in_terminal_calls == []
+
+
+def test_agent_loop_runtime_ui_pause_restarts_for_confirm(tmp_path, monkeypatch):
+    """Confirming a tool call stops the runtime UI, shows the call, then starts the UI again."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+
+    events = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=lambda message: None)
+    monkeypatch.setattr(loop, "_wait_confirm", lambda *args, **kwargs: True)
+    monkeypatch.setattr(loop, "_with_status_paused", lambda action: action())
+    monkeypatch.setattr(loop, "_print_tool_call_display", lambda *args, **kwargs: events.append("display"))
+    monkeypatch.setattr(loop, "_start_runtime_ui", lambda: events.append("start-ui") or True)
+    monkeypatch.setattr(loop, "_stop_runtime_ui", lambda: events.append("stop-ui") or True)
+    pending_call = ParsedToolCall("Edit", "edit", ["a", "b", "c"])
+
+    assert loop._confirm_tool_call(pending_call, object()) is True
+    assert events == ["stop-ui", "display", "start-ui"]
+
+
+def test_agent_loop_bash_live_preview_keeps_latest_lines(tmp_path, monkeypatch):
+    """Of eight streamed lines only the last six are previewed; an empty update clears the preview and prints them."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+
+    class FakeApp:
+        def __init__(self):
+            self.invalidated = 0
+
+        def invalidate(self):
+            self.invalidated += 1
+
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "")
+    loop._runtime_ui_app = app = FakeApp()
+    flushed = []
+    monkeypatch.setattr(nanocode, "print_formatted_text", lambda formatted, **kwargs: flushed.append(list(formatted)))
+    kept = "line2\nline3\nline4\nline5\nline6\nline7"
+
+    loop._show_tool_live_output("stdout", "\n".join(f"line{index}" for index in range(8)))
+    assert app.invalidated == 1
+    assert loop._has_tool_live_preview() is True
+    assert loop._tool_live_preview_fragments() == [("class:bash-preview", kept)]
+
+    # An empty stream/text pair ends the preview; the kept lines go through print_formatted_text.
+    loop._show_tool_live_output("", "")
+    assert app.invalidated == 2
+    assert loop._has_tool_live_preview() is False
+    assert flushed == [[("ansibrightblack", kept + "\n")]]
+
+
+def test_agent_loop_runtime_interrupt_requests_sigint(tmp_path, monkeypatch):
+    """Interrupting with exit_after=True flags the loop to exit, exits the UI app and sends SIGINT to this process."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+
+    class FakeApp:
+        exited = False
+
+        def exit(self):
+            self.exited = True
+
+    kill_calls = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=lambda message: None)
+    loop._runtime_ui_app = fake_app = FakeApp()
+    # Record os.kill instead of delivering a real signal to the test process.
+    monkeypatch.setattr(nanocode.os, "kill", lambda pid, sig: kill_calls.append((pid, sig)))
+
+    loop._interrupt_current_turn(exit_after=True)
+
+    assert loop._exit_after_current_turn is True
+    assert fake_app.exited is True
+    assert kill_calls == [(nanocode.os.getpid(), nanocode.signal.SIGINT)]
+
+
+def test_agent_loop_runtime_retry_requests_model_retry(tmp_path, monkeypatch):
+    """With a model call start time set, retry raises the retry flag and sends SIGINT without exiting the UI app."""
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="model")
+
+    class FakeApp:
+        exited = False
+
+        def exit(self):
+            self.exited = True
+
+    kill_calls = []
+    loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "", output_fn=lambda message: None)
+    loop._runtime_ui_app = fake_app = FakeApp()
+    loop.agent.session.state.current_model_call_started_at = 1.0
+    # Record os.kill instead of delivering a real signal to the test process.
+    monkeypatch.setattr(nanocode.os, "kill", lambda pid, sig: kill_calls.append((pid, sig)))
+
+    loop._retry_current_model_call()
+
+    assert loop.agent.session.state.manual_model_retry_requested is True
+    assert fake_app.exited is False
+    assert kill_calls == [(nanocode.os.getpid(), nanocode.signal.SIGINT)]
+
+
 def test_agent_loop_model_command_prompts_for_reasoning_effort(tmp_path):
     class FakeAgent:
         def __init__(self):
@@ -523,8 +786,7 @@ def __init__(self):
 
     assert loop.run() == 0
     assert loop.agent.session.config.provider.model == "new-model"
-    assert loop.agent.session.config.provider.reasoning is True
-    assert loop.agent.session.config.provider.reasoning_effort == "high"
+    assert loop.agent.session.config.provider.reasoning == "high"
 
 
 def test_agent_loop_model_command_prompts_for_model_when_available(tmp_path):
@@ -563,16 +825,14 @@ def test_agent_loop_model_command_can_keep_reasoning_effort(tmp_path):
     class FakeAgent:
         def __init__(self):
             self.session = make_session(tmp_path, model="old")
-            self.session.config.provider.reasoning = False
-            self.session.config.provider.reasoning_effort = "xhigh"
+            self.session.config.provider.reasoning = "xhigh"
 
     inputs = iter(["/model new-model", "", "/exit"])
     loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: next(inputs), output_fn=lambda message: None)
 
     assert loop.run() == 0
     assert loop.agent.session.config.provider.model == "new-model"
-    assert loop.agent.session.config.provider.reasoning is False
-    assert loop.agent.session.config.provider.reasoning_effort == "xhigh"
+    assert loop.agent.session.config.provider.reasoning == "xhigh"
 
 
 def test_agent_loop_choice_prompt_styles_selected_effort_and_erases_when_done(tmp_path, monkeypatch):
@@ -606,13 +866,6 @@ def run(self):
     assert attrs.bold is True
     assert captured["erase_when_done"] is True
     assert captured["layout"] is not None
-    assert loop._choice_initial_index(("off", "minimal", "low", "medium"), "medium") == 3
-
-    loop._select_model(("old", "new"), "new")
-    assert loop._choice_initial_index(("old", "new"), "new") == 1
-
-    loop._select_provider(("one", "two"), "two")
-    assert loop._choice_initial_index(("one", "two"), "two") == 1
 
 
 def test_agent_loop_choice_prompt_filters_with_slash_search(tmp_path):
@@ -643,6 +896,59 @@ def __init__(self):
     assert "old" not in outputs[-1]
 
 
+def test_agent_loop_choice_prompt_enter_confirms_search_before_select(tmp_path, monkeypatch):
+    """After typing a `/` search in the choice prompt, the first Enter selects nothing; the second returns the match."""
+    class FakeStdin:
+        @staticmethod
+        def isatty():
+            return True
+
+    class FakeAgent:
+        def __init__(self):
+            self.session = make_session(tmp_path, model="old")
+
+    class FakePromptApp:
+        result = None
+
+        def invalidate(self):
+            return None
+
+        def exit(self, result=None, exception=None):
+            if exception is not None:
+                raise exception
+            self.result = result
+
+    class FakeEvent:
+        def __init__(self, app, data=""):
+            self.app, self.data = app, data
+
+    def find_handler(bindings, key):
+        return next(binding.handler for binding in bindings.bindings if binding.keys == (key,))
+
+    class FakeApplication:
+        def __init__(self, **kwargs):
+            self.bindings = kwargs["key_bindings"]
+
+        def run(self):
+            prompt_app = FakePromptApp()
+            find_handler(self.bindings, "/")(FakeEvent(prompt_app, "/"))
+            type_key = find_handler(self.bindings, nanocode.Keys.Any)
+            for typed in "remote":
+                type_key(FakeEvent(prompt_app, typed))
+            press_enter = find_handler(self.bindings, nanocode.Keys.ControlM)
+            # The first Enter must not select anything yet.
+            press_enter(FakeEvent(prompt_app, "\r"))
+            assert prompt_app.result is None
+            press_enter(FakeEvent(prompt_app, "\r"))
+            return prompt_app.result
+
+    monkeypatch.setattr(nanocode.sys, "stdin", FakeStdin())
+    monkeypatch.setattr(nanocode, "Application", FakeApplication)
+    loop = AgentLoop(FakeAgent(), prompt_session=object())
+
+    assert loop._select_choice("Model", ("old", "remote-a", "remote-b"), current="old") == "remote-a"
+
+
 def test_agent_loop_uses_prompt_toolkit_session(tmp_path):
     calls = []
 
diff --git a/tests/test_nanocode_read_tool.py b/tests/test_nanocode_read_tool.py
index 5e4e2de..ee13a81 100644
--- a/tests/test_nanocode_read_tool.py
+++ b/tests/test_nanocode_read_tool.py
@@ -4,20 +4,25 @@
 from nanocode import ReadTool, Session, ToolCallError
 
 
+def _hashline(index: int, text: str) -> str:
+    return f"{index}:{nanocode._line_hash(text)}|{text}"
+
+
 def test_read_tool_reads_requested_line_range(tmp_path):
     path = tmp_path / "sample.txt"
     path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    tool = ReadTool.make(session, ["sample.txt", "1", "3"])
+    tool = ReadTool.make(session, ["sample.txt", "1,3"])
     result = tool.call()
 
     assert tool.requires_confirmation(session) is False
     assert result.startswith("<ReadToolResult>")
     assert "<range>1:3</range>" in result
-    assert "<fingerprint>" in result
-    assert "beta\ngamma\n" in result
-    assert "alpha" not in result
+    assert "<fingerprint>" not in result
+    assert "<content hashline-numbered>" in result
+    assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result
+    assert "|alpha" not in result
 
 
 def test_read_tool_rejects_empty_args_with_actionable_error(tmp_path):
@@ -48,10 +53,28 @@ def test_read_tool_reads_multiple_line_range_tokens(tmp_path):
     assert "1:2, 3:5" in tool.preview()
     assert "<range>1:2</range>" in result
     assert "<range>3:5</range>" in result
-    assert "one\n" in result
-    assert "three\nfour\n" in result
-    assert "zero\n" not in result
-    assert "two\n" not in result
+    assert _hashline(1, "one\n") in result
+    assert _hashline(3, "three\n") + _hashline(4, "four\n") in result
+    assert "|zero" not in result
+    assert "|two" not in result
+
+
+def test_read_tool_reads_multiple_files(tmp_path):
+    (tmp_path / "pyproject.toml").write_text("[project]\nname = \"demo\"\n", encoding="utf-8")
+    (tmp_path / "uv.lock").write_text("version = 1\n", encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+
+    tool = ReadTool.make(session, ["pyproject.toml", "uv.lock"])
+    result = tool.call()
+
+    assert tool.filepaths == [str(tmp_path / "pyproject.toml"), str(tmp_path / "uv.lock")]
+    assert tool.requires_confirmation(session) is False
+    assert "pyproject.toml, " in tool.preview()
+    assert "<file_count>2</file_count>" in result
+    assert "<path>pyproject.toml</path>" in result
+    assert "<path>uv.lock</path>" in result
+    assert _hashline(0, "[project]\n") in result
+    assert _hashline(0, "version = 1\n") in result
 
 
 def test_read_tool_reads_colon_and_comma_range_tokens(tmp_path):
@@ -66,10 +89,10 @@ def test_read_tool_reads_colon_and_comma_range_tokens(tmp_path):
     assert "1:2, 3:5" in tool.preview()
     assert "<range>1:2</range>" in result
     assert "<range>3:5</range>" in result
-    assert "one\n" in result
-    assert "three\nfour\n" in result
-    assert "zero\n" not in result
-    assert "two\n" not in result
+    assert _hashline(1, "one\n") in result
+    assert _hashline(3, "three\n") + _hashline(4, "four\n") in result
+    assert "|zero" not in result
+    assert "|two" not in result
 
 
 def test_read_tool_reads_to_eof_when_end_is_zero(tmp_path):
@@ -77,10 +100,10 @@ def test_read_tool_reads_to_eof_when_end_is_zero(tmp_path):
     path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    result = ReadTool.make(session, ["sample.txt", "1", "0"]).call()
+    result = ReadTool.make(session, ["sample.txt", "1,0"]).call()
 
-    assert "beta\ngamma\n" in result
-    assert "alpha" not in result
+    assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result
+    assert "|alpha" not in result
 
 
 def test_read_tool_allows_omitted_range_for_full_file_read(tmp_path):
@@ -94,43 +117,21 @@ def test_read_tool_allows_omitted_range_for_full_file_read(tmp_path):
     assert tool.start == 0
     assert tool.end == 0
     assert "<range>0:0</range>" in result
-    assert "alpha\nbeta\n" in result
-
-
-def test_read_tool_accepts_multiple_existing_file_args_for_compatibility(tmp_path):
-    for name, content in {
-        "one.txt": "one\n",
-        "two.txt": "two\n",
-        "three.txt": "three\n",
-    }.items():
-        (tmp_path / name).write_text(content, encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-
-    tool = ReadTool.make(session, ["one.txt", "two.txt", "three.txt"])
-    result = tool.call()
-
-    assert tool.filepaths == [str(tmp_path / "one.txt"), str(tmp_path / "two.txt"), str(tmp_path / "three.txt")]
-    assert "<file_count>3</file_count>" in result
-    assert "<path>" + str(tmp_path / "one.txt") + "</path>" in result
-    assert "<path>" + str(tmp_path / "two.txt") + "</path>" in result
-    assert "<path>" + str(tmp_path / "three.txt") + "</path>" in result
-    assert "one\n" in result
-    assert "two\n" in result
-    assert "three\n" in result
+    assert _hashline(0, "alpha\n") + _hashline(1, "beta\n") in result
 
 
-def test_read_tool_keeps_start_end_args_preferred_over_existing_numeric_filenames(tmp_path):
+def test_read_tool_reads_range_token_when_numeric_filenames_exist(tmp_path):
     (tmp_path / "sample.txt").write_text("zero\none\ntwo\nthree\n", encoding="utf-8")
     (tmp_path / "1").write_text("numeric filename one\n", encoding="utf-8")
     (tmp_path / "3").write_text("numeric filename three\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    tool = ReadTool.make(session, ["sample.txt", "1", "3"])
+    tool = ReadTool.make(session, ["sample.txt", "1,3"])
     result = tool.call()
 
-    assert tool.filepaths == []
+    assert tool.ranges == [(1, 3)]
     assert "<range>1:3</range>" in result
-    assert "one\ntwo\n" in result
+    assert _hashline(1, "one\n") + _hashline(2, "two\n") in result
     assert "numeric filename" not in result
 
 
@@ -145,9 +146,9 @@ def test_read_tool_truncates_full_file_reads_after_600_lines(tmp_path):
     assert "<truncated>true</truncated>" in result
     assert "<total_lines>605</total_lines>" in result
     assert "Read returned 600 lines from 0:600 of 605 total lines" in result
-    assert "Use Search to locate relevant text or Read smaller ranges in batches." in result
-    assert "line-0599\n" in result
-    assert "line-0600\n" not in result
+    assert "Recall with a line range, or Read smaller targeted ranges" in result
+    assert _hashline(599, "line-0599\n") in result
+    assert "|line-0600" not in result
 
 
 def test_read_tool_truncates_large_bounded_ranges_after_600_lines(tmp_path):
@@ -155,14 +156,15 @@ def test_read_tool_truncates_large_bounded_ranges_after_600_lines(tmp_path):
     path.write_text("".join(f"line-{index:04d}\n" for index in range(700)), encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    result = ReadTool.make(session, ["sample.txt", "10", "650"]).call()
+    result = ReadTool.make(session, ["sample.txt", "10,650"]).call()
 
     assert "<range>10:610</range>" in result
     assert "<truncated>true</truncated>" in result
     assert "<total_lines>700</total_lines>" in result
     assert "Read returned 600 lines from 10:610 of 700 total lines" in result
-    assert "line-0609\n" in result
-    assert "line-0610\n" not in result
+    assert "Recall with a line range, or Read smaller targeted ranges" in result
+    assert _hashline(609, "line-0609\n") in result
+    assert "|line-0610" not in result
 
 
 def test_read_tool_bounded_read_stops_at_end(tmp_path, monkeypatch):
@@ -193,9 +195,9 @@ def tracking_open(*args, **kwargs):
 
     monkeypatch.setattr(nanocode, "open", tracking_open, raising=False)
 
-    result = ReadTool.make(session, ["sample.txt", "1", "3"]).call()
+    result = ReadTool.make(session, ["sample.txt", "1,3"]).call()
 
-    assert "one\ntwo\n" in result
+    assert _hashline(1, "one\n") + _hashline(2, "two\n") in result
     assert "three" not in result
     assert lines_read == ["zero\n", "one\n", "two\n"]
 
@@ -205,10 +207,10 @@ def test_read_tool_clamps_out_of_bounds_range(tmp_path):
     path.write_text("alpha\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    result = ReadTool.make(session, ["sample.txt", "10", "20"]).call()
+    result = ReadTool.make(session, ["sample.txt", "10,20"]).call()
 
     assert "alpha" not in result
-    assert "  <content no-indention>\n\n  </content>" in result
+    assert "  <content hashline-numbered>\n\n  </content>" in result
 
 
 def test_read_tool_rejects_non_integer_range(tmp_path):
@@ -216,8 +218,8 @@ def test_read_tool_rejects_non_integer_range(tmp_path):
     path.write_text("alpha\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
 
-    with pytest.raises(ToolCallError, match="invalid start"):
-        ReadTool.make(session, ["sample.txt", "bad", "1"])
+    with pytest.raises(ToolCallError, match="invalid range"):
+        ReadTool.make(session, ["sample.txt", "bad,1"])
 
 
 def test_read_tool_rejects_partial_range(tmp_path):
diff --git a/tests/test_nanocode_replace_range_tool.py b/tests/test_nanocode_replace_range_tool.py
deleted file mode 100644
index 5fbc02c..0000000
--- a/tests/test_nanocode_replace_range_tool.py
+++ /dev/null
@@ -1,390 +0,0 @@
-import pytest
-
-from nanocode import Agent, RangeFingerprintStore, ReadTool, ReplaceRangeTool, Session, ToolCallError
-
-
-def _fingerprint(read_result: str) -> str:
-    return read_result.split("<fingerprint>", 1)[1].split("</fingerprint>", 1)[0]
-
-
-def _replace_args(filepath: str, start: int, end: int, fingerprint: str, before: str, after: str, content: str) -> list[str]:
-    return [filepath, str(start), str(end), fingerprint, before, after, content]
-
-
-def test_replace_range_tool_replaces_range_when_fingerprint_matches(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA\n"))
-    display = tool.preview()
-    result = tool.call()
-
-    assert ReplaceRangeTool.name() == "ReplaceRange"
-    assert tool.requires_confirmation(session) is True
-    assert display.startswith("--- ")
-    assert "-beta\n" in display
-    assert "+BETA\n" in display
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\n"
-    assert result == "\n".join(
-        [
-            "<ReplaceRangeToolResult>",
-            "* path: sample.txt",
-            "* range: 1:2",
-            f"* fingerprint: {fingerprint}",
-            "</ReplaceRangeToolResult>",
-        ]
-    )
-
-
-def test_replace_range_tool_rejects_before_context_mismatch(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "wrong\n", "gamma\n", "BETA\n"))
-
-    assert "# preview unavailable: before_context mismatch" in tool.preview()
-    with pytest.raises(ToolCallError, match="before_context mismatch"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "alpha\nbeta\ngamma\n"
-
-
-def test_replace_range_tool_rejects_after_context_mismatch(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "wrong\n", "BETA\n"))
-
-    assert "# preview unavailable: after_context mismatch" in tool.preview()
-    with pytest.raises(ToolCallError, match="after_context mismatch"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "alpha\nbeta\ngamma\n"
-
-
-def test_replace_range_tool_rejects_content_that_repeats_boundary_context(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    before_tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "alpha\nBETA\n"))
-    after_tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA\ngamma\n"))
-
-    assert "# preview unavailable: content includes before_context" in before_tool.preview()
-    assert "# preview unavailable: content includes after_context" in after_tool.preview()
-
-
-def test_replace_range_tool_creates_missing_file_with_empty_zero_range(tmp_path):
-    path = tmp_path / "created.txt"
-    session = Session(cwd=str(tmp_path))
-
-    tool = ReplaceRangeTool.make(session, _replace_args("created.txt", 0, 0, "", "", "", "alpha\n"))
-    display = tool.preview()
-    result = tool.call()
-
-    assert "+alpha\n" in display
-    assert path.read_text(encoding="utf-8") == "alpha\n"
-    assert result == "\n".join(
-        [
-            "<ReplaceRangeToolResult>",
-            "* path: created.txt",
-            "* range: 0:0",
-            f"* fingerprint: {RangeFingerprintStore().remember(filepath=str(path), start=0, end=0, content='')}",
-            "* created: true",
-            "</ReplaceRangeToolResult>",
-        ]
-    )
-
-
-def test_replace_range_tool_warns_for_broad_preview_ranges(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("".join("line " + str(index) + "\n" for index in range(25)), encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "0,25"]).call())
-
-    display = ReplaceRangeTool.make(session, _replace_args("sample.txt", 0, 25, fingerprint, "", "", "replacement\n")).preview()
-
-    assert display.startswith("# warning: broad range replacement; prefer smaller semantic ranges\n--- ")
-
-
-def test_replace_range_tool_accepts_public_batch_ranges(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\ndelta\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    beta_fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-    delta_fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "3", "4"]).call())
-
-    result = ReplaceRangeTool.make(
-        session,
-        [
-            "sample.txt",
-            [
-                ["1", "2", beta_fingerprint, "alpha\n", "gamma\n", "BETA\n"],
-                ["3", "4", delta_fingerprint, "gamma\n", "", "DELTA\n"],
-            ],
-        ],
-    ).call()
-
-    assert "* replacements: 2" in result
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\nDELTA\n"
-
-
-def test_agent_executes_replace_range_batch_args(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\ndelta\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    beta_fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-    delta_fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "3", "4"]).call())
-    agent = Agent(session)
-
-    latest = agent.execute_tool_calls(
-        [
-            {
-                "name": "ReplaceRange",
-                "intention": "replace two ranges",
-                "args": [
-                    "sample.txt",
-                    [
-                        ["1", "2", beta_fingerprint, "alpha\n", "gamma\n", "BETA\n"],
-                        ["3", "4", delta_fingerprint, "gamma\n", "", "DELTA\n"],
-                    ],
-                ],
-            },
-        ],
-        confirm=lambda call, tool: True,
-    )
-
-    assert "* replacements: 2" in latest
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\nDELTA\n"
-
-
-def test_agent_merges_consecutive_same_file_replace_range_calls(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\ndelta\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    beta_fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-    delta_fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "3", "4"]).call())
-    agent = Agent(session)
-    confirmations = []
-
-    latest = agent.execute_tool_calls(
-        [
-            {"name": "ReplaceRange", "intention": "replace beta", "args": _replace_args("sample.txt", 1, 2, beta_fingerprint, "alpha\n", "gamma\n", "BETA\n")},
-            {"name": "ReplaceRange", "intention": "replace delta", "args": _replace_args("sample.txt", 3, 4, delta_fingerprint, "gamma\n", "", "DELTA\n")},
-        ],
-        confirm=lambda call, tool: confirmations.append(call.executed) or True,
-    )
-
-    assert len(agent.tool_runner.latest_executions) == 1
-    assert confirmations[0].startswith('ReplaceRange("sample.txt", "1", "2"')
-    assert "replace beta; replace delta" in session.state.tool_result_store["tr.1"].description
-    assert "* replacements: 2" in latest
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\nDELTA\n"
-
-
-def test_replace_range_tool_adds_line_break_before_following_content(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA")).call()
-
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\n"
-
-
-def test_replace_range_tool_relocates_cached_fingerprint_after_line_shift(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "2", "3"]).call())
-    path.write_text("zero\nalpha\nbeta\ngamma\n", encoding="utf-8")
-
-    result = ReplaceRangeTool.make(session, _replace_args("sample.txt", 2, 3, fingerprint, "beta\n", "", "GAMMA\n")).call()
-
-    assert path.read_text(encoding="utf-8") == "zero\nalpha\nbeta\nGAMMA\n"
-    assert "* range: 3:4" in result
-    assert "* relocated_from: 2:3" in result
-
-
-def test_replace_range_tool_rejects_ambiguous_cached_relocation(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-    path.write_text("zero\nalpha\nbeta\nbeta\ngamma\n", encoding="utf-8")
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA\n"))
-
-    with pytest.raises(ToolCallError, match="cached range matched multiple locations"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "zero\nalpha\nbeta\nbeta\ngamma\n"
-
-
-def test_replace_range_tool_accepts_full_file_fingerprint_for_partial_range(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt"]).call())
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA\n"))
-    display = tool.preview()
-    result = tool.call()
-
-    assert display.startswith("--- ")
-    assert "# preview unavailable" not in display
-    assert "-beta\n" in display
-    assert "+BETA\n" in display
-    assert "* range: 1:2" in result
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\n"
-
-
-def test_replace_range_tool_reports_fingerprint_cached_range(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "0", "3"]).call())
-    path.write_text("alpha\nBETA\ngamma\n", encoding="utf-8")
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA\n"))
-
-    display = tool.preview()
-    assert "this fingerprint was cached for range(s): 0:3" in display
-    with pytest.raises(ToolCallError, match=r"cached for range\(s\): 0:3"):
-        tool.call()
-
-
-def test_replace_range_tool_rejects_fingerprint_mismatch(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, "bad", "alpha\n", "", "BETA\n"))
-
-    display = tool.preview()
-
-    assert display.startswith("ReplaceRange(")
-    assert "# preview unavailable: fingerprint mismatch" in display
-    assert "current " in display
-    assert "call Read(filepath, 1, 2)" in display
-    with pytest.raises(ToolCallError, match=r"call Read\(filepath, 1, 2\)"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "alpha\nbeta\n"
-
-
-def test_replace_range_cache_is_bounded(tmp_path):
-    session = Session(cwd=str(tmp_path))
-    store = session.state.range_fingerprints
-
-    for index in range(RangeFingerprintStore.MAX_ENTRIES + 5):
-        store.remember(filepath=str(tmp_path / "sample.txt"), start=index, end=index + 1, content="line " + str(index))
-
-    assert len(store) == RangeFingerprintStore.MAX_ENTRIES
-
-
-def test_replace_range_cache_survives_goal_rewording(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    Agent(session).apply_response({"actions": [{"type": "goal", "text": "new goal"}]})
-
-    ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "gamma\n", "BETA\n")).call()
-
-    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\n"
-
-
-def test_replace_range_cache_survives_cancel_until_next_run(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-    agent = Agent(session)
-
-    agent.cancel_current_goal()
-
-    assert len(session.state.range_fingerprints) == 1
-
-    class FakeModelClient:
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
-            return {"actions": [{"type": "chat", "text": "done"}]}
-
-    agent.model_client = FakeModelClient()
-    agent.run("next task")
-
-    assert len(session.state.range_fingerprints) == 0
-
-
-def test_replace_range_cache_clears_when_new_main_run_starts(tmp_path):
-    class FakeModelClient:
-        def request(self, system_prompt, user_prompt, *, activity="agent"):
-            return {"actions": [{"type": "chat", "text": "done"}]}
-
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-    agent = Agent(session)
-    agent.model_client = FakeModelClient()
-
-    agent.run("new task")
-
-    assert len(session.state.range_fingerprints) == 0
-
-
-def test_replace_range_tool_replaces_to_eof_when_end_is_zero(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "0"]).call())
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 0, fingerprint, "alpha\n", "", "tail\n"))
-    result = tool.call()
-
-    assert path.read_text(encoding="utf-8") == "alpha\ntail\n"
-    assert "* range: 1:3" in result
-
-
-def test_replace_range_tool_inserts_when_start_equals_end(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "1"]).call())
-
-    ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 1, fingerprint, "alpha\n", "gamma\n", "beta\n")).call()
-
-    assert path.read_text(encoding="utf-8") == "alpha\nbeta\ngamma\n"
-
-
-def test_replace_range_tool_rejects_wide_fingerprint_for_empty_insert_range(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt"]).call())
-    path.write_text("zero\nalpha\nbeta\ngamma\n", encoding="utf-8")
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 1, fingerprint, "alpha\n", "beta\n", "INSERT\n"))
-
-    assert "# preview unavailable: fingerprint mismatch" in tool.preview()
-    with pytest.raises(ToolCallError, match=r"call Read\(filepath, 1, 1\)"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "zero\nalpha\nbeta\ngamma\n"
-
-
-def test_replace_range_tool_rejects_no_change(tmp_path):
-    path = tmp_path / "sample.txt"
-    path.write_text("alpha\nbeta\n", encoding="utf-8")
-    session = Session(cwd=str(tmp_path))
-    fingerprint = _fingerprint(ReadTool.make(session, ["sample.txt", "1", "2"]).call())
-
-    tool = ReplaceRangeTool.make(session, _replace_args("sample.txt", 1, 2, fingerprint, "alpha\n", "", "beta\n"))
-
-    with pytest.raises(ToolCallError, match="range replacement produced no changes"):
-        tool.call()
-    assert path.read_text(encoding="utf-8") == "alpha\nbeta\n"
diff --git a/tests/test_nanocode_search_tool.py b/tests/test_nanocode_search_tool.py
index a7b43ac..f2ceb7e 100644
--- a/tests/test_nanocode_search_tool.py
+++ b/tests/test_nanocode_search_tool.py
@@ -1,7 +1,9 @@
+import re
+
 import nanocode
 import pytest
 
-from nanocode import SearchTool, Session, ToolCallError
+from nanocode import EditTool, SearchTool, Session, ToolCallError
 
 
 def test_search_tool_python_backend_finds_or_patterns_and_applies_glob(tmp_path, monkeypatch):
@@ -40,7 +42,7 @@ def test_search_tool_rejects_many_plain_args_without_explicit_path(tmp_path):
     session = Session(cwd=str(tmp_path))
 
     with pytest.raises(ToolCallError, match="requires 1 to 4 args"):
-        SearchTool.make(session, ["class Edit", "class Bash", "class Search", "class Read", "class ReplaceRange"])
+        SearchTool.make(session, ["class Edit", "class Bash", "class Search", "class Read", "class CreateFile"])
 
 
 def test_search_tool_treats_second_plain_arg_as_path(tmp_path):
@@ -175,10 +177,24 @@ def test_search_tool_uses_python_when_rg_is_missing(tmp_path, monkeypatch):
 
     assert "* engine: python" in result
     assert "* sample.txt:1: needle" in result
-    assert "  > 1: needle" in result
+    assert "  > 0:" in result and "|needle" in result
+
+
+def test_search_tool_context_anchor_can_drive_edit_file(tmp_path, monkeypatch):
+    path = tmp_path / "sample.txt"
+    path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+    monkeypatch.setattr(nanocode.shutil, "which", lambda name: "")
+
+    result = SearchTool.make(session, ["beta", "sample.txt", "context=0"]).call()
+    anchor = re.search(r">\s+(\d+:[0-9a-f]{6})\|beta", result).group(1)
+
+    EditTool.make(session, ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "BETA\n"}]]).call()
 
+    assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\n"
 
-def test_search_tool_python_backend_includes_four_context_lines(tmp_path, monkeypatch):
+
+def test_search_tool_python_backend_includes_default_context_lines(tmp_path, monkeypatch):
     path = tmp_path / "sample.txt"
     path.write_text("one\ntwo\nthree\nneedle\nfive\nsix\nseven\neight\nnine\n", encoding="utf-8")
     session = Session(cwd=str(tmp_path))
@@ -187,15 +203,13 @@ def test_search_tool_python_backend_includes_four_context_lines(tmp_path, monkey
     result = SearchTool.make(session, ["needle", "sample.txt"]).call()
 
     assert "* sample.txt:4: needle" in result
-    assert "    1: one" in result
-    assert "    2: two" in result
-    assert "    3: three" in result
-    assert "  > 4: needle" in result
-    assert "    5: five" in result
-    assert "    6: six" in result
-    assert "    7: seven" in result
-    assert "    8: eight" in result
-    assert "    9: nine" not in result
+    assert "  > 3:" in result and "|needle" in result
+    assert "|three" not in result
+    assert "|five" not in result
+    assert "|one" not in result
+    assert "|two" not in result
+    assert "|six" not in result
+    assert "|nine" not in result
 
 
 def test_search_tool_python_backend_supports_regex(tmp_path, monkeypatch):
@@ -222,13 +236,28 @@ def test_search_tool_supports_context_option_without_glob(tmp_path, monkeypatch)
 
     result = SearchTool.make(session, ["needle", "sample.txt", "context=3"]).call()
 
-    assert "    1: one" in result
-    assert "    2: two" in result
-    assert "    3: three" in result
-    assert "  > 4: needle" in result
-    assert "    5: five" in result
-    assert "    6: six" in result
-    assert "    7: seven" in result
+    assert "    0:" in result and "|one" in result
+    assert "    1:" in result and "|two" in result
+    assert "    2:" in result and "|three" in result
+    assert "  > 3:" in result and "|needle" in result
+    assert "    4:" in result and "|five" in result
+    assert "    5:" in result and "|six" in result
+    assert "    6:" in result and "|seven" in result
+
+
+def test_search_tool_omits_context_before_outer_excerpt(tmp_path, monkeypatch):
+    path = tmp_path / "sample.txt"
+    path.write_text(("before " + "x" * 300 + "\nneedle\n") * 4, encoding="utf-8")
+    session = Session(cwd=str(tmp_path))
+    monkeypatch.setattr(nanocode.shutil, "which", lambda name: "")
+    monkeypatch.setattr(SearchTool, "OUTPUT_CHARS", 700)
+
+    result = SearchTool.make(session, ["needle", "sample.txt", "context=1"]).call()
+
+    assert "* context_omitted:" in result
+    assert "* sample.txt:2: needle" in result
+    assert "|before " not in result
+    assert "[tool result excerpt]" not in result
 
 
 def test_search_tool_accepts_context_30(tmp_path):
@@ -248,11 +277,11 @@ def test_search_tool_supports_numeric_context_option_with_glob(tmp_path, monkeyp
     result = SearchTool.make(session, ["needle", ".", "*.txt", "2"]).call()
 
     assert "* keep.txt:3: needle" in result
-    assert "    1: zero" in result
-    assert "    2: one" in result
-    assert "  > 3: needle" in result
-    assert "    4: three" in result
-    assert "    5: four" in result
+    assert "    0:" in result and "|zero" in result
+    assert "    1:" in result and "|one" in result
+    assert "  > 2:" in result and "|needle" in result
+    assert "    3:" in result and "|three" in result
+    assert "    4:" in result and "|four" in result
     assert "skip.py" not in result
 
 
@@ -265,7 +294,7 @@ def test_search_tool_supports_glob_and_context_option(tmp_path, monkeypatch):
     result = SearchTool.make(session, ["needle", ".", "*.txt", "context=1"]).call()
 
     assert "* keep.txt:2: needle" in result
-    assert "  > 2: needle" in result
+    assert "  > 1:" in result and "|needle" in result
     assert "skip.py" not in result
 
 
@@ -363,8 +392,8 @@ def test_search_tool_supports_multiline_regex(tmp_path, monkeypatch):
     assert tool.pattern == "@dataclass.*\nclass.*State"
     assert "* engine: python-multiline" in result
     assert "* sample.py:1: @dataclass class State" in result
-    assert "  > 1: @dataclass" in result
-    assert "    2: class State:" in result
+    assert "  > 0:" in result and "|@dataclass" in result
+    assert "    1:" in result and "|class State:" in result
 
 
 def test_search_tool_rejects_invalid_context(tmp_path):