From 8515aed3d9bbe7420bc4555a8a549535728df40f Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 00:08:47 -0700 Subject: [PATCH 01/29] fix: rename EditFile to Edit in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b307d88..870a491 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ nanocode currently targets macOS and Linux. Windows is not supported. - File: `Read`, `LineCount`, `List`, `Search`. - Code navigation: `InspectCode` after `/index` builds the project index. -- Edit: `CreateFile`, `EditFile`. +- Edit: `CreateFile`, `Edit`. - Shell: `Bash`, `Git`. - Memory: `Recall` reads stored tool results by key. From c41ef528107d5d2dbe2ed478aa34b88857f1066b Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 01:16:43 -0700 Subject: [PATCH 02/29] Add dynamic file context for Read results --- nanocode.py | 231 +++++++++++++++++++++++++++++++++-- tests/test_nanocode_agent.py | 160 ++++++++++++++++++++++-- 2 files changed, 369 insertions(+), 22 deletions(-) diff --git a/nanocode.py b/nanocode.py index 3cc885c..618040b 100644 --- a/nanocode.py +++ b/nanocode.py @@ -20,6 +20,7 @@ import platform import re import selectors +import shlex import shutil import signal import subprocess @@ -1465,6 +1466,175 @@ def compact_block(cls, block: str) -> str: parts.append(_shorten(" ".join(output.split()), cls.COMPACT_OUTPUT_SUMMARY_CHARS)) return header + "\n out: " + ("; ".join(parts) if parts else "ok") + @classmethod + def render_blocks_for_prompt(cls, blocks: list[str]) -> list[str]: + return [cls.render_block_for_prompt(block) for block in blocks] + + @classmethod + def render_block_for_prompt(cls, block: str) -> str: + if not cls._is_read_result_block(block): + return block + compact = cls.compact_block(block) + if "\n out: " in compact: + content = "file_context" if cls._read_block_file_lines(block) else "recall" + return compact + "; content=" + content + return compact + + @classmethod + def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: + files: dict[str, dict[int, tuple[str, str]]] = {} + entries = sorted(cls._file_context_entries(blocks), key=lambda item: item[0]) + for _order, source, path, number, line in entries: + if source: + files.setdefault(path, {})[number] = (source, line) + if not files: + return "" + + lines = [ + "Source Policy:", + "- Built dynamically for this prompt from active raw Read results.", + "- Overlapping lines use the newest active Read result.", + "", + ] + for path in sorted(files): + segments = cls._file_context_segments(files[path]) + if not segments: + continue + lines.extend(["File: " + path, "Ranges:"]) + for start, end, source, _segment_lines in segments: + lines.append("- " + str(start) + ":" + str(end) + " source=" + source) + lines.append("Content:") + for start, end, source, segment_lines in segments: + lines.append("@@ " + str(start) + ":" + str(end) + " source=" + source) + lines.extend(segment_lines) + lines.append("") + + rendered = "\n".join(lines).rstrip() + return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered + + @classmethod + def _file_context_entries(cls, blocks: list[str]) -> list[tuple[int, str, str, int, str]]: + entries: list[tuple[int, str, str, int, str]] = [] + for block in blocks: + source = cls.result_key(block) + if source and cls._is_read_result_block(block): + entries.extend((cls.result_counter(block), source, path, number, line) for path, number, line in cls._read_block_file_lines(block)) + continue + entries.extend(cls._recall_block_file_entries(block)) + return entries + + @staticmethod + def _file_context_segments(file_lines: dict[int, tuple[str, str]]) -> list[tuple[int, int, str, list[str]]]: + items = sorted(file_lines.items()) + if not items: + return [] + segments: list[tuple[int, int, str, list[str]]] = [] + start = previous = items[0][0] + source, first_line = items[0][1] + segment_lines = [first_line] + for number, (line_source, line) in items[1:]: + if number == previous + 1 and line_source == source: + segment_lines.append(line) + previous = number + continue + segments.append((start, previous + 1, source, segment_lines)) + start = previous = number + source = line_source + segment_lines = [line] + segments.append((start, previous + 1, source, segment_lines)) + return segments + + @classmethod + def _read_block_file_lines(cls, block: str) -> list[tuple[str, int, str]]: + if not cls._is_read_result_block(block): + return [] + header, output = block.split("\n output:\n", 1) + default_path = cls._read_block_default_path(header) + return cls._read_output_file_lines(output, default_path=default_path) + + @classmethod + def _read_output_file_lines(cls, output: str, *, default_path: str) -> list[tuple[str, int, str]]: + file_lines: list[tuple[str, int, str]] = [] + for path, section in cls._read_output_file_sections(output, default_path=default_path): + if not path: + continue + for content in cls._read_output_content_sections(section): + for line in content.splitlines(): + match = re.match(r"(\d+):[0-9a-f]{6}\|", line) + if match: + file_lines.append((path, int(match.group(1)), line)) + return file_lines + + @classmethod + def _is_read_result_block(cls, block: str) -> bool: + if not cls.is_full_block(block): + return False + header, output = block.split("\n output:\n", 1) + return bool(re.search(r"\btool=Read\b", header) and "" in output) + + @classmethod + def _recall_block_file_entries(cls, block: str) -> list[tuple[int, str, str, int, str]]: + if not cls.is_full_block(block): + return [] + header, output = block.split("\n output:\n", 1) + if not re.search(r"\btool=Recall\b", header) or "RecallToolResult:" not in output: + return [] + entries: list[tuple[int, str, str, int, str]] = [] + for source, description, content in cls._recall_output_items(output): + default_path = cls._read_description_default_path(description) + order = cls._result_key_counter(source) + entries.extend((order, source, path, number, line) for path, number, line in cls._read_output_file_lines(content, default_path=default_path)) + return entries + + @staticmethod + def _recall_output_items(output: str) -> Iterator[tuple[str, str, str]]: + for match in re.finditer(r"(?ms)^- result_key: (tr\.\d+)\n(.*?)(?=^- result_key: |\Z)", output): + source = match.group(1) + body = match.group(2) + description_match = re.search(r"(?m)^ description: (.*)$", body) + content_match = re.search(r"(?ms)^ \n(.*)^ ", body) + if content_match: + yield source, (description_match.group(1) if description_match else ""), content_match.group(1) + + @staticmethod + def _read_description_default_path(description: str) -> str: + match = re.match(r"(?:success|failure) Read\s+(.+?)(?:\s+-\s+.*)?$", description) + if match is None: + return "" + try: + tokens = shlex.split(match.group(1)) + except ValueError: + return "" + return tokens[0] if tokens else "" + + @staticmethod + def _read_block_default_path(header: str) -> str: + marker = " args=" + start = header.find(marker) + if start < 0: + return "" + try: + args, _end = json.JSONDecoder().raw_decode(header[start + len(marker) :]) + except json.JSONDecodeError: + return "" + return str(args[0]) if isinstance(args, list) and args else "" + + @staticmethod + def _read_output_file_sections(output: str, *, default_path: str) -> Iterator[tuple[str, str]]: + file_matches = list(re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", output)) + if not file_matches: + yield default_path, output + return + for match in file_matches: + section = match.group(1) + path_match = re.search(r"(.*?)", section) + yield (path_match.group(1).strip() if path_match else default_path), section + + @staticmethod + def _read_output_content_sections(text: str) -> Iterator[str]: + for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", text): + yield match.group(1) + @classmethod def bound_block(cls, block: str, *, max_chars: int) -> str: if len(block) <= max_chars: @@ -1486,6 +1656,10 @@ def result_key(cls, block: str) -> str: @classmethod def result_counter(cls, block: str) -> int: key = cls.result_key(block) + return cls._result_key_counter(key) + + @staticmethod + def _result_key_counter(key: str) -> int: return int(key.split(".", 1)[1]) if key else 0 @classmethod @@ -3387,6 +3561,9 @@ def _state_tool_schema(name: str) -> Json: Tool Result Index: {tool_result_index} +File Context: +{file_context} + Kept Tool Results: {kept_tool_results} @@ -3439,6 +3616,9 @@ def _state_tool_schema(name: str) -> Json: --- Tool Context --- +File Context: +{file_context} + Kept Tool Results: {kept_tool_results} @@ -5073,6 +5253,7 @@ def __init__(self, session: Session): self.failed_tool_call_count = 0 self.agent_feedback_errors: list[str] = [] self.observe_feedback_errors: list[str] = [] + self.recalled_context_executions: list[ToolCallExecution] = [] self.task_alignment_required = False self.incomplete_task_context_at_turn_start = False self.stream_stop_requested = False @@ -5089,13 +5270,19 @@ def apply_context_budget(self) -> None: def build_user_prompt(self) -> str: tool_result_index, unreduced_tool_results, latest_tool_results = self._format_act_tool_result_context() + budget = self.context_budget() + file_context = ToolResultContext.format_file_context( + self._act_file_context_blocks(), + max_chars=budget.raw_chars + budget.kept_chars, + ) conversation = self.session.state.conversation return AGENT_USER_PROMPT_TEMPLATE.format( environment=self._format_environment(), conversation_history="\n\n".join(item.format() for item in conversation) if conversation else "(empty)", user_rules=self.session.state.user_rules.format(), - kept_tool_results="\n\n".join(self.tool_context.kept_results) or "(empty)", + kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", tool_result_index=tool_result_index or "(empty)", + file_context=file_context or "(empty)", unreduced_tool_results=unreduced_tool_results or "(empty)", latest_tool_results=latest_tool_results or "(empty)", state_sections=self._format_state_sections(), @@ -5150,14 +5337,21 @@ def _format_environment(self) -> str: def build_observe_prompt(self) -> str: current = self.blackboard - unreduced = "\n\n".join(self._unreferenced_unreduced_blocks()) + unreduced_blocks = self._unreferenced_unreduced_blocks() + budget = self.context_budget() + file_context = ToolResultContext.format_file_context( + self.tool_context.kept_results + unreduced_blocks, + max_chars=budget.raw_chars + budget.kept_chars, + ) + unreduced = "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced_blocks)) return AGENT_OBSERVE_USER_PROMPT_TEMPLATE.format( user_rules=self.session.state.user_rules.format(), goal=current.goal or "(empty)", plan="\n".join(item.format() for item in current.plan) if current.plan else "(empty)", leads="\n".join(item.format() for item in current.leads) if current.leads else "(empty)", known="\n".join(KnownItem.format_item(item) for item in current.known) if current.known else "(empty)", - kept_tool_results="\n\n".join(self.tool_context.kept_results) or "(empty)", + file_context=file_context or "(empty)", + kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", errors="\n".join("- " + error for error in self.observe_feedback_errors) or "(empty)", unreduced_tool_results=unreduced or "(empty)", user_request=self._format_user_request(), @@ -5332,7 +5526,16 @@ def _format_act_tool_result_context(self) -> tuple[str, str, str]: sections.append("Archived Recall Index:\n" + "\n".join(archived)) if timeline: sections.append("Current Task Timeline:\n" + "\n".join(timeline)) - return "\n\n".join(sections), "\n\n".join(unreduced), "\n\n".join(latest) + return ( + "\n\n".join(sections), + "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced)), + "\n\n".join(ToolResultContext.render_blocks_for_prompt(latest)), + ) + + def _act_file_context_blocks(self) -> list[str]: + checkpoint = self.blackboard.memory_checkpoint_tool_result_counter + recalled_blocks = [ToolResultContext.format_execution(execution) for execution in self.recalled_context_executions] + return self.tool_context.kept_results + self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() + recalled_blocks def _prune_tool_result_store(self) -> None: keep = self._protected_tool_result_keys() @@ -5559,12 +5762,19 @@ def execute_tool_calls( append_to_latest: bool = False, ) -> str: self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) - self.tool_context.append_latest( - self.tool_runner.latest_executions, - max_index_items=self.context_budget().index_items, - checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, - append=append_to_latest, - ) + context_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name != ToolResultTool.NAME] + recalled = [execution for execution in self.tool_runner.latest_executions if execution.call.name == ToolResultTool.NAME and execution.outcome == "success"] + if append_to_latest: + self.recalled_context_executions.extend(recalled) + else: + self.recalled_context_executions = recalled + if context_executions: + self.tool_context.append_latest( + context_executions, + max_index_items=self.context_budget().index_items, + checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, + append=append_to_latest, + ) self.session.state.turn_tool_calls += len(self.tool_runner.latest_executions) self.session.state.session_tool_calls += len(self.tool_runner.latest_executions) for execution in self.tool_runner.latest_executions: @@ -6245,6 +6455,7 @@ def run( ) self._prune_tool_result_store() self.mode = AgentMode.ACT + self.recalled_context_executions = [] self.session.state.turn_tool_calls = 0 self.session.state.turn_model_calls = 0 old_goal = self.blackboard.goal diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 5195355..c66e7d9 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -27,6 +27,27 @@ def _blocks_text(blocks): return "\n".join(blocks) +def _prompt_section(prompt: str, title: str, next_title: str) -> str: + remainder = prompt.split(title + ":\n", 1)[1] + markers = ("\n\n" + next_title + ":", "\n\n--- " + next_title + " ---") + indexes = [index for marker in markers for index in [remainder.find(marker)] if index >= 0] + return remainder[: min(indexes)] if indexes else remainder + + +def _stored_read_result(line: str) -> str: + return "\n".join( + [ + "", + ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', + " 0:1", + " ", + "0:aaaaaa|" + line, + " ", + "", + ] + ) + + def _observe_tool_result_context(agent): return "\n\n".join(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) @@ -391,10 +412,13 @@ def test_referenced_unreduced_results_do_not_count_toward_observe_threshold(tmp_ assert agent.mode == nanocode.AgentMode.OBSERVE observe_prompt = agent.build_observe_prompt() + file_context = _prompt_section(observe_prompt, "File Context", "Kept Tool Results") observe_raw = observe_prompt.split("Unreduced Raw Tool Results:\n", 1)[1].split("\n--- Blocking Feedback ---", 1)[0] - assert "one.txt" not in observe_raw - assert "two.txt" in observe_raw - assert "three.txt" in observe_raw + assert "one.txt" not in file_context + assert "two.txt" in file_context + assert "three.txt" in file_context + assert "" not in observe_raw + assert "content=file_context" in observe_raw def test_unsourced_known_does_not_cover_unreduced_result(tmp_path, monkeypatch): @@ -425,12 +449,111 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk assert "key=tr.1" in _blocks_text(agent.tool_context.recent) index, unreduced, latest = agent._format_act_tool_result_context() assert "one.txt" in unreduced - assert "one\n" in unreduced + assert "|one" not in unreduced + assert "content=file_context" in unreduced assert "two.txt" in latest - assert "two\n" in latest + assert "|two" not in latest + assert "content=file_context" in latest assert "recall=tr.1" in index assert "recall=tr.2" in index assert "output:\n" not in index + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") + assert "File: one.txt" in file_context + assert "|one" in file_context + assert "File: two.txt" in file_context + assert "|two" in file_context + + +def test_act_prompt_file_context_replaces_overlapping_read_lines(tmp_path, monkeypatch): + path = tmp_path / "sample.txt" + path.write_text("old0\nold1\nold2\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + _set_context_budget(monkeypatch, agent, raw_chars=10_000) + + agent.execute_tool_calls([{"name": "Read", "intention": "read head", "args": ["sample.txt", "0,2"]}]) + path.write_text("old0\nnew1\nnew2\n", encoding="utf-8") + agent.execute_tool_calls([{"name": "Read", "intention": "read overlap", "args": ["sample.txt", "1,3"]}]) + + prompt = agent.build_user_prompt() + file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") + unreduced = _prompt_section(prompt, "Unreduced Tool Results", "Latest Tool Results") + assert "File: sample.txt" in file_context + assert "0:1 source=tr.1" in file_context + assert "1:3 source=tr.2" in file_context + assert "|old0" in file_context + assert "|new1" in file_context + assert "|new2" in file_context + assert "|old1" not in file_context + assert "" not in latest + assert "" not in unreduced + assert "content=file_context" in latest + assert "content=file_context" in unreduced + + +def test_act_prompt_folds_excerpted_read_result(tmp_path): + path = tmp_path / "sample.txt" + path.write_text("x" * 20_000 + "\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + + agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": ["sample.txt", "0,1"]}]) + + prompt = agent.build_user_prompt() + latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") + assert "" not in latest + assert "excerpt" in latest + assert "recall=tr.1" in latest + assert "content=file_context" in latest or "content=recall" in latest + assert "x" * 100 not in latest + + +def test_recall_read_projects_into_file_context_without_result_context(tmp_path): + session = Session(cwd=str(tmp_path)) + session.state.tool_result_counter = 1 + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem( + description="success Read sample.txt 0,1", + value=_stored_read_result("alpha"), + ) + agent = Agent(session) + + latest = agent.execute_tool_calls([{"name": "Recall", "intention": "recall read", "args": ["tr.1"]}]) + + assert latest == "" + assert agent.tool_context.latest == [] + assert list(session.state.tool_result_store) == ["tr.1"] + assert session.state.tool_result_counter == 1 + prompt = agent.build_user_prompt() + file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + latest_results = _prompt_section(prompt, "Latest Tool Results", "Current Input") + assert "File: sample.txt" in file_context + assert "0:1 source=tr.1" in file_context + assert "|alpha" in file_context + assert "tool=Recall" not in latest_results + assert "RecallToolResult" not in prompt + + +def test_recalled_read_does_not_override_newer_read(tmp_path): + (tmp_path / "sample.txt").write_text("new\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + session.state.tool_result_counter = 1 + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem( + description="success Read sample.txt 0,1", + value=_stored_read_result("old"), + ) + agent = Agent(session) + + agent.execute_tool_calls([{"name": "Read", "intention": "read new", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall old", "args": ["tr.1"]}]) + + assert list(session.state.tool_result_store) == ["tr.1", "tr.2"] + assert session.state.tool_result_counter == 2 + prompt = agent.build_user_prompt() + file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + latest_results = _prompt_section(prompt, "Latest Tool Results", "Current Input") + assert "0:1 source=tr.2" in file_context + assert "|new" in file_context + assert "|old" not in file_context + assert "tool=Recall" not in latest_results def test_empty_observe_compacts_unreduced_tool_results(tmp_path, monkeypatch): @@ -706,7 +829,11 @@ def test_act_prompt_includes_kept_tool_results(tmp_path): prompt = agent.build_user_prompt() assert "Kept Tool Results:" in prompt - assert "alpha unique" in prompt + file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + kept = _prompt_section(prompt, "Kept Tool Results", "Unreduced Tool Results") + assert "alpha unique" in file_context + assert "" not in kept + assert "content=file_context" in kept assert "beta unique" not in prompt assert len(agent.tool_context.kept_results) == 1 @@ -3005,10 +3132,12 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert "log: .nanocode/sessions/" not in messages[0] assert messages[-1] == "done" assert len(fake_client.user_prompts) == 3 - assert "" in fake_client.user_prompts[1] + assert "File Context:" in fake_client.user_prompts[1] + assert "alpha" in fake_client.user_prompts[1] + assert "" not in fake_client.user_prompts[1] assert "alpha" in fake_client.user_prompts[2] assert "Kept Tool Results:" in fake_client.user_prompts[2] - assert "" in fake_client.user_prompts[2] + assert "" not in fake_client.user_prompts[2] assert 'tool=Read args=["sample.txt","0,1"]' in _blocks_text(agent.tool_context.latest) assert agent.tool_context.recent == [] assert agent.blackboard.known == ["Read sample.txt and found alpha."] @@ -3293,10 +3422,15 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert response["actions"][-1]["message_for_complete"] == "done" assert len(agent.model_client.user_prompts) == 4 - assert "" in agent.model_client.user_prompts[1] - assert "" in agent.model_client.user_prompts[2] + assert "File Context:" in agent.model_client.user_prompts[1] + assert "alpha" in agent.model_client.user_prompts[1] + assert "" not in agent.model_client.user_prompts[1] + assert "File Context:" in agent.model_client.user_prompts[2] + assert "alpha" in agent.model_client.user_prompts[2] + assert "" not in agent.model_client.user_prompts[2] assert "Kept Tool Results:" in agent.model_client.user_prompts[3] - assert "" in agent.model_client.user_prompts[3] + assert "alpha" in agent.model_client.user_prompts[3] + assert "" not in agent.model_client.user_prompts[3] assert 'tool=Read args=["sample.txt","0,1"]' in _blocks_text(agent.tool_context.latest) assert agent.tool_context.recent == [] @@ -3375,7 +3509,9 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert response["actions"][-1]["message_for_complete"] == "done too early" assert "done too early" in messages assert len(agent.model_client.user_prompts) == 3 - assert "" in agent.model_client.user_prompts[1] + assert "File Context:" in agent.model_client.user_prompts[1] + assert "alpha" in agent.model_client.user_prompts[1] + assert "" not in agent.model_client.user_prompts[1] assert "" not in agent.model_client.user_prompts[2] From 92e28710592ff1e8d58ad342a07b533bd3066214 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 01:25:55 -0700 Subject: [PATCH 03/29] Unify tool result context management as tools --- nanocode.py | 274 ++++++++++++++++++++++++++++++++--- tests/test_nanocode_agent.py | 60 +++++++- 2 files changed, 307 insertions(+), 27 deletions(-) diff --git a/nanocode.py b/nanocode.py index 618040b..4f8f062 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1310,12 +1310,14 @@ class ToolResultContext: latest: list[str] = field(default_factory=list) recent: list[str] = field(default_factory=list) kept_results: list[str] = field(default_factory=list) + reactivated_keys: set[str] = field(default_factory=set) def forget_results(self, keys: list[str]) -> list[str]: wanted = set(keys) if not wanted: return [] removed = [] + self.reactivated_keys.difference_update(wanted) def update(blocks: list[str], *, compact: bool) -> list[str]: updated = [] @@ -1340,6 +1342,10 @@ def keep_results(self, actions: list[Json], observed_blocks: list[str], *, max_c if _json_str(action.get("type")) == "keep": wanted.extend(key for key in _source_from_json(action) if key.startswith("tr.")) wanted = list(dict.fromkeys(wanted)) + return self.keep_result_keys(wanted, observed_blocks, max_chars=max_chars, max_block_chars=max_block_chars) + + def keep_result_keys(self, keys: list[str], observed_blocks: list[str], *, max_chars: int, max_block_chars: int) -> list[str]: + wanted = list(dict.fromkeys(keys)) if not wanted: return [] by_key = self.blocks_by_key(observed_blocks) @@ -1376,8 +1382,10 @@ def prune_recent(self, *, max_index_items: int, checkpoint: int) -> None: def compact_observed(self, observed_blocks: list[str]) -> None: observed = {self.result_counter(block) for block in observed_blocks} + observed_keys = {self.result_key(block) for block in observed_blocks} if not observed: return + self.reactivated_keys.difference_update(observed_keys) def compact(block: str) -> str: if self.is_full_block(block) and self.result_counter(block) in observed: @@ -1427,9 +1435,9 @@ def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = N def raw_context_chars(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> int: return len("\n\n".join(self.unreduced_recent_blocks(checkpoint, exclude_keys=exclude_keys) + self.latest_raw_blocks(exclude_keys=exclude_keys))) - @classmethod - def _needs_reduction(cls, block: str, checkpoint: int) -> bool: - return cls.is_full_block(block) and cls.result_counter(block) > checkpoint + def _needs_reduction(self, block: str, checkpoint: int) -> bool: + key = self.result_key(block) + return self.is_full_block(block) and (self.result_counter(block) > checkpoint or key in self.reactivated_keys) @classmethod def blocks_by_key(cls, blocks: list[str]) -> dict[str, str]: @@ -1447,6 +1455,20 @@ def format_execution(execution: ToolCallExecution) -> str: lines.extend([" output:", execution.output]) return "\n".join(lines) + def reactivate_result_blocks(self, blocks: list[str], *, max_index_items: int, checkpoint: int, append: bool = False) -> list[str]: + blocks = [block for block in blocks if self.is_full_block(block) and self.result_key(block)] + keys = set(self.blocks_by_key(blocks)) + if not keys: + return [] + self.recent = [block for block in self.recent if self.result_key(block) not in keys] + self.latest = [block for block in self.latest if self.result_key(block) not in keys] + self.reactivated_keys.update(keys) + if self.latest and not append: + self.recent.extend(self.latest) + self.latest = [*self.latest, *blocks] if append else blocks + self.prune_recent(max_index_items=max_index_items, checkpoint=checkpoint) + return [key for key in self.blocks_by_key(blocks) if key in keys] + @staticmethod def is_full_block(block: str) -> bool: return "\n output:\n" in block @@ -1586,6 +1608,37 @@ def _recall_block_file_entries(cls, block: str) -> list[tuple[int, str, str, int entries.extend((order, source, path, number, line) for path, number, line in cls._read_output_file_lines(content, default_path=default_path)) return entries + @classmethod + def recalled_result_blocks(cls, recall_block: str) -> list[str]: + if not cls.is_full_block(recall_block): + return [] + header, output = recall_block.split("\n output:\n", 1) + if not re.search(r"\btool=Recall\b", header) or "RecallToolResult:" not in output: + return [] + return [cls._stored_result_block(source, description, content) for source, description, content in cls._recall_output_items(output)] + + @classmethod + def _stored_result_block(cls, source: str, description: str, content: str) -> str: + status, tool_name, args, intention = cls._stored_result_summary(description) + lines = ["- " + status + " tool=" + tool_name + " args=" + json.dumps(args, ensure_ascii=False, separators=(",", ":")) + " key=" + source] + if intention: + lines.append(" why: " + intention) + lines.extend([" output:", content]) + return "\n".join(lines) + + @staticmethod + def _stored_result_summary(description: str) -> tuple[str, str, list[str], str]: + raw_status, separator, rest = description.partition(" ") + status = "ok" if raw_status == "success" else "fail" if raw_status == "failure" else "ok" + if not separator: + return status, "Recall", [], "" + call_text, intention = (rest.split(" - ", 1) + [""])[:2] if " - " in rest else (rest, "") + try: + tokens = shlex.split(call_text) + except ValueError: + tokens = call_text.split() + return status, (tokens[0] if tokens else "Recall"), tokens[1:], intention + @staticmethod def _recall_output_items(output: str) -> Iterator[tuple[str, str, str]]: for match in re.finditer(r"(?ms)^- result_key: (tr\.\d+)\n(.*?)(?=^- result_key: |\Z)", output): @@ -3352,6 +3405,74 @@ def _content(self, item: ToolResultItem) -> str: return "\n".join(chunks) +def _tool_result_keys_from_args(args: list[JsonValue]) -> list[str]: + keys: list[str] = [] + values: list[JsonValue] = [] + for arg in args: + values.extend(arg if isinstance(arg, list) else [arg]) + for value in values: + key = str(value).strip() + if not re.fullmatch(r"tr\.\d+", key): + raise ToolCallArgError("invalid result key: use tr.N") + keys.append(key) + keys = list(dict.fromkeys(keys)) + if not keys: + raise ToolCallArgError("requires at least one tr.N key") + return keys + + +@dataclass +class ForgetTool(Tool): + NAME: ClassVar[str] = "Forget" + EFFECT: ClassVar[ToolEffect] = ToolEffect.OTHER + DESCRIPTION: ClassVar[tuple[str, ...]] = ( + "Remove visible tool result keys from active context; keys remain recallable.", + "This is the inverse of Recall for active context membership.", + "Does not create a new result key.", + ) + SIGNATURE: ClassVar[str] = "Forget(key[, key...]) -> remove active context entries" + EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["tr.1", "tr.2"]',) + REQUIRES_CONFIRMATION: ClassVar[bool | None] = False + + keys: list[str] + + @classmethod + def make(cls, session: Session, args: list[JsonValue]) -> Self: + return cls(keys=_tool_result_keys_from_args(args)) + + def preview(self) -> str: + return "Forget " + ", ".join(self.keys) + + def call(self) -> str: + return "\n* requested: " + ", ".join(self.keys) + "\n" + + +@dataclass +class KeepTool(Tool): + NAME: ClassVar[str] = "Keep" + EFFECT: ClassVar[ToolEffect] = ToolEffect.OTHER + DESCRIPTION: ClassVar[tuple[str, ...]] = ( + "Keep visible raw tool result keys in active context.", + "Use during observe or when a visible result should survive context reduction.", + "Does not create a new result key.", + ) + SIGNATURE: ClassVar[str] = "Keep(key[, key...]) -> keep active context entries" + EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["tr.1", "tr.2"]',) + REQUIRES_CONFIRMATION: ClassVar[bool | None] = False + + keys: list[str] + + @classmethod + def make(cls, session: Session, args: list[JsonValue]) -> Self: + return cls(keys=_tool_result_keys_from_args(args)) + + def preview(self) -> str: + return "Keep " + ", ".join(self.keys) + + def call(self) -> str: + return "\n* requested: " + ", ".join(self.keys) + "\n" + + ############################ # Tool Registry ############################ @@ -3368,7 +3489,11 @@ def _content(self, item: ToolResultItem) -> str: BashTool.NAME: BashTool, GitTool.NAME: GitTool, ToolResultTool.NAME: ToolResultTool, + ForgetTool.NAME: ForgetTool, + KeepTool.NAME: KeepTool, } +CONTEXT_TOOL_NAMES: frozenset[str] = frozenset({ToolResultTool.NAME, ForgetTool.NAME, KeepTool.NAME}) +CONTEXT_TOOL_CLASSES: tuple[ToolClass, ...] = (ToolResultTool, ForgetTool, KeepTool) def _canonical_tool_name(name: str | None) -> str: @@ -4617,7 +4742,7 @@ def execute( call = ParsedToolCall(name="InvalidToolCall", intention=summary, args=[]) result_key = "" result_excerpted = False - if call.name != ToolResultTool.NAME: + if call.name not in CONTEXT_TOOL_NAMES: result_key = self._store_tool_result(call, outcome, output) item = self.session.state.tool_result_store[result_key] output = item.value @@ -5214,7 +5339,7 @@ class Agent: MAX_AGENT_FEEDBACK_ERROR_LEN: ClassVar[int] = 220 MODEL_TIMEOUT_RETRY_DELAYS: ClassVar[tuple[int, ...]] = (3, 10, 20, 30, 60, 120) ACT_ACTION_TYPES: ClassVar[set[str]] = {"goal", "plan", "lead", "known", "tool", "verify", "user_rule", "forget"} - OBSERVE_ACTION_TYPES: ClassVar[set[str]] = {"keep", "lead", "known", "forget"} + OBSERVE_ACTION_TYPES: ClassVar[set[str]] = {"keep", "lead", "known", "forget", "tool"} COMPLETED_PLAN_STATUSES: ClassVar[set[PlanStatus]] = {PlanStatus.DONE, PlanStatus.BLOCKED} MAX_COMPLETED_GOAL_TOOL_RESULTS: ClassVar[int] = 50 RECENT_EDITS: ClassVar[int] = 20 @@ -5253,7 +5378,9 @@ def __init__(self, session: Session): self.failed_tool_call_count = 0 self.agent_feedback_errors: list[str] = [] self.observe_feedback_errors: list[str] = [] - self.recalled_context_executions: list[ToolCallExecution] = [] + self.latest_context_tool_kept: list[str] = [] + self.latest_context_tool_forgotten: list[str] = [] + self.latest_context_tool_recalled: list[str] = [] self.task_alignment_required = False self.incomplete_task_context_at_turn_start = False self.stream_stop_requested = False @@ -5534,8 +5661,7 @@ def _format_act_tool_result_context(self) -> tuple[str, str, str]: def _act_file_context_blocks(self) -> list[str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter - recalled_blocks = [ToolResultContext.format_execution(execution) for execution in self.recalled_context_executions] - return self.tool_context.kept_results + self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() + recalled_blocks + return self.tool_context.kept_results + self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() def _prune_tool_result_store(self) -> None: keep = self._protected_tool_result_keys() @@ -5630,10 +5756,10 @@ def _step_prompts(self) -> tuple[str, str, str]: def _tool_schemas(self) -> list[Json]: if self.mode == AgentMode.OBSERVE: - action_names = self.OBSERVE_ACTION_TYPES - tool_classes: Iterable[ToolClass] = () + action_names = self.OBSERVE_ACTION_TYPES - {"tool", "keep", "forget"} + tool_classes: Iterable[ToolClass] = CONTEXT_TOOL_CLASSES else: - action_names = self.ACT_ACTION_TYPES - {"tool"} + action_names = self.ACT_ACTION_TYPES - {"tool", "forget"} tool_classes = tuple(TOOL_REGISTRY.values()) if not _code_index_available(self.session): tool_classes = tuple(tool for tool in tool_classes if tool is not InspectCodeTool) @@ -5760,21 +5886,25 @@ def execute_tool_calls( confirm: ConfirmCallback | None = None, on_auto_approve: ToolDisplayCallback | None = None, append_to_latest: bool = False, + context_keep_blocks: list[str] | None = None, ) -> str: self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) - context_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name != ToolResultTool.NAME] - recalled = [execution for execution in self.tool_runner.latest_executions if execution.call.name == ToolResultTool.NAME and execution.outcome == "success"] - if append_to_latest: - self.recalled_context_executions.extend(recalled) - else: - self.recalled_context_executions = recalled - if context_executions: + self.latest_context_tool_kept = [] + self.latest_context_tool_forgotten = [] + self.latest_context_tool_recalled = [] + regular_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name not in CONTEXT_TOOL_NAMES] + if regular_executions: self.tool_context.append_latest( - context_executions, + regular_executions, max_index_items=self.context_budget().index_items, checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, append=append_to_latest, ) + self._apply_context_tool_executions( + self.tool_runner.latest_executions, + append_to_latest=append_to_latest or bool(regular_executions), + keep_source_blocks=context_keep_blocks, + ) self.session.state.turn_tool_calls += len(self.tool_runner.latest_executions) self.session.state.session_tool_calls += len(self.tool_runner.latest_executions) for execution in self.tool_runner.latest_executions: @@ -5783,6 +5913,43 @@ def execute_tool_calls( self.mode = AgentMode.OBSERVE return "\n\n".join(self.tool_context.latest) + def _apply_context_tool_executions( + self, + executions: list[ToolCallExecution], + *, + append_to_latest: bool, + keep_source_blocks: list[str] | None, + ) -> None: + for execution in executions: + if execution.outcome != "success": + continue + if execution.call.name == ToolResultTool.NAME: + blocks = ToolResultContext.recalled_result_blocks(ToolResultContext.format_execution(execution)) + self.latest_context_tool_recalled.extend( + self.tool_context.reactivate_result_blocks( + blocks, + max_index_items=self.context_budget().index_items, + checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, + append=append_to_latest or bool(self.tool_context.latest), + ) + ) + elif execution.call.name == ForgetTool.NAME: + self.latest_context_tool_forgotten.extend(self.tool_context.forget_results(_tool_result_keys_from_args(execution.call.args))) + elif execution.call.name == KeepTool.NAME: + source_blocks = keep_source_blocks if keep_source_blocks is not None else self._visible_raw_tool_result_blocks() + self.latest_context_tool_kept.extend( + self.tool_context.keep_result_keys( + _tool_result_keys_from_args(execution.call.args), + source_blocks, + max_chars=self.context_budget().kept_chars, + max_block_chars=self.context_budget().kept_block_chars, + ) + ) + + def _visible_raw_tool_result_blocks(self) -> list[str]: + checkpoint = self.blackboard.memory_checkpoint_tool_result_counter + return self.tool_context.unreduced_blocks(checkpoint) + self.tool_context.latest_raw_blocks() + self.tool_context.kept_results + def _should_observe_after_tools(self) -> bool: pending = self._unreferenced_unreduced_blocks() if not pending: @@ -5924,6 +6091,12 @@ def _response_actions(self, response: Json) -> list[Json]: @staticmethod def _normalize_action(action: Json) -> Json: action_type = _json_str(action.get("type")) + tool_name = _canonical_tool_name(action_type) + if tool_name in TOOL_REGISTRY and ("args" in action or "intention" in action): + normalized = dict(action) + normalized["type"] = "tool" + normalized["name"] = tool_name + return normalized canonical_action_type = _canonical_protocol_action_type(action_type) if canonical_action_type in PROTOCOL_ACTION_TYPES: if canonical_action_type == action_type: @@ -5931,7 +6104,6 @@ def _normalize_action(action: Json) -> Json: normalized = dict(action) normalized["type"] = canonical_action_type return normalized - tool_name = _canonical_tool_name(action_type) if tool_name not in TOOL_REGISTRY: return action normalized = dict(action) @@ -5939,6 +6111,38 @@ def _normalize_action(action: Json) -> Json: normalized["name"] = tool_name return normalized + def _context_actions_from_tool_calls(self, tool_calls: list[JsonValue]) -> list[Json]: + actions: list[Json] = [] + for value in tool_calls: + try: + call = self.tool_runner.parse_tool_call(value) + except ToolCallArgError: + continue + if call.name == ForgetTool.NAME: + try: + keys = _tool_result_keys_from_args(call.args) + except ToolCallArgError: + continue + actions.append({"type": "forget", "source": keys, "reason": call.intention or "context tool"}) + elif call.name == KeepTool.NAME: + try: + keys = _tool_result_keys_from_args(call.args) + except ToolCallArgError: + continue + actions.append({"type": "keep", "source": keys, "reason": call.intention or "context tool"}) + return actions + + def _non_context_tool_error(self, tool_calls: list[JsonValue]) -> str: + invalid = [] + for value in tool_calls: + try: + call = self.tool_runner.parse_tool_call(value) + except ToolCallArgError: + continue + if call.name not in CONTEXT_TOOL_NAMES: + invalid.append(call.name) + return ", ".join(dict.fromkeys(invalid)) + def _gate_action_types( self, actions: list[Json], @@ -6126,7 +6330,8 @@ def _gate_protocol_actions(self, ctx: ResponseContext, on_message: MessageCallba ) def _gate_tool_actions(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool: - if self._gate_forget_actions(ctx.actions, on_message, self._remember_agent_error) is not None: + context_actions = ctx.actions + self._context_actions_from_tool_calls(ctx.tool_calls) + if self._gate_forget_actions(context_actions, on_message, self._remember_agent_error) is not None: return True repeated_tool_retry_error = self._repeated_tool_retry_error(ctx.tool_calls) if repeated_tool_retry_error: @@ -6259,6 +6464,11 @@ def _run_tool_actions( report = ToolCallDisplayFormatter.latest_report(self.tool_runner.latest_executions) if report: on_message(report) + self._emit_tool_context_update( + [*self.latest_context_tool_recalled, *self.latest_context_tool_kept], + self.latest_context_tool_forgotten, + on_message, + ) if self.session.settings.debug and self.tool_runner.skipped_after_failure_count: on_message(f"Tool Calls Skipped: {self.tool_runner.skipped_after_failure_count} after {self.tool_runner.skipped_after_failure_key} failed") self.compactor.maybe_compact() @@ -6292,13 +6502,26 @@ def _handle_observe_response( ) if gate_result is not None: return gate_result - forget_gate = self._gate_forget_actions(ctx.actions, on_message, self._remember_observe_error) + non_context_tool_error = self._non_context_tool_error(ctx.tool_calls) + if non_context_tool_error: + return self._reject_result( + self._remember_observe_error, + on_message, + self._error("observe only accepts context tools: " + non_context_tool_error + ".", "use Keep, Forget, or Recall while observing."), + "Retrying: observe latest results with context tools only.", + "Protocol_Gate: invalid observe tool(s): " + non_context_tool_error + ".", + ) + context_actions = ctx.actions + self._context_actions_from_tool_calls(ctx.tool_calls) + forget_gate = self._gate_forget_actions(context_actions, on_message, self._remember_observe_error) if forget_gate is not None: return forget_gate observed_blocks = self._unreferenced_unreduced_blocks() observed_counter = ToolResultContext.max_counter(observed_blocks) forgotten_keys = self.apply_response(response) self._emit_state_and_text(ctx, on_message) + if ctx.tool_calls: + self.execute_tool_calls(ctx.tool_calls, context_keep_blocks=observed_blocks) + forgotten_keys.extend(self.latest_context_tool_forgotten) self.mode = AgentMode.ACT kept_keys = self.tool_context.keep_results( ctx.actions, @@ -6306,10 +6529,11 @@ def _handle_observe_response( max_chars=self.context_budget().kept_chars, max_block_chars=self.context_budget().kept_block_chars, ) + kept_keys.extend(self.latest_context_tool_kept) self.tool_context.compact_observed(observed_blocks) self._mark_memory_checkpoint(observed_counter) self.observe_feedback_errors = [] - self._warn_weak_observe_memory(ctx.actions) + self._warn_weak_observe_memory(context_actions) self._emit_tool_context_update(kept_keys, forgotten_keys, on_message) self._promote_required_checks(ctx) return AgentRunResult() @@ -6455,7 +6679,9 @@ def run( ) self._prune_tool_result_store() self.mode = AgentMode.ACT - self.recalled_context_executions = [] + self.latest_context_tool_kept = [] + self.latest_context_tool_forgotten = [] + self.latest_context_tool_recalled = [] self.session.state.turn_tool_calls = 0 self.session.state.turn_model_calls = 0 old_goal = self.blackboard.goal diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index c66e7d9..8869001 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -507,7 +507,7 @@ def test_act_prompt_folds_excerpted_read_result(tmp_path): assert "x" * 100 not in latest -def test_recall_read_projects_into_file_context_without_result_context(tmp_path): +def test_recall_read_reactivates_original_result_for_file_context(tmp_path): session = Session(cwd=str(tmp_path)) session.state.tool_result_counter = 1 session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem( @@ -518,8 +518,10 @@ def test_recall_read_projects_into_file_context_without_result_context(tmp_path) latest = agent.execute_tool_calls([{"name": "Recall", "intention": "recall read", "args": ["tr.1"]}]) - assert latest == "" - assert agent.tool_context.latest == [] + assert "tool=Read" in latest + assert "tool=Recall" not in latest + assert "key=tr.1" in latest + assert agent.tool_context.reactivated_keys == {"tr.1"} assert list(session.state.tool_result_store) == ["tr.1"] assert session.state.tool_result_counter == 1 prompt = agent.build_user_prompt() @@ -556,6 +558,58 @@ def test_recalled_read_does_not_override_newer_read(tmp_path): assert "tool=Recall" not in latest_results +def test_forget_tool_removes_visible_result_without_new_key(tmp_path): + (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + agent = Agent(session) + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + latest = agent.execute_tool_calls([{"name": "Forget", "intention": "drop sample", "args": ["tr.1"]}]) + + assert session.state.tool_result_counter == 1 + assert list(session.state.tool_result_store) == ["tr.1"] + assert "tool=Forget" not in latest + assert "recall=tr.1" in latest + assert "" not in latest + prompt = agent.build_user_prompt() + assert "File Context:\n(empty)" in prompt + + +def test_recall_tool_reactivates_forgotten_result_without_new_key(tmp_path): + (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + agent = Agent(session) + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Forget", "intention": "drop sample", "args": ["tr.1"]}]) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall sample", "args": ["tr.1"]}]) + + assert session.state.tool_result_counter == 1 + assert list(session.state.tool_result_store) == ["tr.1"] + prompt = agent.build_user_prompt() + file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + latest_results = _prompt_section(prompt, "Latest Tool Results", "Current Input") + assert "File: sample.txt" in file_context + assert "|alpha" in file_context + assert "tool=Recall" not in latest_results + + +def test_observe_keep_tool_keeps_result_without_new_key(tmp_path, monkeypatch): + (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + agent = Agent(session) + _set_context_budget(monkeypatch, agent, observe_after_results=1) + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + assert agent.mode == nanocode.AgentMode.OBSERVE + agent.handle_response({"actions": [{"type": "tool", "name": "Keep", "intention": "keep sample", "args": ["tr.1"]}]}) + + assert agent.mode == nanocode.AgentMode.ACT + assert session.state.tool_result_counter == 1 + assert list(session.state.tool_result_store) == ["tr.1"] + assert "key=tr.1" in _blocks_text(agent.tool_context.kept_results) + + def test_empty_observe_compacts_unreduced_tool_results(tmp_path, monkeypatch): (tmp_path / "one.txt").write_text("one\n", encoding="utf-8") (tmp_path / "two.txt").write_text("two\n", encoding="utf-8") From 3f8ee670add696f8859c2b5b22e45016f2e9dc46 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 01:47:08 -0700 Subject: [PATCH 04/29] Use structured args for Read and Search tools --- nanocode.py | 388 +++++++++++++++++++++-------- tests/test_nanocode_agent.py | 184 ++++++++------ tests/test_nanocode_edit_tool.py | 4 +- tests/test_nanocode_read_tool.py | 70 ++++-- tests/test_nanocode_search_tool.py | 105 ++++---- 5 files changed, 490 insertions(+), 261 deletions(-) diff --git a/nanocode.py b/nanocode.py index 4f8f062..ce8bada 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1670,7 +1670,17 @@ def _read_block_default_path(header: str) -> str: args, _end = json.JSONDecoder().raw_decode(header[start + len(marker) :]) except json.JSONDecodeError: return "" - return str(args[0]) if isinstance(args, list) and args else "" + if not isinstance(args, list) or not args: + return "" + payload = _json_dict(args[0]) + if payload: + if "files" in payload: + files = _json_list(payload.get("files")) + if files: + return _json_str(_json_dict(files[0]).get("path")) or "" + return "" + return _json_str(payload.get("path")) or "" + return str(args[0]) @staticmethod def _read_output_file_sections(output: str, *, default_path: str) -> Iterator[tuple[str, str]]: @@ -1858,9 +1868,16 @@ def _parse_line_range_token(value: str) -> tuple[int, int]: return _parse_line_range(match.group(1), match.group(2)) -def _looks_like_read_range_error(value: JsonValue) -> bool: - text = str(value).strip() - return bool(re.fullmatch(r"\d+(?:\s*[-:,]\s*)?", text) or re.search(r"[:,]", text)) +def _parse_structured_line_range(value: JsonValue, *, label: str = "range") -> tuple[int, int]: + raw = _json_list(value) + if len(raw) != 2: + raise ToolCallArgError(label + " must be a [start, end] integer pair") + start, end = raw + if not isinstance(start, int) or isinstance(start, bool): + raise ToolCallArgError(label + " start must be an integer") + if not isinstance(end, int) or isinstance(end, bool): + raise ToolCallArgError(label + " end must be an integer") + return _parse_line_range(str(start), str(end)) @dataclass @@ -1870,18 +1887,18 @@ class ReadTool(Tool): EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY DESCRIPTION: ClassVar[tuple[str, ...]] = ( "Read one or more UTF-8 files with line:hash anchors.", - "Multiple files: pass filepaths only; each file returns first 600 lines.", - "Ranges: pass one filepath then 0-based start,end tokens; each range returns at most 600 lines.", + "Pass one structured object. Use path for one file, or files for multiple files.", + "Each file can omit range for the first 600 lines, pass range=[start,end], or ranges=[[start,end],...].", ) SIGNATURES: ClassVar[tuple[str, ...]] = ( - "Read(filepath) -> first 600 lines with line:hash anchors", - "Read(filepath, filepath...) -> first 600 lines from each file", - "Read(filepath, range[, range...]) -> selected ranges from one file", + "Read({path, range?}) -> selected range or first 600 lines", + "Read({path, ranges}) -> selected ranges from one file", + "Read({files:[{path, range?|ranges?}, ...]}) -> selected ranges from multiple files", ) EXAMPLE: ClassVar[tuple[str, ...]] = ( - 'Example args: ["pyproject.toml", "uv.lock"]', - 'Example args: ["code.py", "0,80", "160,220"]', - 'Example args: ["code.py"]', + 'Example args: [{"path":"code.py","range":[0,80]}]', + 'Example args: [{"path":"code.py","ranges":[[0,80],[160,220]]}]', + 'Example args: [{"files":[{"path":"pyproject.toml"},{"path":"uv.lock","range":[0,120]}]}]', ) filepath: str = "" @@ -1889,81 +1906,214 @@ class ReadTool(Tool): end: int = 0 ranges: list[tuple[int, int]] = field(default_factory=list) filepaths: list[str] = field(default_factory=list) + targets: list[tuple[str, list[tuple[int, int]]]] = field(default_factory=list) cwd: str = "" @classmethod def cli_args(cls, args: list[JsonValue]) -> list[str]: - if not args: - return [] - tokens = [cls.cli_token(args[0])] - return tokens + [str(arg) for arg in args[1:]] + payload = _json_dict(args[0]) if len(args) == 1 else {} + if not payload: + return [cls.cli_token(arg) for arg in args] + raw_files = _json_list(payload.get("files")) if "files" in payload else [payload] + tokens: list[str] = [] + for raw_file in raw_files: + spec = _json_dict(raw_file) + path = _json_str(spec.get("path")) or "" + if not path: + continue + ranges = cls._cli_range_tokens(spec) + if not ranges: + tokens.append(path) + continue + tokens.append(path) + tokens.extend(ranges) + return tokens or [cls.cli_token(args[0])] + + @staticmethod + def _cli_range_tokens(spec: Json) -> list[str]: + if "range" in spec: + raw_ranges = [spec.get("range")] + else: + raw_ranges = _json_list(spec.get("ranges")) if "ranges" in spec else [] + tokens = [] + for raw_range in raw_ranges: + values = _json_list(raw_range) + if len(values) == 2: + tokens.append(str(values[0]) + ":" + str(values[1])) + return tokens + + @classmethod + def tool_schema(cls) -> Json: + range_schema = { + "type": "array", + "items": {"type": "integer", "minimum": 0}, + "minItems": 2, + "maxItems": 2, + "description": "0-based [start, end]. Use end=0 to read to EOF, capped at 600 lines.", + } + file_schema = _tool_object_schema( + { + "path": {"type": "string", "description": "File path to read."}, + "range": range_schema, + "ranges": {"type": "array", "items": range_schema, "description": "Multiple 0-based [start, end] ranges for this file."}, + }, + ["path"], + ) + read_arg_schema = { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Single file path to read."}, + "range": range_schema, + "ranges": {"type": "array", "items": range_schema, "description": "Multiple 0-based [start, end] ranges for the single file."}, + "files": {"type": "array", "items": file_schema, "minItems": 1, "description": "Multiple files to read, each with its own optional range/ranges."}, + }, + "additionalProperties": False, + "description": "Use either path or files.", + } + return _function_tool_schema( + cls.NAME, + cls.schema_description(), + _tool_object_schema( + { + "intention": {"type": "string", "description": "Question being answered or concrete outcome needed."}, + "args": { + "type": "array", + "items": read_arg_schema, + "minItems": 1, + "maxItems": 1, + "description": "Exactly one structured Read request object.", + }, + }, + ["intention", "args"], + ), + ) @classmethod def make(cls, session: Session, args: list[JsonValue]) -> Self: - if len(args) == 0: - raise ToolCallArgError( - 'Read args error: got 0 args; expected ["filepath"] or ["filepath", "start,end"]. Example: Read("nanocode.py", "2065,2095"). Do not call Read().' - ) - filepath = session.resolve_path(str(args[0])) - if len(args) == 1: - ranges = [(0, 0)] - elif all(re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", str(arg)) for arg in args[1:]): - ranges = [_parse_line_range_token(str(arg)) for arg in args[1:]] - elif not any(_looks_like_read_range_error(arg) for arg in args[1:]): - filepaths = [session.resolve_path(str(arg)) for arg in args] - return cls(filepath=filepaths[0], start=0, end=0, ranges=[(0, 0)], filepaths=filepaths, cwd=session.cwd) - elif len(args) == 2: - raise ToolCallArgError( - 'Read args error: invalid range token; expected ["filepath", "start,end"] or ["file1", "file2"]. Example: Read("nanocode.py", "2065,2095").' - ) - else: - raise ToolCallArgError('Read args error: for multiple ranges use comma tokens. Example: Read("nanocode.py", "0,40", "200,260").') + if len(args) != 1 or not isinstance(args[0], dict): + raise ToolCallArgError('Read args error: expected exactly one object, e.g. [{"path":"nanocode.py","range":[2065,2095]}]') + payload = _json_dict(args[0]) + targets = cls._parse_targets(session, payload) + filepath, ranges = targets[0] start, end = ranges[0] - return cls(filepath=filepath, start=start, end=end, ranges=ranges, filepaths=[filepath], cwd=session.cwd) + return cls( + filepath=filepath, + start=start, + end=end, + ranges=ranges, + filepaths=[path for path, _ranges in targets], + targets=targets, + cwd=session.cwd, + ) + + @classmethod + def _parse_targets(cls, session: Session, payload: Json) -> list[tuple[str, list[tuple[int, int]]]]: + if "files" in payload: + unexpected = sorted(set(payload) - {"files"}) + if unexpected: + raise ToolCallArgError("Read args error: files cannot be combined with " + ", ".join(unexpected)) + raw_files = _json_list(payload.get("files")) + if not raw_files: + raise ToolCallArgError("Read args error: files must be a non-empty array") + else: + raw_files = [payload] + targets = [cls._parse_file_spec(session, raw_file, index=index) for index, raw_file in enumerate(raw_files)] + if not targets: + raise ToolCallArgError("Read args error: no files requested") + return targets + + @classmethod + def _parse_file_spec(cls, session: Session, value: JsonValue, *, index: int) -> tuple[str, list[tuple[int, int]]]: + spec = _json_dict(value) + if not spec: + raise ToolCallArgError("Read args error: each file must be an object") + unexpected = sorted(set(spec) - {"path", "range", "ranges"}) + if unexpected: + raise ToolCallArgError("Read args error: unexpected field in file request: " + ", ".join(unexpected)) + path = _json_str(spec.get("path")) + if not path: + raise ToolCallArgError("Read args error: each file request needs a non-empty path") + if "range" in spec and "ranges" in spec: + raise ToolCallArgError("Read args error: use range or ranges, not both") + if "ranges" in spec: + raw_ranges = _json_list(spec.get("ranges")) + if not raw_ranges: + raise ToolCallArgError("Read args error: ranges must be a non-empty array") + ranges = [_parse_structured_line_range(raw_range, label=f"files[{index}].ranges[{range_index}]") for range_index, raw_range in enumerate(raw_ranges)] + elif "range" in spec: + ranges = [_parse_structured_line_range(spec.get("range"), label=f"files[{index}].range")] + else: + ranges = [(0, 0)] + return session.resolve_path(path), ranges + + def _targets(self) -> list[tuple[str, list[tuple[int, int]]]]: + if self.targets: + return self.targets + return [(self.filepath, self.ranges or [(self.start, self.end)])] def requires_confirmation(self, session: Session) -> bool: - return any(not session.is_path_in_cwd(filepath) for filepath in (self.filepaths or [self.filepath])) + return any(not session.is_path_in_cwd(filepath) for filepath, _ranges in self._targets()) def preview(self) -> str: - if len(self.filepaths) > 1: - return "Read(" + ", ".join(self.filepaths) + ")" - if len(self.ranges) > 1: - ranges = ", ".join(str(start) + ":" + str(end) for start, end in self.ranges) - return f"Read({self.filepath}, {ranges})" - return f"Read({self.filepath}, {self.start}, {self.end})" + targets = self._targets() + if len(targets) > 1: + chunks = [] + for filepath, ranges in targets: + range_text = ",".join(str(start) + ":" + str(end) for start, end in ranges) + chunks.append(filepath + (":" + range_text if range_text != "0:0" else "")) + return "Read(" + ", ".join(chunks) + ")" + filepath, ranges = targets[0] + if len(ranges) > 1: + range_text = ", ".join(str(start) + ":" + str(end) for start, end in ranges) + return f"Read({filepath}, {range_text})" + start, end = ranges[0] + return f"Read({filepath}, {start}, {end})" def call(self) -> str: - if len(self.filepaths) > 1: + targets = self._targets() + if len(targets) > 1: lines = [ "", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', - " " + str(len(self.filepaths)) + "", + " " + str(len(targets)) + "", ] - for filepath in self.filepaths: - content, returned_end, range_end, truncated, total_lines = self._read_range(0, 0, filepath=filepath) + for filepath, ranges in targets: lines.extend([" ", " " + os.path.relpath(filepath, self.cwd) + ""]) - lines.extend(self._format_range_result(0, returned_end, range_end, truncated, total_lines, content, indent=" ")) + if len(ranges) > 1: + lines.append(" " + str(len(ranges)) + "") + for start, end in ranges: + if len(ranges) > 1: + lines.append(" ") + indent = " " + else: + indent = " " + content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) + lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=indent)) + if len(ranges) > 1: + lines.append(" ") lines.append(" ") lines.append("") return "\n".join(lines) - if len(self.ranges) > 1: + filepath, ranges = targets[0] + if len(ranges) > 1: lines = [ "", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', - " " + str(len(self.ranges)) + "", + " " + str(len(ranges)) + "", ] - for start, end in self.ranges: - content, returned_end, range_end, truncated, total_lines = self._read_range(start, end) + for start, end in ranges: + content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) lines.append(" ") lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=" ")) lines.append(" ") lines.append("") return "\n".join(lines) - content, returned_end, range_end, truncated, total_lines = self._read_range(self.start, self.end) + start, end = ranges[0] + content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) lines = ["", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.'] - lines.extend(self._format_range_result(self.start, returned_end, range_end, truncated, total_lines, content, indent=" ")) + lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=" ")) lines.append("") return "\n".join(lines) @@ -2138,15 +2288,15 @@ class SearchTool(Tool): DESCRIPTION: ClassVar[tuple[str, ...]] = ( "Case-insensitive regex search across files; use before Read when location is unknown.", "Returns file:line matches and optional line:hash context anchors.", - "Options: path=FILE_OR_DIR, glob=GLOB, context=N. Use at most one glob per call.", + "Pass one structured object with pattern, optional path, optional glob, and optional context.", "Use InspectCode for symbol structure; use Bash rg/grep for custom shell pipelines.", "Escape regex metacharacters for literal text; use A|B for alternatives and \\n for multiline.", ) - SIGNATURES: ClassVar[tuple[str, ...]] = ("Search(pattern[, path=FILE_OR_DIR][, glob=GLOB][, context=N]) -> matching lines",) + SIGNATURES: ClassVar[tuple[str, ...]] = ("Search({pattern, path?, glob?, context?}) -> matching lines",) EXAMPLE: ClassVar[tuple[str, ...]] = ( - 'Example args: ["class .*Tool", "path=nanocode.py"]', - 'Example args: ["TODO|FIXME", "path=.", "glob=*.py", "context=2"]', - 'Literal paren args: ["def __init__\\(", "path=.", "glob=*.py"]', + 'Example args: [{"pattern":"class .*Tool","path":"nanocode.py"}]', + 'Example args: [{"pattern":"TODO|FIXME","path":".","glob":"*.py","context":2}]', + 'Literal paren args: [{"pattern":"def __init__\\\\(","path":".","glob":"*.py"}]', ) @dataclass(frozen=True) @@ -2164,64 +2314,80 @@ class Match: gitignore_patterns: list[str] = field(default_factory=list) @classmethod - def make(cls, session: Session, args: list[str]) -> Self: - args = [str(arg) for arg in args] - path_index = next((index for index, value in enumerate(args[1:], start=1) if value.startswith("path=")), None) - if path_index is not None and path_index > 1: - args = ["|".join(args[:path_index]), *args[path_index:]] - if len(args) < 1 or len(args) > 4: - raise ToolCallArgError("requires 1 to 4 args: pattern[, path=path][, glob=pattern][, context=N]") - if any(str(arg).startswith("ignore_case") or str(arg).startswith("case_sensitive") for arg in args[1:]): - raise ToolCallArgError("Search supports only path=, glob=, and context= options; ignore_case is not supported") - raw_pattern = str(args[0]) + def cli_args(cls, args: list[JsonValue]) -> list[str]: + payload = _json_dict(args[0]) if len(args) == 1 else {} + if not payload: + return [cls.cli_token(arg) for arg in args] + tokens = [cls.cli_token(payload.get("pattern", ""))] + if "path" in payload: + tokens.append("path=" + str(payload.get("path") or ".")) + if "glob" in payload: + tokens.append("glob=" + str(payload.get("glob") or "")) + if "context" in payload: + tokens.append("context=" + str(payload.get("context"))) + return tokens + + @classmethod + def tool_schema(cls) -> Json: + search_arg_schema = _tool_object_schema( + { + "pattern": {"type": "string", "description": "Case-insensitive regex. Use A|B for alternatives and \\n for multiline."}, + "path": {"type": "string", "description": "File or directory to search. Defaults to current working directory."}, + "glob": {"type": "string", "description": "Optional single glob filter such as *.py."}, + "context": { + "type": "integer", + "minimum": 0, + "maximum": cls.MAX_CONTEXT_LINES, + "description": "Context lines around each match; 0 returns only the matching line.", + }, + }, + ["pattern"], + ) + return _function_tool_schema( + cls.NAME, + cls.schema_description(), + _tool_object_schema( + { + "intention": {"type": "string", "description": "Question being answered or concrete outcome needed."}, + "args": { + "type": "array", + "items": search_arg_schema, + "minItems": 1, + "maxItems": 1, + "description": "Exactly one structured Search request object.", + }, + }, + ["intention", "args"], + ), + ) + + @classmethod + def make(cls, session: Session, args: list[JsonValue]) -> Self: + if len(args) != 1 or not isinstance(args[0], dict): + raise ToolCallArgError('Search args error: expected exactly one object, e.g. [{"pattern":"class Foo","path":"."}]') + payload = _json_dict(args[0]) + unexpected = sorted(set(payload) - {"pattern", "path", "glob", "context"}) + if unexpected: + raise ToolCallArgError("unexpected search option: " + ", ".join(unexpected)) + raw_pattern = _json_str(payload.get("pattern")) or "" if not raw_pattern: raise ToolCallArgError("pattern cannot be empty") pattern = raw_pattern[3:] if raw_pattern.startswith("re:") else raw_pattern if not pattern: raise ToolCallArgError("pattern cannot be empty") pattern = pattern.replace("\\n", "\n").replace("\\r", "\r") - target_path_arg = "." - glob_pattern = "" + target_path_arg = _json_str(payload.get("path")) if "path" in payload else "." + target_path_arg = target_path_arg or "." + glob_pattern = _json_str(payload.get("glob")) if "glob" in payload else "" + glob_pattern = glob_pattern or "" + if "glob" in payload and not glob_pattern: + raise ToolCallArgError("glob option cannot be empty") context_lines = cls.CONTEXT_LINES - path_set = False - for raw_option in args[1:]: - option = str(raw_option) - if option.startswith("path="): - if path_set: - raise ToolCallArgError("path option cannot be combined with positional path") - target_path_arg = option.split("=", 1)[1] or "." - path_set = True - continue - if option.startswith("context=") or option.isdigit(): - try: - raw_context = option[len("context=") :] if option.startswith("context=") else option - context_lines = int(raw_context) - if context_lines < 0 or context_lines > cls.MAX_CONTEXT_LINES: - raise ValueError - except ValueError: - raise ToolCallArgError(f"context must be an integer between 0 and {cls.MAX_CONTEXT_LINES}") - continue - if option.startswith("glob=") or option.startswith("glob_pattern="): - if glob_pattern: - raise ToolCallArgError("unexpected search option: " + option) - option = option.split("=", 1)[1] - if not option: - raise ToolCallArgError("glob option cannot be empty") - glob_pattern = option - continue - if not option: - if path_set: - raise ToolCallArgError("unexpected search option: " + option) - target_path_arg = "." - path_set = True - continue - if path_set and not glob_pattern: - glob_pattern = option - continue - if path_set: - raise ToolCallArgError("unexpected search option: " + option) - target_path_arg = option - path_set = True + if "context" in payload: + raw_context = payload.get("context") + if not isinstance(raw_context, int) or isinstance(raw_context, bool) or raw_context < 0 or raw_context > cls.MAX_CONTEXT_LINES: + raise ToolCallArgError(f"context must be an integer between 0 and {cls.MAX_CONTEXT_LINES}") + context_lines = raw_context try: re.compile(pattern) except re.error as error: @@ -4839,7 +5005,9 @@ def parse_tool_call(self, value: JsonValue) -> ParsedToolCall: item = _json_dict(value) name = _json_str(item.get("name")) if not name: - raise ToolCallArgError('tool action missing required field: name. Use {"type":"tool","name":"Read","intention":"...","args":["path"]}.') + raise ToolCallArgError( + 'tool action missing required field: name. Use {"type":"tool","name":"Read","intention":"...","args":[{"path":"path.py"}]}.' + ) name = _canonical_tool_name(name) intention = _json_str(item.get("intention")) or "" return ParsedToolCall(name=name, intention=intention, args=list(_json_list(item.get("args")))) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 8869001..492cd30 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -48,6 +48,26 @@ def _stored_read_result(line: str) -> str: ) +def _read_args(path: str, *, line_range: list[int] | None = None, ranges: list[list[int]] | None = None): + spec: dict[str, object] = {"path": path} + if line_range is not None: + spec["range"] = line_range + if ranges is not None: + spec["ranges"] = ranges + return [spec] + + +def _search_args(pattern: str, *, path: str | None = None, glob: str | None = None, context: int | object | None = None): + spec: dict[str, object] = {"pattern": pattern} + if path is not None: + spec["path"] = path + if glob is not None: + spec["glob"] = glob + if context is not None: + spec["context"] = context + return [spec] + + def _observe_tool_result_context(agent): return "\n\n".join(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) @@ -58,7 +78,7 @@ def _set_context_budget(monkeypatch, agent, **overrides): def _read_anchors(session: Session, filepath: str) -> list[str]: - result = nanocode.ReadTool.make(session, [filepath]).call() + result = nanocode.ReadTool.make(session, _read_args(filepath)).call() return re.findall(r"^(\d+:[0-9a-f]{6})\|", result, re.MULTILINE) @@ -178,13 +198,13 @@ def test_agent_tool_results_go_to_latest_tool_results_and_store(tmp_path): { "name": "Read", "intention": "read sample", - "args": ["sample.txt", "0,1"], + "args": _read_args("sample.txt", line_range=[0, 1]), } ] ) assert "alpha" in latest - assert '- ok tool=Read args=["sample.txt","0,1"] key=tr.1' in latest + assert '- ok tool=Read args=[{"path":"sample.txt","range":[0,1]}] key=tr.1' in latest assert "why: read sample" in latest assert "output:\n" in latest assert session.state.tool_result_store["tr.1"].value.startswith("") @@ -206,8 +226,8 @@ def test_agent_dedupes_same_batch_readonly_tool_calls_keeping_latest(tmp_path): latest = agent.execute_tool_calls( [ - {"name": "Read", "intention": "first read", "args": ["sample.txt", "0,1"]}, - {"name": "Read", "intention": "second read", "args": ["sample.txt", "0,1"]}, + {"name": "Read", "intention": "first read", "args": _read_args("sample.txt", line_range=[0, 1])}, + {"name": "Read", "intention": "second read", "args": _read_args("sample.txt", line_range=[0, 1])}, ] ) @@ -223,14 +243,14 @@ def test_agent_can_append_streamed_tool_calls_to_latest_batch(tmp_path): (tmp_path / "two.txt").write_text("two\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}]) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}], append_to_latest=True) + agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}], append_to_latest=True) latest = _blocks_text(agent.tool_context.latest) assert "one" in latest assert "two" in latest - assert 'tool=Read args=["one.txt","0,1"]' in latest - assert 'tool=Read args=["two.txt","0,1"]' in latest + assert 'tool=Read args=[{"path":"one.txt","range":[0,1]}]' in latest + assert 'tool=Read args=[{"path":"two.txt","range":[0,1]}]' in latest assert agent.tool_context.recent == [] @@ -242,9 +262,9 @@ def test_agent_does_not_dedupe_nonconsecutive_same_batch_readonly_tool_calls(tmp agent.execute_tool_calls( [ - {"name": "Read", "intention": "first read", "args": ["sample.txt", "0,1"]}, - {"name": "Read", "intention": "middle read", "args": ["sample.txt", "1,2"]}, - {"name": "Read", "intention": "second read", "args": ["sample.txt", "0,1"]}, + {"name": "Read", "intention": "first read", "args": _read_args("sample.txt", line_range=[0, 1])}, + {"name": "Read", "intention": "middle read", "args": _read_args("sample.txt", line_range=[1, 2])}, + {"name": "Read", "intention": "second read", "args": _read_args("sample.txt", line_range=[0, 1])}, ] ) @@ -297,7 +317,7 @@ def test_agent_tool_results_are_bounded_and_logged(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - latest = agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": ["sample.txt", "0,1"]}]) + latest = agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) item = session.state.tool_result_store["tr.1"] assert item.excerpted is True @@ -319,7 +339,7 @@ def test_search_tool_result_uses_larger_output_budget(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - agent.execute_tool_calls([{"name": "Search", "intention": "search large result", "args": ["needle", "sample.txt", "context=0"]}]) + agent.execute_tool_calls([{"name": "Search", "intention": "search large result", "args": _search_args("needle", path="sample.txt", context=0)}]) item = session.state.tool_result_store["tr.1"] assert item.excerpted is False @@ -334,7 +354,7 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path, monkeypat _set_context_budget(monkeypatch, agent, index_items=2, observe_after_results=4) for name in ["one.txt", "two.txt", "three.txt", "four.txt"]: - agent.execute_tool_calls([{"name": "Read", "intention": "read " + name, "args": [name, "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read " + name, "args": _read_args(name, line_range=[0, 1])}]) latest = _blocks_text(agent.tool_context.latest) recent = _blocks_text(agent.tool_context.recent) @@ -362,8 +382,8 @@ def test_agent_observes_full_latest_result_when_it_becomes_recent(tmp_path, monk agent = Agent(Session(cwd=str(tmp_path))) _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}]) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) context = _observe_tool_result_context(agent) assert agent.mode == nanocode.AgentMode.OBSERVE @@ -399,16 +419,16 @@ def test_referenced_unreduced_results_do_not_count_toward_observe_threshold(tmp_ agent = Agent(Session(cwd=str(tmp_path))) _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) agent.apply_response({"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "one.txt was inspected."}]}]}) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) assert agent.mode == nanocode.AgentMode.ACT assert agent.blackboard.memory_checkpoint_tool_result_counter == 0 assert len(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) == 2 assert [nanocode.ToolResultContext.result_key(block) for block in agent._unreferenced_unreduced_blocks()] == ["tr.2"] - agent.execute_tool_calls([{"name": "Read", "intention": "read three", "args": ["three.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read three", "args": _read_args("three.txt", line_range=[0, 1])}]) assert agent.mode == nanocode.AgentMode.OBSERVE observe_prompt = agent.build_observe_prompt() @@ -427,9 +447,9 @@ def test_unsourced_known_does_not_cover_unreduced_result(tmp_path, monkeypatch): agent = Agent(Session(cwd=str(tmp_path))) _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) agent.apply_response({"actions": [{"type": "known", "items": ["one.txt was inspected."]}]}) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) assert agent.mode == nanocode.AgentMode.OBSERVE assert agent.blackboard.memory_checkpoint_tool_result_counter == 0 @@ -442,8 +462,8 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk agent = Agent(Session(cwd=str(tmp_path))) _set_context_budget(monkeypatch, agent, raw_chars=10_000) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}]) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) assert agent.mode == nanocode.AgentMode.ACT assert "key=tr.1" in _blocks_text(agent.tool_context.recent) @@ -470,9 +490,9 @@ def test_act_prompt_file_context_replaces_overlapping_read_lines(tmp_path, monke agent = Agent(Session(cwd=str(tmp_path))) _set_context_budget(monkeypatch, agent, raw_chars=10_000) - agent.execute_tool_calls([{"name": "Read", "intention": "read head", "args": ["sample.txt", "0,2"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read head", "args": _read_args("sample.txt", line_range=[0, 2])}]) path.write_text("old0\nnew1\nnew2\n", encoding="utf-8") - agent.execute_tool_calls([{"name": "Read", "intention": "read overlap", "args": ["sample.txt", "1,3"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read overlap", "args": _read_args("sample.txt", line_range=[1, 3])}]) prompt = agent.build_user_prompt() file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") @@ -496,7 +516,7 @@ def test_act_prompt_folds_excerpted_read_result(tmp_path): path.write_text("x" * 20_000 + "\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read large sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) prompt = agent.build_user_prompt() latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") @@ -544,7 +564,7 @@ def test_recalled_read_does_not_override_newer_read(tmp_path): ) agent = Agent(session) - agent.execute_tool_calls([{"name": "Read", "intention": "read new", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read new", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.execute_tool_calls([{"name": "Recall", "intention": "recall old", "args": ["tr.1"]}]) assert list(session.state.tool_result_store) == ["tr.1", "tr.2"] @@ -563,7 +583,7 @@ def test_forget_tool_removes_visible_result_without_new_key(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) latest = agent.execute_tool_calls([{"name": "Forget", "intention": "drop sample", "args": ["tr.1"]}]) assert session.state.tool_result_counter == 1 @@ -580,7 +600,7 @@ def test_recall_tool_reactivates_forgotten_result_without_new_key(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.execute_tool_calls([{"name": "Forget", "intention": "drop sample", "args": ["tr.1"]}]) agent.execute_tool_calls([{"name": "Recall", "intention": "recall sample", "args": ["tr.1"]}]) @@ -600,7 +620,7 @@ def test_observe_keep_tool_keeps_result_without_new_key(tmp_path, monkeypatch): agent = Agent(session) _set_context_budget(monkeypatch, agent, observe_after_results=1) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) assert agent.mode == nanocode.AgentMode.OBSERVE agent.handle_response({"actions": [{"type": "tool", "name": "Keep", "intention": "keep sample", "args": ["tr.1"]}]}) @@ -616,8 +636,8 @@ def test_empty_observe_compacts_unreduced_tool_results(tmp_path, monkeypatch): agent = Agent(Session(cwd=str(tmp_path))) _set_context_budget(monkeypatch, agent, raw_chars=300, observe_after_results=2) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}]) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) agent.handle_response({"actions": [], "_assistant_text": "checking result"}) @@ -629,7 +649,7 @@ def test_empty_observe_compacts_unreduced_tool_results(tmp_path, monkeypatch): def test_assistant_text_does_not_mark_memory_checkpoint(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.apply_response({"actions": [], "_assistant_text": "reading sample"}) @@ -867,8 +887,8 @@ def test_act_prompt_includes_kept_tool_results(tmp_path): agent.execute_tool_calls( [ - {"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}, - {"name": "Read", "intention": "read other", "args": ["other.txt", "0,1"]}, + {"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, + {"name": "Read", "intention": "read other", "args": _read_args("other.txt", line_range=[0, 1])}, ] ) agent.mode = nanocode.AgentMode.OBSERVE @@ -896,7 +916,7 @@ def test_kept_tool_results_deduplicate_by_tool_key(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.mode = nanocode.AgentMode.OBSERVE agent.handle_response( { @@ -914,7 +934,7 @@ def test_kept_tool_results_deduplicate_by_tool_key(tmp_path): def test_observe_reports_kept_tool_result_keys(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.mode = nanocode.AgentMode.OBSERVE messages = [] @@ -1124,7 +1144,7 @@ def test_keep_tool_results_ignore_non_tool_sources(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.mode = nanocode.AgentMode.OBSERVE agent.handle_response( { @@ -1152,7 +1172,7 @@ def test_keep_action_is_observe_only(tmp_path): def test_observe_rejects_invalid_action_and_allows_empty_actions(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.mode = nanocode.AgentMode.OBSERVE agent.handle_response({"actions": [{"type": "goal", "text": "answer", "complete": False}]}) @@ -1264,7 +1284,7 @@ def test_kept_tool_results_respect_per_block_char_budget(tmp_path, monkeypatch): def test_observe_checkpoint_clears_observe_errors(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.mode = nanocode.AgentMode.OBSERVE agent.observe_feedback_errors = ["old observe error"] @@ -1281,7 +1301,7 @@ def test_agent_tool_result_raw_budget_triggers_observe(tmp_path, monkeypatch): path = tmp_path / "sample.txt" path.write_text("x" * 400 + "\n", encoding="utf-8") - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) assert agent.mode == nanocode.AgentMode.OBSERVE assert agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter) >= agent.context_budget().raw_chars @@ -1297,7 +1317,7 @@ def test_referenced_raw_context_does_not_force_observe(tmp_path, monkeypatch): path = tmp_path / "sample.txt" path.write_text("x" * 400 + "\n", encoding="utf-8") - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) agent.apply_response( {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "sample.txt content was inspected."}]}]} ) @@ -1694,7 +1714,7 @@ def test_agent_request_sends_function_tool_schema_and_parses_tool_call(tmp_path, { "function": { "name": "Read", - "arguments": '{"intention":"read sample","args":["sample.txt","0","1"]}', + "arguments": '{"intention":"read sample","args":[{"path":"sample.txt","range":[0,1]}]}', } } ], @@ -1713,7 +1733,7 @@ def test_agent_request_sends_function_tool_schema_and_parses_tool_call(tmp_path, assert payload["tool_choice"] == "auto" assert payload["parallel_tool_calls"] is True assert response == { - "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0", "1"]}], + "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}], "_assistant_text": "Reading the file.", } assert session.state.last_total_tokens == 5 @@ -1843,7 +1863,7 @@ def create(self, **kwargs): [ _stream_chunk({"content": "Reading."}), _stream_chunk({"tool_calls": [{"index": "0", "function": {"name": "Read", "arguments": '{"intention":"read sample",'}}]}), - _stream_chunk({"tool_calls": [{"index": "0", "function": {"arguments": '"args":["sample.txt","0","1"]}'}}]}), + _stream_chunk({"tool_calls": [{"index": "0", "function": {"arguments": '"args":[{"path":"sample.txt","range":[0,1]}]}'}}]}), _stream_chunk(usage={"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5}, choices=False), ] ) @@ -1865,7 +1885,7 @@ def __init__(self, **_kwargs): "type": "tool", "name": "Read", "intention": "read sample", - "args": ["sample.txt", "0", "1"], + "args": _read_args("sample.txt", line_range=[0, 1]), } ], "_assistant_text": "Reading.", @@ -1880,8 +1900,8 @@ def test_agent_stream_step_preserves_same_response_tool_batch_in_latest(tmp_path class FakeModelClient: def request(self, *_args, on_stream_action=None, **_kwargs): assert on_stream_action is not None - on_stream_action({"type": "tool", "name": "Read", "intention": "read one", "args": ["one.txt", "0,1"]}) - on_stream_action({"type": "tool", "name": "Read", "intention": "read two", "args": ["two.txt", "0,1"]}) + on_stream_action({"type": "tool", "name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}) + on_stream_action({"type": "tool", "name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}) return {"actions": []} agent = Agent(Session(cwd=str(tmp_path))) @@ -1895,8 +1915,8 @@ def request(self, *_args, on_stream_action=None, **_kwargs): assert committed is True assert "one" in latest assert "two" in latest - assert 'tool=Read args=["one.txt","0,1"]' in latest - assert 'tool=Read args=["two.txt","0,1"]' in latest + assert 'tool=Read args=[{"path":"one.txt","range":[0,1]}]' in latest + assert 'tool=Read args=[{"path":"two.txt","range":[0,1]}]' in latest assert agent.tool_context.recent == [] @@ -2992,7 +3012,7 @@ def test_agent_execute_tool_calls_returns_malformed_tool_call_error(tmp_path): latest = agent.execute_tool_calls([{"intention": "bad call", "args": []}]) assert "ToolCallError: tool action missing required field: name" in latest - assert '{"type":"tool","name":"Read","intention":"...","args":["path"]}' in latest + assert '{"type":"tool","name":"Read","intention":"...","args":[{"path":"path.py"}]}' in latest assert "InvalidToolCall" in latest assert "bad call" not in latest assert session.state.conversation == [] @@ -3003,12 +3023,12 @@ def test_agent_execute_tool_calls_records_arg_errors_in_feedback(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - latest = agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": ["sample.txt", "bad,1"]}]) + latest = agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": _read_args("sample.txt", line_range=["bad", 1])}]) - assert "ToolCallError: Read args error: invalid range token" in latest + assert "ToolCallError: files[0].range start must be an integer" in latest assert len(agent.agent_feedback_errors) == 1 - assert 'tool=Read args=["sample.txt","bad,1"]' in agent.agent_feedback_errors[0] - assert "invalid range token" in agent.agent_feedback_errors[0] + assert 'tool=Read args=[{"path":"sample.txt","range":["bad",1]}]' in agent.agent_feedback_errors[0] + assert "range start must be an integer" in agent.agent_feedback_errors[0] def test_agent_execute_tool_calls_reports_arg_count_details(tmp_path): @@ -3039,7 +3059,7 @@ def test_tool_arg_error_does_not_force_observe(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": ["sample.txt", "bad,1"]}]) + agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": _read_args("sample.txt", line_range=["bad", 1])}]) assert agent.mode == nanocode.AgentMode.ACT assert agent.agent_feedback_errors @@ -3062,7 +3082,7 @@ def test_agent_blocks_repeated_identical_failed_tool_call(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) _seed_plan(agent, "read sample") - action = {"type": "tool", "name": "Read", "intention": "bad range", "args": ["sample.txt", "bad,1"]} + action = {"type": "tool", "name": "Read", "intention": "bad range", "args": _read_args("sample.txt", line_range=["bad", 1])} agent.handle_response({"actions": [action]}) agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "failed read has no useful result"}]}) @@ -3098,7 +3118,7 @@ def test_agent_execute_tool_calls_does_not_record_runtime_errors_in_feedback(tmp session = Session(cwd=str(tmp_path)) agent = Agent(session) - latest = agent.execute_tool_calls([{"name": "Read", "intention": "missing file", "args": ["missing.txt", "0,1"]}]) + latest = agent.execute_tool_calls([{"name": "Read", "intention": "missing file", "args": _read_args("missing.txt", line_range=[0, 1])}]) assert "ToolCallError: " in latest assert agent.agent_feedback_errors == [] @@ -3109,9 +3129,9 @@ def test_main_agent_accepts_search_tool(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) - latest = agent.execute_tool_calls([{"name": "Search", "intention": "find symbol", "args": ["class Foo"]}]) + latest = agent.execute_tool_calls([{"name": "Search", "intention": "find symbol", "args": _search_args("class Foo")}]) - assert '- ok tool=Search args=["class Foo"] key=tr.1' in latest + assert '- ok tool=Search args=[{"pattern":"class Foo"}] key=tr.1' in latest assert "sample.py" in latest @@ -3150,7 +3170,7 @@ def __init__(self): self.user_prompts = [] self.responses = [ { - "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}] + "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}] }, {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, { @@ -3181,7 +3201,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): response = agent.run("read sample", on_message=messages.append) assert response["actions"][-1]["message_for_complete"] == "done" - assert messages[0].startswith("[success] Read sample.txt 0,1 -> tr.1") + assert messages[0].startswith("[success] Read sample.txt 0:1 -> tr.1") assert "why:" not in messages[0] assert "log: .nanocode/sessions/" not in messages[0] assert messages[-1] == "done" @@ -3192,7 +3212,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert "alpha" in fake_client.user_prompts[2] assert "Kept Tool Results:" in fake_client.user_prompts[2] assert "" not in fake_client.user_prompts[2] - assert 'tool=Read args=["sample.txt","0,1"]' in _blocks_text(agent.tool_context.latest) + assert 'tool=Read args=[{"path":"sample.txt","range":[0,1]}]' in _blocks_text(agent.tool_context.latest) assert agent.tool_context.recent == [] assert agent.blackboard.known == ["Read sample.txt and found alpha."] assert agent.blackboard.user_input == "read sample" @@ -3260,7 +3280,7 @@ def test_agent_normalizes_protocol_action_type_case(tmp_path): {"type": "USER_RULE", "text": "prefer concise", "message": "saved"}, {"type": "FORGET", "source": ["tr.1"], "reason": "old"}, {"type": "KEEP", "source": ["tr.2"], "reason": "useful"}, - {"type": "Tool", "name": "search", "intention": "find", "args": ["needle"]}, + {"type": "Tool", "name": "search", "intention": "find", "args": _search_args("needle")}, ] } ) @@ -3315,7 +3335,7 @@ def test_agent_normalizes_lowercase_repo_tool_names(tmp_path): messages = [] result = agent.handle_response( - {"actions": [{"type": "search", "intention": "find sample", "args": ["needle", "sample.txt"]}]}, + {"actions": [{"type": "search", "intention": "find sample", "args": _search_args("needle", path="sample.txt")}]}, on_message=messages.append, ) @@ -3331,7 +3351,7 @@ class FakeModelClient: def __init__(self): self.user_prompts = [] self.responses = [ - {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]}, + {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, { "actions": [ {"type": "goal", "text": "answer sample", "complete": True, "message_for_complete": "sample contains alpha"}, @@ -3456,7 +3476,7 @@ class FakeModelClient: def __init__(self): self.user_prompts = [] self.responses = [ - {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]}, + {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, {"_format_error": "Invalid function-tool response: plain answer", "actions": []}, {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, {"actions": _final_actions("read sample")}, @@ -3485,7 +3505,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert "Kept Tool Results:" in agent.model_client.user_prompts[3] assert "alpha" in agent.model_client.user_prompts[3] assert "" not in agent.model_client.user_prompts[3] - assert 'tool=Read args=["sample.txt","0,1"]' in _blocks_text(agent.tool_context.latest) + assert 'tool=Read args=[{"path":"sample.txt","range":[0,1]}]' in _blocks_text(agent.tool_context.latest) assert agent.tool_context.recent == [] @@ -3498,7 +3518,7 @@ def __init__(self): self.responses = [ { "actions": [ - {"type": "tool", "name": "Read", "intention": f"read {index}", "args": [f"sample-{index}.txt", "0,1"]} + {"type": "tool", "name": "Read", "intention": f"read {index}", "args": _read_args(f"sample-{index}.txt", line_range=[0, 1])} for index in range(51) ] }, @@ -3543,7 +3563,7 @@ class FakeModelClient: def __init__(self): self.user_prompts = [] self.responses = [ - {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]}, + {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, {"actions": [{"type": "forget", "source": ["tr.1"], "reason": "sample content is not needed"}]}, {"actions": _final_actions("read sample", "done too early")}, ] @@ -3579,7 +3599,7 @@ def __init__(self): { "actions": [ {"type": "goal", "text": "read sample", "complete": False}, - {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, { @@ -3616,13 +3636,13 @@ def __init__(self): { "actions": [ {"type": "goal", "text": "new goal", "complete": False}, - {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, { "actions": [ {"type": "goal", "text": "new goal", "complete": False}, - {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, { @@ -3632,7 +3652,7 @@ def __init__(self): "type": "plan", "items": [{"id": "p1", "text": "Read sample", "status": "doing"}], }, - {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, @@ -3669,7 +3689,7 @@ def test_agent_run_requires_task_alignment_before_work_with_old_context(tmp_path class FakeModelClient: def __init__(self): self.responses = [ - {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]}, + {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, { "actions": [ {"type": "goal", "text": "run lint", "complete": False}, @@ -3677,7 +3697,7 @@ def __init__(self): "type": "plan", "items": [{"id": "p1", "text": "Read sample", "status": "doing"}], }, - {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, { @@ -3723,7 +3743,7 @@ def __init__(self): ] }, {"actions": [{"type": "goal", "text": "read sample again", "complete": False}]}, - {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt", "0,1"]}]}, + {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, { "actions": [ @@ -4006,7 +4026,7 @@ def test_agent_allows_tool_after_completed_plan_and_checks(tmp_path): result = agent.handle_response( { "actions": [ - {"type": "tool", "name": "Read", "intention": "inspect again", "args": ["sample.txt", "0,1"]} + {"type": "tool", "name": "Read", "intention": "inspect again", "args": _read_args("sample.txt", line_range=[0, 1])} ] }, on_message=messages.append, @@ -4041,7 +4061,7 @@ def test_agent_allows_tool_after_reopening_completed_plan_with_context(tmp_path) } ], }, - {"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "inspect sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] } ) @@ -4073,7 +4093,7 @@ def test_agent_allows_tool_after_reopening_completed_plan_without_context(tmp_pa "mode": "patch", "items": [{"id": "p2", "text": "Inspect the remaining issue", "status": "doing"}], }, - {"type": "tool", "name": "Read", "intention": "inspect sample", "args": ["sample.txt", "0,1"]}, + {"type": "tool", "name": "Read", "intention": "inspect sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, on_message=messages.append, @@ -4515,7 +4535,7 @@ def __init__(self): self.responses = [ { "actions": [ - {"type": "tool", "name": "Read", "intention": "read sample", "args": ["sample.txt"]}, + {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt")}, ], "_assistant_text": "reading sample", }, diff --git a/tests/test_nanocode_edit_tool.py b/tests/test_nanocode_edit_tool.py index 5db5bd4..edce8e9 100644 --- a/tests/test_nanocode_edit_tool.py +++ b/tests/test_nanocode_edit_tool.py @@ -10,7 +10,7 @@ def _anchors(read_result: str) -> list[str]: def _read_anchors(session: Session, filepath: str, range_token: str = "0,0") -> list[str]: - args = [filepath] if range_token == "0,0" else [filepath, range_token] + args = [{"path": filepath}] if range_token == "0,0" else [{"path": filepath, "range": [int(part) for part in re.split(r"[-:,]", range_token)]}] return _anchors(ReadTool.make(session, args).call()) @@ -43,7 +43,7 @@ def test_edit_file_accepts_full_hashline_anchor(tmp_path): path = tmp_path / "sample.txt" path.write_text("alpha\nbeta\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - read_result = ReadTool.make(session, ["sample.txt"]).call() + read_result = ReadTool.make(session, [{"path": "sample.txt"}]).call() full_hashline = next(line for line in read_result.splitlines() if line.endswith("|beta")) EditTool.make(session, ["sample.txt", [{"op": "replace", "start": full_hashline, "end": full_hashline, "content": "BETA\n"}]]).call() diff --git a/tests/test_nanocode_read_tool.py b/tests/test_nanocode_read_tool.py index ee13a81..1154216 100644 --- a/tests/test_nanocode_read_tool.py +++ b/tests/test_nanocode_read_tool.py @@ -8,12 +8,25 @@ def _hashline(index: int, text: str) -> str: return f"{index}:{nanocode._line_hash(text)}|{text}" +def _read(path: str, *, line_range: list[int] | None = None, ranges: list[list[int]] | None = None): + spec: dict[str, object] = {"path": path} + if line_range is not None: + spec["range"] = line_range + if ranges is not None: + spec["ranges"] = ranges + return [spec] + + +def _read_files(*files: dict[str, object]): + return [{"files": list(files)}] + + def test_read_tool_reads_requested_line_range(tmp_path): path = tmp_path / "sample.txt" path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = ReadTool.make(session, ["sample.txt", "1,3"]) + tool = ReadTool.make(session, _read("sample.txt", line_range=[1, 3])) result = tool.call() assert tool.requires_confirmation(session) is False @@ -28,25 +41,25 @@ def test_read_tool_reads_requested_line_range(tmp_path): def test_read_tool_rejects_empty_args_with_actionable_error(tmp_path): session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match=r'Read args error: got 0 args; expected \["filepath"\]'): + with pytest.raises(ToolCallError, match="Read args error: expected exactly one object"): ReadTool.make(session, []) -def test_read_tool_rejects_multiple_start_end_pairs(tmp_path): +def test_read_tool_rejects_positional_args(tmp_path): path = tmp_path / "sample.txt" path.write_text("zero\none\ntwo\nthree\nfour\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="Read args error: for multiple ranges use comma tokens"): + with pytest.raises(ToolCallError, match="Read args error: expected exactly one object"): ReadTool.make(session, ["sample.txt", "1", "2", "3", "5"]) -def test_read_tool_reads_multiple_line_range_tokens(tmp_path): +def test_read_tool_reads_multiple_structured_line_ranges(tmp_path): path = tmp_path / "sample.txt" path.write_text("zero\none\ntwo\nthree\nfour\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = ReadTool.make(session, ["sample.txt", "1-2", "3-5"]) + tool = ReadTool.make(session, _read("sample.txt", ranges=[[1, 2], [3, 5]])) result = tool.call() assert tool.ranges == [(1, 2), (3, 5)] @@ -64,7 +77,7 @@ def test_read_tool_reads_multiple_files(tmp_path): (tmp_path / "uv.lock").write_text("version = 1\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = ReadTool.make(session, ["pyproject.toml", "uv.lock"]) + tool = ReadTool.make(session, _read_files({"path": "pyproject.toml"}, {"path": "uv.lock"})) result = tool.call() assert tool.filepaths == [str(tmp_path / "pyproject.toml"), str(tmp_path / "uv.lock")] @@ -77,12 +90,29 @@ def test_read_tool_reads_multiple_files(tmp_path): assert _hashline(0, "version = 1\n") in result -def test_read_tool_reads_colon_and_comma_range_tokens(tmp_path): +def test_read_tool_reads_multiple_files_with_independent_ranges(tmp_path): + (tmp_path / "one.txt").write_text("zero\none\ntwo\n", encoding="utf-8") + (tmp_path / "two.txt").write_text("alpha\nbeta\ngamma\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + + tool = ReadTool.make(session, _read_files({"path": "one.txt", "range": [1, 2]}, {"path": "two.txt", "range": [1, 3]})) + result = tool.call() + + assert tool.filepaths == [str(tmp_path / "one.txt"), str(tmp_path / "two.txt")] + assert "one.txt" in result + assert "two.txt" in result + assert _hashline(1, "one\n") in result + assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result + assert "|zero" not in result + assert "|alpha" not in result + + +def test_read_tool_reads_structured_ranges(tmp_path): path = tmp_path / "sample.txt" path.write_text("zero\none\ntwo\nthree\nfour\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = ReadTool.make(session, ["sample.txt", "1:2", "3,5"]) + tool = ReadTool.make(session, _read("sample.txt", ranges=[[1, 2], [3, 5]])) result = tool.call() assert tool.ranges == [(1, 2), (3, 5)] @@ -100,7 +130,7 @@ def test_read_tool_reads_to_eof_when_end_is_zero(tmp_path): path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - result = ReadTool.make(session, ["sample.txt", "1,0"]).call() + result = ReadTool.make(session, _read("sample.txt", line_range=[1, 0])).call() assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result assert "|alpha" not in result @@ -111,7 +141,7 @@ def test_read_tool_allows_omitted_range_for_full_file_read(tmp_path): path.write_text("alpha\nbeta\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = ReadTool.make(session, ["sample.txt"]) + tool = ReadTool.make(session, _read("sample.txt")) result = tool.call() assert tool.start == 0 @@ -126,7 +156,7 @@ def test_read_tool_reads_range_token_when_numeric_filenames_exist(tmp_path): (tmp_path / "3").write_text("numeric filename three\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = ReadTool.make(session, ["sample.txt", "1,3"]) + tool = ReadTool.make(session, _read("sample.txt", line_range=[1, 3])) result = tool.call() assert tool.ranges == [(1, 3)] @@ -140,7 +170,7 @@ def test_read_tool_truncates_full_file_reads_after_600_lines(tmp_path): path.write_text("".join(f"line-{index:04d}\n" for index in range(605)), encoding="utf-8") session = Session(cwd=str(tmp_path)) - result = ReadTool.make(session, ["sample.txt"]).call() + result = ReadTool.make(session, _read("sample.txt")).call() assert "0:600" in result assert "true" in result @@ -156,7 +186,7 @@ def test_read_tool_truncates_large_bounded_ranges_after_600_lines(tmp_path): path.write_text("".join(f"line-{index:04d}\n" for index in range(700)), encoding="utf-8") session = Session(cwd=str(tmp_path)) - result = ReadTool.make(session, ["sample.txt", "10,650"]).call() + result = ReadTool.make(session, _read("sample.txt", line_range=[10, 650])).call() assert "10:610" in result assert "true" in result @@ -195,7 +225,7 @@ def tracking_open(*args, **kwargs): monkeypatch.setattr(nanocode, "open", tracking_open, raising=False) - result = ReadTool.make(session, ["sample.txt", "1,3"]).call() + result = ReadTool.make(session, _read("sample.txt", line_range=[1, 3])).call() assert _hashline(1, "one\n") + _hashline(2, "two\n") in result assert "three" not in result @@ -207,7 +237,7 @@ def test_read_tool_clamps_out_of_bounds_range(tmp_path): path.write_text("alpha\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - result = ReadTool.make(session, ["sample.txt", "10,20"]).call() + result = ReadTool.make(session, _read("sample.txt", line_range=[10, 20])).call() assert "alpha" not in result assert " \n\n " in result @@ -218,8 +248,8 @@ def test_read_tool_rejects_non_integer_range(tmp_path): path.write_text("alpha\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="invalid range"): - ReadTool.make(session, ["sample.txt", "bad,1"]) + with pytest.raises(ToolCallError, match="range start must be an integer"): + ReadTool.make(session, _read("sample.txt", line_range=["bad", 1])) def test_read_tool_rejects_partial_range(tmp_path): @@ -227,5 +257,5 @@ def test_read_tool_rejects_partial_range(tmp_path): path.write_text("alpha\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="Read args error: invalid range token"): - ReadTool.make(session, ["sample.txt", "0"]) + with pytest.raises(ToolCallError, match=r"range must be a \[start, end\] integer pair"): + ReadTool.make(session, _read("sample.txt", line_range=[0])) diff --git a/tests/test_nanocode_search_tool.py b/tests/test_nanocode_search_tool.py index f2ceb7e..21e75e4 100644 --- a/tests/test_nanocode_search_tool.py +++ b/tests/test_nanocode_search_tool.py @@ -6,6 +6,17 @@ from nanocode import EditTool, SearchTool, Session, ToolCallError +def _search(pattern: str, *, path: str | None = None, glob: str | None = None, context: int | object | None = None, **extra: object): + spec: dict[str, object] = {"pattern": pattern, **extra} + if path is not None: + spec["path"] = path + if glob is not None: + spec["glob"] = glob + if context is not None: + spec["context"] = context + return [spec] + + def test_search_tool_python_backend_finds_or_patterns_and_applies_glob(tmp_path, monkeypatch): (tmp_path / ".gitignore").write_text("ignored.txt\nignored_dir/\n", encoding="utf-8") (tmp_path / "keep.txt").write_text("alpha needle\nsecond hit\n", encoding="utf-8") @@ -24,7 +35,7 @@ def test_search_tool_python_backend_finds_or_patterns_and_applies_glob(tmp_path, session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - tool = SearchTool.make(session, ["needle|second", ".", "*.txt"]) + tool = SearchTool.make(session, _search("needle|second", path=".", glob="*.txt")) result = tool.call() assert "* engine: python" in result @@ -38,31 +49,31 @@ def test_search_tool_python_backend_finds_or_patterns_and_applies_glob(tmp_path, assert "hidden.txt" not in result -def test_search_tool_rejects_many_plain_args_without_explicit_path(tmp_path): +def test_search_tool_rejects_positional_args(tmp_path): session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="requires 1 to 4 args"): + with pytest.raises(ToolCallError, match="Search args error: expected exactly one object"): SearchTool.make(session, ["class Edit", "class Bash", "class Search", "class Read", "class CreateFile"]) -def test_search_tool_treats_second_plain_arg_as_path(tmp_path): +def test_search_tool_uses_structured_path(tmp_path): path = tmp_path / "sample.py" path.write_text("class EditTool:\nclass BashTool:\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["class Edit|class Bash", "sample.py"]) + tool = SearchTool.make(session, _search("class Edit|class Bash", path="sample.py")) assert tool.pattern == "class Edit|class Bash" assert tool.target_path == str(path) -def test_search_tool_accepts_explicit_path_option_with_regex_and_context(tmp_path, monkeypatch): +def test_search_tool_accepts_structured_path_with_regex_and_context(tmp_path, monkeypatch): path = tmp_path / "nanocode.py" path.write_text("class EditTool:\nclass BashTool:\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - tool = SearchTool.make(session, ["class .*Tool", "path=nanocode.py", "context=0"]) + tool = SearchTool.make(session, _search("class .*Tool", path="nanocode.py", context=0)) result = tool.call() assert tool.target_path == str(path) @@ -71,23 +82,23 @@ def test_search_tool_accepts_explicit_path_option_with_regex_and_context(tmp_pat assert "* nanocode.py:2: class BashTool:" in result -def test_search_tool_accepts_explicit_path_option_as_second_arg(tmp_path): +def test_search_tool_uses_default_context_when_omitted(tmp_path): path = tmp_path / "nanocode.py" path.write_text("class EditTool:\nclass BashTool:\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["class Edit", "path=nanocode.py"]) + tool = SearchTool.make(session, _search("class Edit", path="nanocode.py")) assert tool.target_path == str(path) assert tool.context_lines == SearchTool.CONTEXT_LINES -def test_search_tool_accepts_explicit_path_option_with_multiple_terms(tmp_path): +def test_search_tool_accepts_regex_alternatives(tmp_path): path = tmp_path / "nanocode.py" path.write_text("class EditTool:\nclass BashTool:\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["class Edit", "class Bash", "path=nanocode.py"]) + tool = SearchTool.make(session, _search("class Edit|class Bash", path="nanocode.py")) assert tool.pattern == "class Edit|class Bash" assert tool.target_path == str(path) @@ -97,10 +108,10 @@ def test_search_tool_rejects_ignore_case_option(tmp_path): (tmp_path / "sample.py").write_text("Needle\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="ignore_case is not supported"): - SearchTool.make(session, ["needle", "ignore_case=true"]) - with pytest.raises(ToolCallError, match="ignore_case is not supported"): - SearchTool.make(session, ["needle", "path=sample.py", "ignore_case=true"]) + with pytest.raises(ToolCallError, match="unexpected search option: ignore_case"): + SearchTool.make(session, _search("needle", ignore_case=True)) + with pytest.raises(ToolCallError, match="unexpected search option: ignore_case"): + SearchTool.make(session, _search("needle", path="sample.py", ignore_case=True)) def test_search_tool_uses_pipe_as_regex_or(tmp_path): @@ -108,7 +119,7 @@ def test_search_tool_uses_pipe_as_regex_or(tmp_path): path.write_text("alpha\nbeta\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["alpha|beta", "sample.txt"]) + tool = SearchTool.make(session, _search("alpha|beta", path="sample.txt")) result = tool.call() assert "* sample.txt:1: alpha" in result @@ -119,7 +130,7 @@ def test_search_tool_prefers_rg_backend(tmp_path, monkeypatch): path = tmp_path / "sample.txt" path.write_text("needle\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle", "sample.txt"]) + tool = SearchTool.make(session, _search("needle", path="sample.txt")) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "/fake/rg" if name == "rg" else "") monkeypatch.setattr(SearchTool, "_call_rg", lambda self, rg: f"rg:{rg}") @@ -157,7 +168,7 @@ def fake_run(cmd, text, stdout, stderr, timeout): monkeypatch.setattr(nanocode.shutil, "which", lambda name: "/fake/rg" if name == "rg" else "") monkeypatch.setattr(nanocode.subprocess, "run", fake_run) - result = SearchTool.make(session, [r"(?\s+(\d+:[0-9a-f]{6})\|beta", result).group(1) EditTool.make(session, ["sample.txt", [{"op": "replace", "start": anchor, "end": anchor, "content": "BETA\n"}]]).call() @@ -200,7 +211,7 @@ def test_search_tool_python_backend_includes_default_context_lines(tmp_path, mon session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", "sample.txt"]).call() + result = SearchTool.make(session, _search("needle", path="sample.txt")).call() assert "* sample.txt:4: needle" in result assert " > 3:" in result and "|needle" in result @@ -221,7 +232,7 @@ def test_search_tool_python_backend_supports_regex(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, [r"def __init__\([^)]*,[^)]*\)", "sample.py"]).call() + result = SearchTool.make(session, _search(r"def __init__\([^)]*,[^)]*\)", path="sample.py")).call() assert "* engine: python" in result assert "* sample.py:5: def __init__(self, name):" in result @@ -234,7 +245,7 @@ def test_search_tool_supports_context_option_without_glob(tmp_path, monkeypatch) session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", "sample.txt", "context=3"]).call() + result = SearchTool.make(session, _search("needle", path="sample.txt", context=3)).call() assert " 0:" in result and "|one" in result assert " 1:" in result and "|two" in result @@ -252,7 +263,7 @@ def test_search_tool_omits_context_before_outer_excerpt(tmp_path, monkeypatch): monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") monkeypatch.setattr(SearchTool, "OUTPUT_CHARS", 700) - result = SearchTool.make(session, ["needle", "sample.txt", "context=1"]).call() + result = SearchTool.make(session, _search("needle", path="sample.txt", context=1)).call() assert "* context_omitted:" in result assert "* sample.txt:2: needle" in result @@ -263,7 +274,7 @@ def test_search_tool_omits_context_before_outer_excerpt(tmp_path, monkeypatch): def test_search_tool_accepts_context_30(tmp_path): session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle", ".", "context=30"]) + tool = SearchTool.make(session, _search("needle", path=".", context=30)) assert tool.context_lines == 30 @@ -274,7 +285,7 @@ def test_search_tool_supports_numeric_context_option_with_glob(tmp_path, monkeyp session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", ".", "*.txt", "2"]).call() + result = SearchTool.make(session, _search("needle", path=".", glob="*.txt", context=2)).call() assert "* keep.txt:3: needle" in result assert " 0:" in result and "|zero" in result @@ -291,7 +302,7 @@ def test_search_tool_supports_glob_and_context_option(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", ".", "*.txt", "context=1"]).call() + result = SearchTool.make(session, _search("needle", path=".", glob="*.txt", context=1)).call() assert "* keep.txt:2: needle" in result assert " > 1:" in result and "|needle" in result @@ -304,7 +315,7 @@ def test_search_tool_accepts_named_glob_option(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", ".", "glob_pattern=*.py"]).call() + result = SearchTool.make(session, _search("needle", path=".", glob="*.py")).call() assert "* keep.py:1: needle" in result assert "skip.txt" not in result @@ -314,7 +325,7 @@ def test_search_tool_defaults_path_to_cwd_when_omitted(tmp_path): (tmp_path / "sample.txt").write_text("needle\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle"]) + tool = SearchTool.make(session, _search("needle")) assert tool.target_path == str(tmp_path) @@ -324,7 +335,7 @@ def test_search_tool_accepts_context_option_without_path(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - tool = SearchTool.make(session, ["needle", "context=0"]) + tool = SearchTool.make(session, _search("needle", context=0)) result = tool.call() assert tool.target_path == str(tmp_path) @@ -339,7 +350,7 @@ def test_search_tool_accepts_glob_option_without_path(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - tool = SearchTool.make(session, ["needle", "glob=*.py"]) + tool = SearchTool.make(session, _search("needle", glob="*.py")) result = tool.call() assert tool.target_path == str(tmp_path) @@ -352,14 +363,14 @@ def test_search_tool_rejects_empty_pattern(tmp_path): session = Session(cwd=str(tmp_path)) with pytest.raises(ToolCallError, match="pattern cannot be empty"): - SearchTool.make(session, ["", "."]) + SearchTool.make(session, _search("", path=".")) def test_search_tool_treats_empty_path_as_cwd(tmp_path): (tmp_path / "sample.txt").write_text("needle\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle", ""]) + tool = SearchTool.make(session, _search("needle", path="")) assert tool.target_path == str(tmp_path) @@ -368,14 +379,14 @@ def test_search_tool_rejects_invalid_regex(tmp_path): session = Session(cwd=str(tmp_path)) with pytest.raises(ToolCallError, match="invalid regex"): - SearchTool.make(session, ["[", "."]) + SearchTool.make(session, _search("[", path=".")) def test_search_tool_defaults_to_regex(tmp_path): (tmp_path / "sample.py").write_text("class SearchTool:\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["class.*Tool", "sample.py"]) + tool = SearchTool.make(session, _search("class.*Tool", path="sample.py")) result = tool.call() assert "* sample.py:1: class SearchTool:" in result @@ -386,7 +397,7 @@ def test_search_tool_supports_multiline_regex(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - tool = SearchTool.make(session, [r"@dataclass.*\nclass.*State", "sample.py", "context=1"]) + tool = SearchTool.make(session, _search(r"@dataclass.*\nclass.*State", path="sample.py", context=1)) result = tool.call() assert tool.pattern == "@dataclass.*\nclass.*State" @@ -400,20 +411,20 @@ def test_search_tool_rejects_invalid_context(tmp_path): session = Session(cwd=str(tmp_path)) with pytest.raises(ToolCallError, match="context must be an integer"): - SearchTool.make(session, ["needle", ".", "context=bad"]) + SearchTool.make(session, _search("needle", path=".", context="bad")) def test_search_tool_rejects_missing_target(tmp_path): session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle", "missing.txt"]) + tool = SearchTool.make(session, _search("needle", path="missing.txt")) with pytest.raises(ToolCallError, match="not a file or directory"): tool.call() -def test_search_tool_keeps_plain_second_arg_as_path_when_only_two_args(tmp_path): +def test_search_tool_uses_structured_path_for_plain_names(tmp_path): session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle", "TOOLS"]) + tool = SearchTool.make(session, _search("needle", path="TOOLS")) assert tool.pattern == "needle" assert tool.target_path == str(tmp_path / "TOOLS") @@ -421,7 +432,7 @@ def test_search_tool_keeps_plain_second_arg_as_path_when_only_two_args(tmp_path) def test_search_tool_rejects_placeholder_path_with_guidance(tmp_path): session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["needle", "path", "*.py"]) + tool = SearchTool.make(session, _search("needle", path="path", glob="*.py")) with pytest.raises(ToolCallError, match='"path" is a placeholder'): tool.call() @@ -432,7 +443,7 @@ def test_search_tool_returns_no_matches_for_glob_mismatch(tmp_path, monkeypatch) session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", "sample.py", "*.txt"]).call() + result = SearchTool.make(session, _search("needle", path="sample.py", glob="*.txt")).call() assert result == "\n".join( [ @@ -451,7 +462,7 @@ def test_search_tool_truncates_python_results(tmp_path, monkeypatch): monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") monkeypatch.setattr(SearchTool, "MAX_MATCHES", 2) - result = SearchTool.make(session, ["needle", "sample.txt"]).call() + result = SearchTool.make(session, _search("needle", path="sample.txt")).call() assert "* sample.txt:1: needle 1" in result assert "* sample.txt:2: needle 2" in result @@ -466,7 +477,7 @@ def test_search_tool_python_backend_honors_gitignore_glob(tmp_path, monkeypatch) session = Session(cwd=str(tmp_path)) monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") - result = SearchTool.make(session, ["needle", "."]).call() + result = SearchTool.make(session, _search("needle", path=".")).call() assert "keep.txt" in result assert "skip.log" not in result @@ -476,7 +487,7 @@ def test_search_tool_python_fallback_case_insensitive_normal(tmp_path): session = Session(cwd=str(tmp_path)) (tmp_path / "test.txt").write_text("Hello World\n", encoding="utf-8") - tool = SearchTool.make(session, ["hello", "."]) + tool = SearchTool.make(session, _search("hello", path=".")) assert tool._line_matches("Hello World") is True assert tool._line_matches("hello world") is True assert tool._line_matches("HELLO WORLD") is True @@ -486,7 +497,7 @@ def test_search_tool_python_fallback_case_insensitive_regex(tmp_path): session = Session(cwd=str(tmp_path)) (tmp_path / "test.txt").write_text("Hello World\n", encoding="utf-8") - tool = SearchTool.make(session, ["[h]ello", "."]) + tool = SearchTool.make(session, _search("[h]ello", path=".")) assert tool._line_matches("Hello World") is True assert tool._line_matches("hello world") is True assert tool._line_matches("HELLO WORLD") is True @@ -495,6 +506,6 @@ def test_search_tool_python_fallback_case_insensitive_regex(tmp_path): def test_search_tool_rg_backend_is_case_insensitive(tmp_path): session = Session(cwd=str(tmp_path)) - tool = SearchTool.make(session, ["hello", "."]) + tool = SearchTool.make(session, _search("hello", path=".")) assert "-i" in tool._rg_command("rg") From 0c01b620b639efcab4d55eb506c970249beab220 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 01:51:27 -0700 Subject: [PATCH 05/29] Clean up structured read context handling --- nanocode.py | 109 +++++++------------------------ tests/test_nanocode_read_tool.py | 17 +++-- 2 files changed, 33 insertions(+), 93 deletions(-) diff --git a/nanocode.py b/nanocode.py index ce8bada..8688462 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1541,8 +1541,6 @@ def _file_context_entries(cls, blocks: list[str]) -> list[tuple[int, str, str, i source = cls.result_key(block) if source and cls._is_read_result_block(block): entries.extend((cls.result_counter(block), source, path, number, line) for path, number, line in cls._read_block_file_lines(block)) - continue - entries.extend(cls._recall_block_file_entries(block)) return entries @staticmethod @@ -1580,11 +1578,12 @@ def _read_output_file_lines(cls, output: str, *, default_path: str) -> list[tupl for path, section in cls._read_output_file_sections(output, default_path=default_path): if not path: continue - for content in cls._read_output_content_sections(section): + for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", section): + content = match.group(1) for line in content.splitlines(): - match = re.match(r"(\d+):[0-9a-f]{6}\|", line) - if match: - file_lines.append((path, int(match.group(1)), line)) + line_match = re.match(r"(\d+):[0-9a-f]{6}\|", line) + if line_match: + file_lines.append((path, int(line_match.group(1)), line)) return file_lines @classmethod @@ -1594,20 +1593,6 @@ def _is_read_result_block(cls, block: str) -> bool: header, output = block.split("\n output:\n", 1) return bool(re.search(r"\btool=Read\b", header) and "" in output) - @classmethod - def _recall_block_file_entries(cls, block: str) -> list[tuple[int, str, str, int, str]]: - if not cls.is_full_block(block): - return [] - header, output = block.split("\n output:\n", 1) - if not re.search(r"\btool=Recall\b", header) or "RecallToolResult:" not in output: - return [] - entries: list[tuple[int, str, str, int, str]] = [] - for source, description, content in cls._recall_output_items(output): - default_path = cls._read_description_default_path(description) - order = cls._result_key_counter(source) - entries.extend((order, source, path, number, line) for path, number, line in cls._read_output_file_lines(content, default_path=default_path)) - return entries - @classmethod def recalled_result_blocks(cls, recall_block: str) -> list[str]: if not cls.is_full_block(recall_block): @@ -1649,17 +1634,6 @@ def _recall_output_items(output: str) -> Iterator[tuple[str, str, str]]: if content_match: yield source, (description_match.group(1) if description_match else ""), content_match.group(1) - @staticmethod - def _read_description_default_path(description: str) -> str: - match = re.match(r"(?:success|failure) Read\s+(.+?)(?:\s+-\s+.*)?$", description) - if match is None: - return "" - try: - tokens = shlex.split(match.group(1)) - except ValueError: - return "" - return tokens[0] if tokens else "" - @staticmethod def _read_block_default_path(header: str) -> str: marker = " args=" @@ -1693,11 +1667,6 @@ def _read_output_file_sections(output: str, *, default_path: str) -> Iterator[tu path_match = re.search(r"(.*?)", section) yield (path_match.group(1).strip() if path_match else default_path), section - @staticmethod - def _read_output_content_sections(text: str) -> Iterator[str]: - for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", text): - yield match.group(1) - @classmethod def bound_block(cls, block: str, *, max_chars: int) -> str: if len(block) <= max_chars: @@ -1901,11 +1870,6 @@ class ReadTool(Tool): 'Example args: [{"files":[{"path":"pyproject.toml"},{"path":"uv.lock","range":[0,120]}]}]', ) - filepath: str = "" - start: int = 0 - end: int = 0 - ranges: list[tuple[int, int]] = field(default_factory=list) - filepaths: list[str] = field(default_factory=list) targets: list[tuple[str, list[tuple[int, int]]]] = field(default_factory=list) cwd: str = "" @@ -1921,7 +1885,12 @@ def cli_args(cls, args: list[JsonValue]) -> list[str]: path = _json_str(spec.get("path")) or "" if not path: continue - ranges = cls._cli_range_tokens(spec) + raw_ranges = [spec.get("range")] if "range" in spec else _json_list(spec.get("ranges")) if "ranges" in spec else [] + ranges = [] + for raw_range in raw_ranges: + values = _json_list(raw_range) + if len(values) == 2: + ranges.append(str(values[0]) + ":" + str(values[1])) if not ranges: tokens.append(path) continue @@ -1929,19 +1898,6 @@ def cli_args(cls, args: list[JsonValue]) -> list[str]: tokens.extend(ranges) return tokens or [cls.cli_token(args[0])] - @staticmethod - def _cli_range_tokens(spec: Json) -> list[str]: - if "range" in spec: - raw_ranges = [spec.get("range")] - else: - raw_ranges = _json_list(spec.get("ranges")) if "ranges" in spec else [] - tokens = [] - for raw_range in raw_ranges: - values = _json_list(raw_range) - if len(values) == 2: - tokens.append(str(values[0]) + ":" + str(values[1])) - return tokens - @classmethod def tool_schema(cls) -> Json: range_schema = { @@ -1993,18 +1949,7 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: if len(args) != 1 or not isinstance(args[0], dict): raise ToolCallArgError('Read args error: expected exactly one object, e.g. [{"path":"nanocode.py","range":[2065,2095]}]') payload = _json_dict(args[0]) - targets = cls._parse_targets(session, payload) - filepath, ranges = targets[0] - start, end = ranges[0] - return cls( - filepath=filepath, - start=start, - end=end, - ranges=ranges, - filepaths=[path for path, _ranges in targets], - targets=targets, - cwd=session.cwd, - ) + return cls(targets=cls._parse_targets(session, payload), cwd=session.cwd) @classmethod def _parse_targets(cls, session: Session, payload: Json) -> list[tuple[str, list[tuple[int, int]]]]: @@ -2046,23 +1991,17 @@ def _parse_file_spec(cls, session: Session, value: JsonValue, *, index: int) -> ranges = [(0, 0)] return session.resolve_path(path), ranges - def _targets(self) -> list[tuple[str, list[tuple[int, int]]]]: - if self.targets: - return self.targets - return [(self.filepath, self.ranges or [(self.start, self.end)])] - def requires_confirmation(self, session: Session) -> bool: - return any(not session.is_path_in_cwd(filepath) for filepath, _ranges in self._targets()) + return any(not session.is_path_in_cwd(filepath) for filepath, _ranges in self.targets) def preview(self) -> str: - targets = self._targets() - if len(targets) > 1: + if len(self.targets) > 1: chunks = [] - for filepath, ranges in targets: + for filepath, ranges in self.targets: range_text = ",".join(str(start) + ":" + str(end) for start, end in ranges) chunks.append(filepath + (":" + range_text if range_text != "0:0" else "")) return "Read(" + ", ".join(chunks) + ")" - filepath, ranges = targets[0] + filepath, ranges = self.targets[0] if len(ranges) > 1: range_text = ", ".join(str(start) + ":" + str(end) for start, end in ranges) return f"Read({filepath}, {range_text})" @@ -2070,14 +2009,13 @@ def preview(self) -> str: return f"Read({filepath}, {start}, {end})" def call(self) -> str: - targets = self._targets() - if len(targets) > 1: + if len(self.targets) > 1: lines = [ "", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', - " " + str(len(targets)) + "", + " " + str(len(self.targets)) + "", ] - for filepath, ranges in targets: + for filepath, ranges in self.targets: lines.extend([" ", " " + os.path.relpath(filepath, self.cwd) + ""]) if len(ranges) > 1: lines.append(" " + str(len(ranges)) + "") @@ -2095,7 +2033,7 @@ def call(self) -> str: lines.append("") return "\n".join(lines) - filepath, ranges = targets[0] + filepath, ranges = self.targets[0] if len(ranges) > 1: lines = [ "", @@ -2117,17 +2055,16 @@ def call(self) -> str: lines.append("") return "\n".join(lines) - def _read_range(self, start: int, end: int, *, filepath: str | None = None) -> tuple[str, int, int, bool, int]: - target_filepath = filepath or self.filepath + def _read_range(self, start: int, end: int, *, filepath: str) -> tuple[str, int, int, bool, int]: total_lines = 0 selected_lines = [] truncated = False bounded_read_lines = end - start if end else 0 if end and bounded_read_lines <= self.MAX_LINES: - with open(target_filepath, "r", encoding="utf-8") as f: + with open(filepath, "r", encoding="utf-8") as f: selected_lines = list(itertools.islice(f, start, end)) else: - with open(target_filepath, "r", encoding="utf-8") as f: + with open(filepath, "r", encoding="utf-8") as f: for index, line in enumerate(f): total_lines = index + 1 if index < start: diff --git a/tests/test_nanocode_read_tool.py b/tests/test_nanocode_read_tool.py index 1154216..673331d 100644 --- a/tests/test_nanocode_read_tool.py +++ b/tests/test_nanocode_read_tool.py @@ -21,6 +21,10 @@ def _read_files(*files: dict[str, object]): return [{"files": list(files)}] +def _target_paths(tool: ReadTool) -> list[str]: + return [path for path, _ranges in tool.targets] + + def test_read_tool_reads_requested_line_range(tmp_path): path = tmp_path / "sample.txt" path.write_text("alpha\nbeta\ngamma\n", encoding="utf-8") @@ -62,7 +66,7 @@ def test_read_tool_reads_multiple_structured_line_ranges(tmp_path): tool = ReadTool.make(session, _read("sample.txt", ranges=[[1, 2], [3, 5]])) result = tool.call() - assert tool.ranges == [(1, 2), (3, 5)] + assert tool.targets[0][1] == [(1, 2), (3, 5)] assert "1:2, 3:5" in tool.preview() assert "1:2" in result assert "3:5" in result @@ -80,7 +84,7 @@ def test_read_tool_reads_multiple_files(tmp_path): tool = ReadTool.make(session, _read_files({"path": "pyproject.toml"}, {"path": "uv.lock"})) result = tool.call() - assert tool.filepaths == [str(tmp_path / "pyproject.toml"), str(tmp_path / "uv.lock")] + assert _target_paths(tool) == [str(tmp_path / "pyproject.toml"), str(tmp_path / "uv.lock")] assert tool.requires_confirmation(session) is False assert "pyproject.toml, " in tool.preview() assert "2" in result @@ -98,7 +102,7 @@ def test_read_tool_reads_multiple_files_with_independent_ranges(tmp_path): tool = ReadTool.make(session, _read_files({"path": "one.txt", "range": [1, 2]}, {"path": "two.txt", "range": [1, 3]})) result = tool.call() - assert tool.filepaths == [str(tmp_path / "one.txt"), str(tmp_path / "two.txt")] + assert _target_paths(tool) == [str(tmp_path / "one.txt"), str(tmp_path / "two.txt")] assert "one.txt" in result assert "two.txt" in result assert _hashline(1, "one\n") in result @@ -115,7 +119,7 @@ def test_read_tool_reads_structured_ranges(tmp_path): tool = ReadTool.make(session, _read("sample.txt", ranges=[[1, 2], [3, 5]])) result = tool.call() - assert tool.ranges == [(1, 2), (3, 5)] + assert tool.targets[0][1] == [(1, 2), (3, 5)] assert "1:2, 3:5" in tool.preview() assert "1:2" in result assert "3:5" in result @@ -144,8 +148,7 @@ def test_read_tool_allows_omitted_range_for_full_file_read(tmp_path): tool = ReadTool.make(session, _read("sample.txt")) result = tool.call() - assert tool.start == 0 - assert tool.end == 0 + assert tool.targets[0][1] == [(0, 0)] assert "0:0" in result assert _hashline(0, "alpha\n") + _hashline(1, "beta\n") in result @@ -159,7 +162,7 @@ def test_read_tool_reads_range_token_when_numeric_filenames_exist(tmp_path): tool = ReadTool.make(session, _read("sample.txt", line_range=[1, 3])) result = tool.call() - assert tool.ranges == [(1, 3)] + assert tool.targets[0][1] == [(1, 3)] assert "1:3" in result assert _hashline(1, "one\n") + _hashline(2, "two\n") in result assert "numeric filename" not in result From fada8706beb975c51e1534cabfdb4757d2e3d149 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 01:54:14 -0700 Subject: [PATCH 06/29] Make tool call display human readable --- nanocode.py | 14 ++++++++++++-- tests/test_nanocode_loop.py | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/nanocode.py b/nanocode.py index 8688462..8603e5e 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1181,7 +1181,17 @@ class Tool: @classmethod def cli_args(cls, args: list[JsonValue]) -> list[str]: - return [cls.cli_token(arg) for arg in args] + tokens: list[str] = [] + for arg in args: + if isinstance(arg, dict): + tokens.extend(cls.cli_object_args(arg)) + else: + tokens.append(cls.cli_token(arg)) + return tokens + + @classmethod + def cli_object_args(cls, value: Json) -> list[str]: + return [str(key) + "=" + cls.cli_token(item) for key, item in value.items()] @staticmethod def cli_content_summary(value: str) -> str: @@ -1192,7 +1202,7 @@ def cli_content_summary(value: str) -> str: @staticmethod def cli_token(value: JsonValue) -> str: - text = json.dumps(value, ensure_ascii=False, separators=(",", ":")) if isinstance(value, (dict, list)) else str(value) + text = json.dumps(value, ensure_ascii=False, separators=(",", ":")) if isinstance(value, (dict, list, bool)) or value is None else str(value) if "\n" in text: return Tool.cli_content_summary(text) text = _shorten(text, 100) diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py index d9fbb40..084ed1a 100644 --- a/tests/test_nanocode_loop.py +++ b/tests/test_nanocode_loop.py @@ -268,6 +268,28 @@ def __init__(self): assert ("ansibrightblack", " | excerpt") in keyed_segments +def test_tool_call_display_formats_structured_args_for_humans(): + read = ParsedToolCall( + name="Read", + intention="", + args=[{"files": [{"path": "one.py", "range": [0, 10]}, {"path": "two.py", "ranges": [[20, 30], [40, 50]]}]}], + ) + search = ParsedToolCall( + name="Search", + intention="", + args=[{"pattern": "class Foo", "path": ".", "glob": "*.py", "context": 2}], + ) + inspect_code = ParsedToolCall( + name="InspectCode", + intention="", + args=["find", "Tool", {"kind": "class", "limit": 20, "exact_only": True}], + ) + + assert ToolCallDisplayFormatter.format_call(read) == "Read one.py 0:10 two.py 20:30 40:50" + assert ToolCallDisplayFormatter.format_call(search) == 'Search "class Foo" path=. glob=*.py context=2' + assert ToolCallDisplayFormatter.format_call(inspect_code) == "InspectCode find Tool kind=class limit=20 exact_only=true" + + def test_tool_call_report_compacts_interrupted_bash_result(): output = "\n* exit_code: -1\n* interrupted: true\n* reason: user_ctrl_c\n" From f5ad31e3ea09191a2c92f09e99584adfe4d36ad3 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 02:50:56 -0700 Subject: [PATCH 07/29] Project edit results into file context --- nanocode.py | 116 +++++++++++++++++++++++-------- tests/test_nanocode_agent.py | 32 +++++++++ tests/test_nanocode_edit_tool.py | 20 +++--- 3 files changed, 130 insertions(+), 38 deletions(-) diff --git a/nanocode.py b/nanocode.py index 8603e5e..fb24b5f 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1504,28 +1504,52 @@ def render_blocks_for_prompt(cls, blocks: list[str]) -> list[str]: @classmethod def render_block_for_prompt(cls, block: str) -> str: - if not cls._is_read_result_block(block): + if not cls._is_file_context_result_block(block): return block - compact = cls.compact_block(block) + compact = cls.compact_file_context_block(block) if "\n out: " in compact: - content = "file_context" if cls._read_block_file_lines(block) else "recall" + content = "file_context" if cls._file_context_block_items(block) else "recall" return compact + "; content=" + content return compact + @classmethod + def compact_file_context_block(cls, block: str) -> str: + if not cls.is_full_block(block): + return block + header, output = block.split("\n output:\n", 1) + summary_output = re.sub(r"(?ms)^[ \t]*\n.*?^[ \t]*", "...", output) + summary_output = re.sub(r"(?m)^[ \t]*[ \t]*$", "", summary_output) + parts = [str(_tool_output_line_count(output)) + " lines, " + str(len(output)) + " chars"] + if "[tool result excerpt]" in output or "excerpted: true" in output: + parts.append("excerpt") + if key := cls.result_key(block): + parts.append("recall=" + key) + if summary_output: + parts.append(_shorten(" ".join(summary_output.split()), cls.COMPACT_OUTPUT_SUMMARY_CHARS)) + return header + "\n out: " + "; ".join(parts) + @classmethod def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: files: dict[str, dict[int, tuple[str, str]]] = {} - entries = sorted(cls._file_context_entries(blocks), key=lambda item: item[0]) - for _order, source, path, number, line in entries: - if source: - files.setdefault(path, {})[number] = (source, line) + items = sorted(cls._file_context_items(blocks), key=lambda item: (item[0], item[1], item[4], item[5])) + for _order, _phase, kind, source, path, start, end, line in items: + if not source or not path: + continue + file_lines = files.setdefault(path, {}) + if kind == "clear": + for number in list(file_lines): + if number >= start and (end == 0 or number < end): + del file_lines[number] + continue + file_lines[start] = (source, line) if not files: return "" lines = [ "Source Policy:", - "- Built dynamically for this prompt from active raw Read results.", - "- Overlapping lines use the newest active Read result.", + "- Built dynamically for this prompt from active raw Read and Edit results.", + "- Overlapping lines use the newest active Read/Edit result.", + "- Edit results can clear stale older lines when edits shift line numbers.", "", ] for path in sorted(files): @@ -1545,13 +1569,11 @@ def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered @classmethod - def _file_context_entries(cls, blocks: list[str]) -> list[tuple[int, str, str, int, str]]: - entries: list[tuple[int, str, str, int, str]] = [] + def _file_context_items(cls, blocks: list[str]) -> list[tuple[int, int, str, str, str, int, int, str]]: + items: list[tuple[int, int, str, str, str, int, int, str]] = [] for block in blocks: - source = cls.result_key(block) - if source and cls._is_read_result_block(block): - entries.extend((cls.result_counter(block), source, path, number, line) for path, number, line in cls._read_block_file_lines(block)) - return entries + items.extend(cls._file_context_block_items(block)) + return items @staticmethod def _file_context_segments(file_lines: dict[int, tuple[str, str]]) -> list[tuple[int, int, str, list[str]]]: @@ -1575,33 +1597,40 @@ def _file_context_segments(file_lines: dict[int, tuple[str, str]]) -> list[tuple return segments @classmethod - def _read_block_file_lines(cls, block: str) -> list[tuple[str, int, str]]: - if not cls._is_read_result_block(block): + def _file_context_block_items(cls, block: str) -> list[tuple[int, int, str, str, str, int, int, str]]: + if not cls._is_file_context_result_block(block): return [] header, output = block.split("\n output:\n", 1) - default_path = cls._read_block_default_path(header) - return cls._read_output_file_lines(output, default_path=default_path) + default_path = cls._read_block_default_path(header) if re.search(r"\btool=Read\b", header) else "" + return cls._file_context_output_items(output, default_path=default_path, order=cls.result_counter(block), source=cls.result_key(block)) @classmethod - def _read_output_file_lines(cls, output: str, *, default_path: str) -> list[tuple[str, int, str]]: - file_lines: list[tuple[str, int, str]] = [] - for path, section in cls._read_output_file_sections(output, default_path=default_path): + def _file_context_output_items( + cls, output: str, *, default_path: str, order: int, source: str + ) -> list[tuple[int, int, str, str, str, int, int, str]]: + items: list[tuple[int, int, str, str, str, int, int, str]] = [] + for path, section in cls._file_context_file_sections(output, default_path=default_path): if not path: continue + for clear_match in re.finditer(r"(?m)^[ \t]*(\d+):(\d+)", section): + items.append((order, 0, "clear", source, path, int(clear_match.group(1)), int(clear_match.group(2)), "")) for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", section): content = match.group(1) for line in content.splitlines(): line_match = re.match(r"(\d+):[0-9a-f]{6}\|", line) if line_match: - file_lines.append((path, int(line_match.group(1)), line)) - return file_lines + items.append((order, 1, "line", source, path, int(line_match.group(1)), 0, line)) + return items @classmethod - def _is_read_result_block(cls, block: str) -> bool: + def _is_file_context_result_block(cls, block: str) -> bool: if not cls.is_full_block(block): return False header, output = block.split("\n output:\n", 1) - return bool(re.search(r"\btool=Read\b", header) and "" in output) + return bool( + (re.search(r"\btool=Read\b", header) and "" in output) + or (re.search(r"\btool=Edit\b", header) and "" in output and ("" in output or "" in output)) + ) @classmethod def recalled_result_blocks(cls, recall_block: str) -> list[str]: @@ -1667,13 +1696,13 @@ def _read_block_default_path(header: str) -> str: return str(args[0]) @staticmethod - def _read_output_file_sections(output: str, *, default_path: str) -> Iterator[tuple[str, str]]: - file_matches = list(re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", output)) + def _file_context_file_sections(output: str, *, default_path: str) -> Iterator[tuple[str, str]]: + file_matches = list(re.finditer(r"(?ms)^[ \t]*<(?PReadFile|EditFile)>\n(.*?)^[ \t]*", output)) if not file_matches: yield default_path, output return for match in file_matches: - section = match.group(1) + section = match.group(2) path_match = re.search(r"(.*?)", section) yield (path_match.group(1).strip() if path_match else default_path), section @@ -3150,9 +3179,38 @@ def call(self) -> str: lines.append(f"* replace_all[{index}]: {end} replacements") else: lines.append(f"* range[{index}]: {start}:{end}") + lines.extend(self._format_file_context_update(relpath, replacements)) lines.append("") return "\n".join(lines) + def _format_file_context_update(self, relpath: str, replacements: list[tuple[int, int, list[str]]]) -> list[str]: + lines = [" ", " " + relpath + ""] + if any(start < 0 for start, _end, _replacement in replacements): + lines.extend([" 0:0", " "]) + return lines + + delta = 0 + for start, end, replacement in sorted(replacements, key=lambda item: item[0]): + old_len = end - start + new_start = start + delta + shown = replacement[: ReadTool.MAX_LINES] + new_end = new_start + len(shown) + clear_end = 0 if len(replacement) != old_len else new_start + old_len + lines.extend([" ", " " + str(new_start) + ":" + str(clear_end) + ""]) + lines.append(" " + str(new_start) + ":" + str(new_end) + "") + if len(shown) < len(replacement): + lines.append(" true") + lines.extend(self._format_hashline_content(new_start, shown, indent=" ")) + lines.append(" ") + delta += len(replacement) - old_len + lines.append(" ") + return lines + + @staticmethod + def _format_hashline_content(start: int, lines: list[str], *, indent: str) -> list[str]: + content = "".join(f"{start + index}:{_line_hash(line)}|{line}" for index, line in enumerate(lines)) + return [indent + "", content, indent + ""] + def _preview(self) -> tuple[str, str, list[tuple[int, int, list[str]]]]: try: with open(self.filepath, "r", encoding="utf-8") as f: diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 492cd30..c2501d6 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -511,6 +511,38 @@ def test_act_prompt_file_context_replaces_overlapping_read_lines(tmp_path, monke assert "content=file_context" in unreduced +def test_act_prompt_file_context_uses_edit_result_as_newest_file_content(tmp_path, monkeypatch): + path = tmp_path / "sample.txt" + path.write_text("old0\nold1\nold2\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + agent = Agent(session) + _set_context_budget(monkeypatch, agent, raw_chars=10_000) + anchors = _read_anchors(session, "sample.txt") + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 3])}]) + agent.execute_tool_calls( + [{"name": "Edit", "intention": "replace middle", "args": ["sample.txt", [{"op": "replace", "start": anchors[1], "end": anchors[1], "content": "new1\n"}]]}], + confirm=lambda call, tool: True, + ) + + prompt = agent.build_user_prompt() + file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") + assert path.read_text(encoding="utf-8") == "old0\nnew1\nold2\n" + assert "File: sample.txt" in file_context + assert "0:1 source=tr.1" in file_context + assert "1:2 source=tr.2" in file_context + assert "2:3 source=tr.1" in file_context + assert "|old0" in file_context + assert "|new1" in file_context + assert "|old2" in file_context + assert "|old1" not in file_context + assert "tool=Edit" in latest + assert "path: sample.txt" in latest + assert "range[1]: 1:2" in latest + assert "content=file_context" in latest + + def test_act_prompt_folds_excerpted_read_result(tmp_path): path = tmp_path / "sample.txt" path.write_text("x" * 20_000 + "\n", encoding="utf-8") diff --git a/tests/test_nanocode_edit_tool.py b/tests/test_nanocode_edit_tool.py index edce8e9..5132738 100644 --- a/tests/test_nanocode_edit_tool.py +++ b/tests/test_nanocode_edit_tool.py @@ -28,15 +28,16 @@ def test_edit_file_replaces_range_from_read_anchors(tmp_path): assert "-beta\n" in display assert "+BETA\n" in display assert path.read_text(encoding="utf-8") == "alpha\nBETA\ngamma\n" - assert result == "\n".join( - [ - "", - "* path: sample.txt", - "* edits: 1", - "* range[1]: 1:2", - "", - ] - ) + assert result.startswith("") + assert "* path: sample.txt" in result + assert "* edits: 1" in result + assert "* range[1]: 1:2" in result + assert "" in result + assert "sample.txt" in result + assert "1:2" in result + assert "1:2" in result + assert "|BETA" in result + assert result.endswith("") def test_edit_file_accepts_full_hashline_anchor(tmp_path): @@ -87,6 +88,7 @@ def test_edit_file_replace_all_literal_text_without_anchors(tmp_path): assert path.read_text(encoding="utf-8") == "NewName alpha\nNewName beta\n" assert "* edits: 1" in result assert "* replace_all[1]: 2 replacements" in result + assert "0:0" in result def test_edit_file_replace_all_rejects_no_match_or_mixed_edits(tmp_path): From 449371911357fa5065aa2997d9b4c6890e3f1fa4 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 03:05:28 -0700 Subject: [PATCH 08/29] Validate file context projections with file stat and hashes --- nanocode.py | 171 ++++++++++++++++++++++++++++--- tests/test_nanocode_agent.py | 38 ++++++- tests/test_nanocode_edit_tool.py | 1 + tests/test_nanocode_read_tool.py | 2 + 4 files changed, 199 insertions(+), 13 deletions(-) diff --git a/nanocode.py b/nanocode.py index fb24b5f..8f75c86 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1314,6 +1314,20 @@ def _tool_call_args_key(args: list[JsonValue]) -> tuple[str, ...]: return tuple(json.dumps(arg, ensure_ascii=False, sort_keys=True, separators=(",", ":")) for arg in args) +@dataclass(frozen=True) +class FileContextItem: + order: int + phase: int + kind: str + source: str + path: str + start: int + end: int + line: str + mtime_ns: int = 0 + size: int = -1 + + @dataclass class ToolResultContext: COMPACT_OUTPUT_SUMMARY_CHARS: ClassVar[int] = 120 @@ -1518,6 +1532,7 @@ def compact_file_context_block(cls, block: str) -> str: return block header, output = block.split("\n output:\n", 1) summary_output = re.sub(r"(?ms)^[ \t]*\n.*?^[ \t]*", "...", output) + summary_output = re.sub(r"(?m)^[ \t]*]*>[ \t]*$", "", summary_output) summary_output = re.sub(r"(?m)^[ \t]*[ \t]*$", "", summary_output) parts = [str(_tool_output_line_count(output)) + " lines, " + str(len(output)) + " chars"] if "[tool result excerpt]" in output or "excerpted: true" in output: @@ -1529,10 +1544,19 @@ def compact_file_context_block(cls, block: str) -> str: return header + "\n out: " + "; ".join(parts) @classmethod - def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: + def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int) -> str: files: dict[str, dict[int, tuple[str, str]]] = {} - items = sorted(cls._file_context_items(blocks), key=lambda item: (item[0], item[1], item[4], item[5])) - for _order, _phase, kind, source, path, start, end, line in items: + omitted: dict[str, dict[str, int]] = {} + items = sorted(cls._file_context_items(blocks), key=lambda item: (item.order, item.phase, item.path, item.start)) + line_numbers_by_path: dict[str, set[int]] = {} + for item in items: + if item.kind == "line" and item.path: + line_numbers_by_path.setdefault(item.path, set()).add(item.start) + current_stats: dict[str, tuple[int, int] | None] = {} + current_lines: dict[str, dict[int, str] | None] = {} + + for item in items: + kind, source, path, start, end, line = item.kind, item.source, item.path, item.start, item.end, item.line if not source or not path: continue file_lines = files.setdefault(path, {}) @@ -1541,8 +1565,18 @@ def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: if number >= start and (end == 0 or number < end): del file_lines[number] continue + if not cls._file_context_item_is_current( + item, + cwd=cwd, + line_numbers_by_path=line_numbers_by_path, + current_stats=current_stats, + current_lines=current_lines, + ): + omitted.setdefault(path, {}).setdefault(source, 0) + omitted[path][source] += 1 + continue file_lines[start] = (source, line) - if not files: + if not any(files.values()) and not omitted: return "" lines = [ @@ -1550,6 +1584,7 @@ def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: "- Built dynamically for this prompt from active raw Read and Edit results.", "- Overlapping lines use the newest active Read/Edit result.", "- Edit results can clear stale older lines when edits shift line numbers.", + "- If file stat changed, projected lines are hash-checked against the current file and stale lines are omitted.", "", ] for path in sorted(files): @@ -1564,17 +1599,81 @@ def format_file_context(cls, blocks: list[str], *, max_chars: int) -> str: lines.append("@@ " + str(start) + ":" + str(end) + " source=" + source) lines.extend(segment_lines) lines.append("") + if omitted: + lines.append("Omitted stale content:") + for path in sorted(omitted): + for source in sorted(omitted[path], key=cls._result_key_counter): + lines.append("- " + path + " source=" + source + " stale_lines=" + str(omitted[path][source])) + lines.append("") rendered = "\n".join(lines).rstrip() return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered @classmethod - def _file_context_items(cls, blocks: list[str]) -> list[tuple[int, int, str, str, str, int, int, str]]: - items: list[tuple[int, int, str, str, str, int, int, str]] = [] + def _file_context_items(cls, blocks: list[str]) -> list[FileContextItem]: + items: list[FileContextItem] = [] for block in blocks: items.extend(cls._file_context_block_items(block)) return items + @classmethod + def _file_context_item_is_current( + cls, + item: FileContextItem, + *, + cwd: str, + line_numbers_by_path: dict[str, set[int]], + current_stats: dict[str, tuple[int, int] | None], + current_lines: dict[str, dict[int, str] | None], + ) -> bool: + if item.path not in current_stats: + current_stats[item.path] = cls._current_file_stat(item.path, cwd=cwd) + current_stat = current_stats[item.path] + if current_stat is None: + return False + if item.mtime_ns > 0 and item.size >= 0 and current_stat == (item.mtime_ns, item.size): + return True + hash_match = re.match(r"\d+:([0-9a-f]{6})\|", item.line) + if hash_match is None: + return False + if item.path not in current_lines: + current_lines[item.path] = cls._read_file_context_lines(item.path, cwd=cwd, line_numbers=line_numbers_by_path.get(item.path, set())) + lines = current_lines[item.path] + if lines is None: + return False + current_line = lines.get(item.start) + return current_line is not None and _line_hash(current_line) == hash_match.group(1) + + @staticmethod + def _current_file_stat(path: str, *, cwd: str) -> tuple[int, int] | None: + filepath = path if os.path.isabs(path) else os.path.join(cwd or os.getcwd(), path) + try: + stat = os.stat(filepath) + except OSError: + return None + return stat.st_mtime_ns, stat.st_size + + @staticmethod + def _read_file_context_lines(path: str, *, cwd: str, line_numbers: set[int]) -> dict[int, str] | None: + if not line_numbers: + return {} + filepath = path if os.path.isabs(path) else os.path.join(cwd or os.getcwd(), path) + wanted = set(line_numbers) + max_line = max(wanted) + lines: dict[int, str] = {} + try: + with open(filepath, "r", encoding="utf-8") as f: + for index, line in enumerate(f): + if index in wanted: + lines[index] = line + if len(lines) == len(wanted): + break + if index >= max_line: + break + except OSError: + return None + return lines + @staticmethod def _file_context_segments(file_lines: dict[int, tuple[str, str]]) -> list[tuple[int, int, str, list[str]]]: items = sorted(file_lines.items()) @@ -1597,7 +1696,7 @@ def _file_context_segments(file_lines: dict[int, tuple[str, str]]) -> list[tuple return segments @classmethod - def _file_context_block_items(cls, block: str) -> list[tuple[int, int, str, str, str, int, int, str]]: + def _file_context_block_items(cls, block: str) -> list[FileContextItem]: if not cls._is_file_context_result_block(block): return [] header, output = block.split("\n output:\n", 1) @@ -1607,21 +1706,55 @@ def _file_context_block_items(cls, block: str) -> list[tuple[int, int, str, str, @classmethod def _file_context_output_items( cls, output: str, *, default_path: str, order: int, source: str - ) -> list[tuple[int, int, str, str, str, int, int, str]]: - items: list[tuple[int, int, str, str, str, int, int, str]] = [] + ) -> list[FileContextItem]: + items: list[FileContextItem] = [] for path, section in cls._file_context_file_sections(output, default_path=default_path): if not path: continue + mtime_ns, size = cls._file_context_section_stat(section) for clear_match in re.finditer(r"(?m)^[ \t]*(\d+):(\d+)", section): - items.append((order, 0, "clear", source, path, int(clear_match.group(1)), int(clear_match.group(2)), "")) + items.append( + FileContextItem( + order=order, + phase=0, + kind="clear", + source=source, + path=path, + start=int(clear_match.group(1)), + end=int(clear_match.group(2)), + line="", + mtime_ns=mtime_ns, + size=size, + ) + ) for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", section): content = match.group(1) for line in content.splitlines(): line_match = re.match(r"(\d+):[0-9a-f]{6}\|", line) if line_match: - items.append((order, 1, "line", source, path, int(line_match.group(1)), 0, line)) + items.append( + FileContextItem( + order=order, + phase=1, + kind="line", + source=source, + path=path, + start=int(line_match.group(1)), + end=0, + line=line, + mtime_ns=mtime_ns, + size=size, + ) + ) return items + @staticmethod + def _file_context_section_stat(section: str) -> tuple[int, int]: + match = re.search(r'', section) + if not match: + return 0, -1 + return int(match.group(1)), int(match.group(2)) + @classmethod def _is_file_context_result_block(cls, block: str) -> bool: if not cls.is_full_block(block): @@ -1864,6 +1997,14 @@ def _line_hash(content: str) -> str: return hashlib.blake2s(content.encode("utf-8"), digest_size=3).hexdigest() +def _format_file_stat(filepath: str, *, indent: str) -> list[str]: + try: + stat = os.stat(filepath) + except OSError: + return [] + return [indent + ''] + + ############################ # Tool Implementations ############################ @@ -2056,6 +2197,7 @@ def call(self) -> str: ] for filepath, ranges in self.targets: lines.extend([" ", " " + os.path.relpath(filepath, self.cwd) + ""]) + lines.extend(_format_file_stat(filepath, indent=" ")) if len(ranges) > 1: lines.append(" " + str(len(ranges)) + "") for start, end in ranges: @@ -2079,6 +2221,7 @@ def call(self) -> str: ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', " " + str(len(ranges)) + "", ] + lines.extend(_format_file_stat(filepath, indent=" ")) for start, end in ranges: content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) lines.append(" ") @@ -2088,8 +2231,9 @@ def call(self) -> str: return "\n".join(lines) start, end = ranges[0] - content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) lines = ["", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.'] + lines.extend(_format_file_stat(filepath, indent=" ")) + content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=" ")) lines.append("") return "\n".join(lines) @@ -3185,6 +3329,7 @@ def call(self) -> str: def _format_file_context_update(self, relpath: str, replacements: list[tuple[int, int, list[str]]]) -> list[str]: lines = [" ", " " + relpath + ""] + lines.extend(_format_file_stat(self.filepath, indent=" ")) if any(start < 0 for start, _end, _replacement in replacements): lines.extend([" 0:0", " "]) return lines @@ -5573,6 +5718,7 @@ def build_user_prompt(self) -> str: budget = self.context_budget() file_context = ToolResultContext.format_file_context( self._act_file_context_blocks(), + cwd=self.session.cwd, max_chars=budget.raw_chars + budget.kept_chars, ) conversation = self.session.state.conversation @@ -5641,6 +5787,7 @@ def build_observe_prompt(self) -> str: budget = self.context_budget() file_context = ToolResultContext.format_file_context( self.tool_context.kept_results + unreduced_blocks, + cwd=self.session.cwd, max_chars=budget.raw_chars + budget.kept_chars, ) unreduced = "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced_blocks)) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index c2501d6..e6d48ca 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -35,13 +35,14 @@ def _prompt_section(prompt: str, title: str, next_title: str) -> str: def _stored_read_result(line: str) -> str: + hash_text = line if line.endswith("\n") else line + "\n" return "\n".join( [ "", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', " 0:1", " ", - "0:aaaaaa|" + line, + "0:" + nanocode._line_hash(hash_text) + "|" + line, " ", "", ] @@ -543,6 +544,40 @@ def test_act_prompt_file_context_uses_edit_result_as_newest_file_content(tmp_pat assert "content=file_context" in latest +def test_act_prompt_file_context_omits_stale_read_lines_after_external_change(tmp_path, monkeypatch): + path = tmp_path / "sample.txt" + path.write_text("old0\nold1\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + _set_context_budget(monkeypatch, agent, raw_chars=10_000) + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 2])}]) + path.write_text("changed0\nchanged1\n", encoding="utf-8") + + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") + assert "File: sample.txt" not in file_context + assert "|old0" not in file_context + assert "|old1" not in file_context + assert "Omitted stale content:" in file_context + assert "sample.txt source=tr.1 stale_lines=2" in file_context + + +def test_act_prompt_file_context_keeps_matching_lines_after_external_stat_change(tmp_path, monkeypatch): + path = tmp_path / "sample.txt" + path.write_text("alpha\nbeta\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + _set_context_budget(monkeypatch, agent, raw_chars=10_000) + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) + path.write_text("alpha\nBETA changed\n", encoding="utf-8") + + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") + assert "File: sample.txt" in file_context + assert "0:1 source=tr.1" in file_context + assert "|alpha" in file_context + assert "|beta" not in file_context + assert "Omitted stale content:" not in file_context + + def test_act_prompt_folds_excerpted_read_result(tmp_path): path = tmp_path / "sample.txt" path.write_text("x" * 20_000 + "\n", encoding="utf-8") @@ -560,6 +595,7 @@ def test_act_prompt_folds_excerpted_read_result(tmp_path): def test_recall_read_reactivates_original_result_for_file_context(tmp_path): + (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) session.state.tool_result_counter = 1 session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem( diff --git a/tests/test_nanocode_edit_tool.py b/tests/test_nanocode_edit_tool.py index 5132738..18fd9b6 100644 --- a/tests/test_nanocode_edit_tool.py +++ b/tests/test_nanocode_edit_tool.py @@ -34,6 +34,7 @@ def test_edit_file_replaces_range_from_read_anchors(tmp_path): assert "* range[1]: 1:2" in result assert "" in result assert "sample.txt" in result + assert "1:2" in result assert "1:2" in result assert "|BETA" in result diff --git a/tests/test_nanocode_read_tool.py b/tests/test_nanocode_read_tool.py index 673331d..38724e5 100644 --- a/tests/test_nanocode_read_tool.py +++ b/tests/test_nanocode_read_tool.py @@ -37,6 +37,7 @@ def test_read_tool_reads_requested_line_range(tmp_path): assert result.startswith("") assert "1:3" in result assert "" not in result + assert "" in result assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result assert "|alpha" not in result @@ -90,6 +91,7 @@ def test_read_tool_reads_multiple_files(tmp_path): assert "2" in result assert "pyproject.toml" in result assert "uv.lock" in result + assert result.count(" Date: Sun, 24 May 2026 03:52:04 -0700 Subject: [PATCH 09/29] Compress file context and structured tool code --- nanocode.py | 129 ++++++++++++++-------------------------------------- 1 file changed, 35 insertions(+), 94 deletions(-) diff --git a/nanocode.py b/nanocode.py index 8f75c86..4fd30e0 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1304,6 +1304,7 @@ def _bound_tool_output(output: str, *, log_path: str = "", max_chars: int = MAX_ RESULT_KEY_PATTERN: re.Pattern[str] = re.compile(r"\b(?:(?:result_)?key|recall)[:=]\s*(tr\.\d+)\b") TOOL_RESULT_KEY_REF_PATTERN: re.Pattern[str] = re.compile(r"\btr\.\d+\b") +READ_HASHLINE_NOTE = ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.' def _format_tool_call_summary(call: ParsedToolCall) -> str: @@ -1611,10 +1612,7 @@ def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int @classmethod def _file_context_items(cls, blocks: list[str]) -> list[FileContextItem]: - items: list[FileContextItem] = [] - for block in blocks: - items.extend(cls._file_context_block_items(block)) - return items + return [item for block in blocks for item in cls._file_context_block_items(block)] @classmethod def _file_context_item_is_current( @@ -1713,39 +1711,13 @@ def _file_context_output_items( continue mtime_ns, size = cls._file_context_section_stat(section) for clear_match in re.finditer(r"(?m)^[ \t]*(\d+):(\d+)", section): - items.append( - FileContextItem( - order=order, - phase=0, - kind="clear", - source=source, - path=path, - start=int(clear_match.group(1)), - end=int(clear_match.group(2)), - line="", - mtime_ns=mtime_ns, - size=size, - ) - ) + items.append(FileContextItem(order, 0, "clear", source, path, int(clear_match.group(1)), int(clear_match.group(2)), "", mtime_ns, size)) for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", section): content = match.group(1) for line in content.splitlines(): line_match = re.match(r"(\d+):[0-9a-f]{6}\|", line) if line_match: - items.append( - FileContextItem( - order=order, - phase=1, - kind="line", - source=source, - path=path, - start=int(line_match.group(1)), - end=0, - line=line, - mtime_ns=mtime_ns, - size=size, - ) - ) + items.append(FileContextItem(order, 1, "line", source, path, int(line_match.group(1)), 0, line, mtime_ns, size)) return items @staticmethod @@ -2066,16 +2038,8 @@ def cli_args(cls, args: list[JsonValue]) -> list[str]: if not path: continue raw_ranges = [spec.get("range")] if "range" in spec else _json_list(spec.get("ranges")) if "ranges" in spec else [] - ranges = [] - for raw_range in raw_ranges: - values = _json_list(raw_range) - if len(values) == 2: - ranges.append(str(values[0]) + ":" + str(values[1])) - if not ranges: - tokens.append(path) - continue tokens.append(path) - tokens.extend(ranges) + tokens.extend(str(values[0]) + ":" + str(values[1]) for raw_range in raw_ranges if len(values := _json_list(raw_range)) == 2) return tokens or [cls.cli_token(args[0])] @classmethod @@ -2189,52 +2153,33 @@ def preview(self) -> str: return f"Read({filepath}, {start}, {end})" def call(self) -> str: - if len(self.targets) > 1: - lines = [ - "", - ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', - " " + str(len(self.targets)) + "", - ] - for filepath, ranges in self.targets: + multi_file = len(self.targets) > 1 + lines = ["", READ_HASHLINE_NOTE] + if multi_file: + lines.append(" " + str(len(self.targets)) + "") + elif len(self.targets[0][1]) > 1: + lines.append(" " + str(len(self.targets[0][1])) + "") + + for filepath, ranges in self.targets: + base_indent = " " + if multi_file: + base_indent = " " lines.extend([" ", " " + os.path.relpath(filepath, self.cwd) + ""]) - lines.extend(_format_file_stat(filepath, indent=" ")) + lines.extend(_format_file_stat(filepath, indent=base_indent)) if len(ranges) > 1: lines.append(" " + str(len(ranges)) + "") - for start, end in ranges: - if len(ranges) > 1: - lines.append(" ") - indent = " " - else: - indent = " " - content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) - lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=indent)) - if len(ranges) > 1: - lines.append(" ") - lines.append(" ") - lines.append("") - return "\n".join(lines) - - filepath, ranges = self.targets[0] - if len(ranges) > 1: - lines = [ - "", - ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.', - " " + str(len(ranges)) + "", - ] - lines.extend(_format_file_stat(filepath, indent=" ")) + else: + lines.extend(_format_file_stat(filepath, indent=base_indent)) for start, end in ranges: + wrapped = len(ranges) > 1 + if wrapped: + lines.append(base_indent + "") content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) - lines.append(" ") - lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=" ")) - lines.append(" ") - lines.append("") - return "\n".join(lines) - - start, end = ranges[0] - lines = ["", ' Content lines are "line:hash|code"; the "line:hash" part is the line anchor.'] - lines.extend(_format_file_stat(filepath, indent=" ")) - content, returned_end, range_end, truncated, total_lines = self._read_range(start, end, filepath=filepath) - lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=" ")) + lines.extend(self._format_range_result(start, returned_end, range_end, truncated, total_lines, content, indent=base_indent + (" " if wrapped else ""))) + if wrapped: + lines.append(base_indent + "") + if multi_file: + lines.append(" ") lines.append("") return "\n".join(lines) @@ -2489,17 +2434,13 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: unexpected = sorted(set(payload) - {"pattern", "path", "glob", "context"}) if unexpected: raise ToolCallArgError("unexpected search option: " + ", ".join(unexpected)) - raw_pattern = _json_str(payload.get("pattern")) or "" - if not raw_pattern: - raise ToolCallArgError("pattern cannot be empty") - pattern = raw_pattern[3:] if raw_pattern.startswith("re:") else raw_pattern + pattern = _json_str(payload.get("pattern")) or "" + pattern = pattern[3:] if pattern.startswith("re:") else pattern if not pattern: raise ToolCallArgError("pattern cannot be empty") pattern = pattern.replace("\\n", "\n").replace("\\r", "\r") - target_path_arg = _json_str(payload.get("path")) if "path" in payload else "." - target_path_arg = target_path_arg or "." - glob_pattern = _json_str(payload.get("glob")) if "glob" in payload else "" - glob_pattern = glob_pattern or "" + target_path_arg = _json_str(payload.get("path")) or "." + glob_pattern = _json_str(payload.get("glob")) or "" if "glob" in payload and not glob_pattern: raise ToolCallArgError("glob option cannot be empty") context_lines = cls.CONTEXT_LINES @@ -2555,7 +2496,7 @@ def _load_gitignore_patterns(cwd: str) -> list[str]: pass return patterns - def _is_gitignored(self, path: str, is_dir: bool = False) -> bool: + def _is_gitignored(self, path: str) -> bool: relpath = self._relpath(path).replace(os.sep, "/") name = os.path.basename(path) parts = relpath.split("/") @@ -2579,9 +2520,9 @@ def _is_gitignored(self, path: str, is_dir: bool = False) -> bool: return True return False - def _is_skipped_path(self, path: str, is_dir: bool = False) -> bool: + def _is_skipped_path(self, path: str) -> bool: hidden = any(part.startswith(".") for part in self._relpath(path).split(os.sep) if part and part != ".") - return hidden or self._is_gitignored(path, is_dir) + return hidden or self._is_gitignored(path) def _iter_files(self) -> Iterator[str]: if os.path.isfile(self.target_path): @@ -2590,7 +2531,7 @@ def _iter_files(self) -> Iterator[str]: return for root, dirs, names in os.walk(self.target_path): - dirs[:] = [name for name in dirs if not self._is_skipped_path(os.path.join(root, name), is_dir=True)] + dirs[:] = [name for name in dirs if not self._is_skipped_path(os.path.join(root, name))] for name in names: path = os.path.join(root, name) if self._matches_glob(path) and not self._is_skipped_path(path): From 4cf922a33b40f0bb4a4d0df2f2e1ebc628db6f05 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 03:59:28 -0700 Subject: [PATCH 10/29] Encapsulate code index state in CodeIndex --- nanocode.py | 374 ++++++++++++------------ tests/test_nanocode_agent.py | 6 +- tests/test_nanocode_code_index_tools.py | 72 ++--- tests/test_nanocode_commands.py | 4 +- tests/test_nanocode_loop.py | 8 +- 5 files changed, 237 insertions(+), 227 deletions(-) diff --git a/nanocode.py b/nanocode.py index 4fd30e0..180433d 100644 --- a/nanocode.py +++ b/nanocode.py @@ -2726,192 +2726,193 @@ def call(self) -> str: return self._call_python() -def _code_index_module() -> Any | None: - try: - return importlib.import_module("code_symbol_index") - except ImportError: - return None - - -def _code_index_db_path(session: Session) -> str: - return os.path.join(session.project_dir(), "code-symbol-index", "index.sqlite") - - -def _code_index_repository(session: Session, *, create_index: bool = False) -> Any: - if not create_index and session.code_index_repository is not None: - return session.code_index_repository - module = _code_index_module() - if module is None: - raise ToolCallError("code index is unavailable") - db_path = _code_index_db_path(session) - if create_index: - os.makedirs(os.path.dirname(db_path), exist_ok=True) - repository = module.Repository(session.cwd, db_path=db_path, create_index=create_index) - if not create_index: - session.code_index_repository = repository - return repository - - -def _code_index_status(session: Session, *, check: bool = False) -> tuple[str, str]: - module = _code_index_module() - if module is None: - return "unavailable", "" - try: - status = module.status(session.cwd, db_path=_code_index_db_path(session), check=check, max_pending_files=20, format="object") - except Exception as error: - return "error", str(error) - message = str(getattr(status, "message", None) or getattr(status, "reason", None) or "") - changes = getattr(status, "pending_changes", None) - files = getattr(status, "pending_files", ()) - if changes: - pending = "pending " + str(changes) - if isinstance(files, (list, tuple)) and files: - sample = ", ".join(str(item) for item in files[:3]) - pending += " (" + sample + ("..." if len(files) > 3 else "") + ")" - message = (message + "; " if message else "") + pending - return str(getattr(status, "status", "error")), message - - -def _code_index_language_breakdown(session: Session) -> str: - module = _code_index_module() - if module is None: - return "" - try: - status = module.status(session.cwd, db_path=_code_index_db_path(session), check=False, max_pending_files=0, format="object") - except Exception: - return "" - if str(getattr(status, "status", "error")) not in {"ready", "stale"}: - return "" - rows = [] - for item in getattr(status, "language_breakdown", ()) or (): - language = item.get("language") if isinstance(item, dict) else getattr(item, "language", None) - files = item.get("files") if isinstance(item, dict) else getattr(item, "files", None) - percent = item.get("percent") if isinstance(item, dict) else getattr(item, "percent", None) - if language and files is not None and percent is not None: - try: - rows.append(f"{language} {files} files ({float(percent):.1f}%)") - except (TypeError, ValueError): - rows.append(f"{language} {files} files") - if rows: - return ", ".join(rows) - languages = getattr(status, "languages", ()) or () - if isinstance(languages, str): - languages = (languages,) - return ", ".join(str(language) for language in languages if language) - - -def _code_index_available(session: Session) -> bool: - status, message = _code_index_status(session) - session.state.code_index_error = message if status == "error" else "" - return status in {"ready", "stale"} - - -def _set_code_index_notice(session: Session, event: str, *, done: int = 0, total: int = 0, seconds: int = 30) -> None: - phase = {"scan": "scan", "start": "parse", "file": "parse", "finish": "done"}.get(event, event) - suffix = (" " + str(done) + "/" + str(total)) if total > 0 else "" - session.state.status_notice = "index:" + phase + suffix - session.state.status_notice_until = time.monotonic() + seconds - session.state.code_index_refreshing = phase not in {"done", "error"} - - -def _code_index_progress(session: Session) -> Callable[..., None]: - def update(event: str, *, done: int = 0, total: int = 0, **_kwargs: object) -> None: - _set_code_index_notice(session, event, done=done, total=total) - - return update - +CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT = 20 -def _code_index_refresh_existing_async(session: Session, progress: Callable[..., None] | None = None) -> bool: - status, _message = _code_index_status(session) - if status not in {"ready", "stale"}: - return False - module = _code_index_module() - if module is None: - return False - session.code_index_repository = None - session.state.code_index_error = "" - session.state.code_index_refreshing = True - session.state.code_index_reload_needed = False - callback = progress or _code_index_progress(session) - def refresh_progress(event: str, *, done: int = 0, total: int = 0, **kwargs: object) -> None: - callback(event, done=done, total=total, **kwargs) - if {"finish": "done", "done": "done"}.get(event, event) == "done": - session.state.code_index_reload_needed = True +class CodeIndex: + def __init__(self, session: Session): + self.session = session - try: - module.refresh_async(session.cwd, db_path=_code_index_db_path(session), progress=refresh_progress) - except Exception as error: - session.state.code_index_refreshing = False - session.state.code_index_reload_needed = False - session.state.code_index_error = str(error) - return True + @staticmethod + def module() -> Any | None: + try: + return importlib.import_module("code_symbol_index") + except ImportError: + return None + def db_path(self) -> str: + return os.path.join(self.session.project_dir(), "code-symbol-index", "index.sqlite") -def _code_index_reload_if_ready(session: Session) -> None: - if not session.state.code_index_reload_needed or session.state.code_index_refreshing: - return - try: - _code_index_repository(session) - session.state.code_index_error = "" - except Exception as error: - session.code_index_repository = None - session.state.code_index_error = str(error) - session.state.code_index_reload_needed = False + def repository(self, *, create_index: bool = False) -> Any: + session = self.session + if not create_index and session.code_index_repository is not None: + return session.code_index_repository + module = self.module() + if module is None: + raise ToolCallError("code index is unavailable") + db_path = self.db_path() + if create_index: + os.makedirs(os.path.dirname(db_path), exist_ok=True) + repository = module.Repository(session.cwd, db_path=db_path, create_index=create_index) + if not create_index: + session.code_index_repository = repository + return repository + + def raw_status(self, *, check: bool = False, max_pending_files: int = 20) -> Any | None: + module = self.module() + if module is None: + return None + return module.status(self.session.cwd, db_path=self.db_path(), check=check, max_pending_files=max_pending_files, format="object") + def status(self, *, check: bool = False) -> tuple[str, str]: + try: + status = self.raw_status(check=check) + except Exception as error: + return "error", str(error) + if status is None: + return "unavailable", "" + return str(getattr(status, "status", "error")), self._status_message(status) -def _code_index_sync(session: Session, *, force: bool = False) -> str: - before, _message = _code_index_status(session) - if force: - if _code_index_module() is None: - return "code_index: error\ncode index is unavailable" + @staticmethod + def _status_message(status: Any) -> str: + message = str(getattr(status, "message", None) or getattr(status, "reason", None) or "") + changes = getattr(status, "pending_changes", None) + files = getattr(status, "pending_files", ()) + if changes: + pending = "pending " + str(changes) + if isinstance(files, (list, tuple)) and files: + sample = ", ".join(str(item) for item in files[:3]) + pending += " (" + sample + ("..." if len(files) > 3 else "") + ")" + message = (message + "; " if message else "") + pending + return message + + def language_breakdown(self) -> str: + try: + status = self.raw_status(max_pending_files=0) + except Exception: + return "" + if status is None or str(getattr(status, "status", "error")) not in {"ready", "stale"}: + return "" + rows = [] + for item in getattr(status, "language_breakdown", ()) or (): + language = item.get("language") if isinstance(item, dict) else getattr(item, "language", None) + files = item.get("files") if isinstance(item, dict) else getattr(item, "files", None) + percent = item.get("percent") if isinstance(item, dict) else getattr(item, "percent", None) + if language and files is not None and percent is not None: + try: + rows.append(f"{language} {files} files ({float(percent):.1f}%)") + except (TypeError, ValueError): + rows.append(f"{language} {files} files") + if rows: + return ", ".join(rows) + languages = getattr(status, "languages", ()) or () + if isinstance(languages, str): + languages = (languages,) + return ", ".join(str(language) for language in languages if language) + + def available(self) -> bool: + status, message = self.status() + self.session.state.code_index_error = message if status == "error" else "" + return status in {"ready", "stale"} + + def set_notice(self, event: str, *, done: int = 0, total: int = 0, seconds: int = 30) -> None: + phase = {"scan": "scan", "start": "parse", "file": "parse", "finish": "done"}.get(event, event) + suffix = (" " + str(done) + "/" + str(total)) if total > 0 else "" + self.session.state.status_notice = "index:" + phase + suffix + self.session.state.status_notice_until = time.monotonic() + seconds + self.session.state.code_index_refreshing = phase not in {"done", "error"} + + def progress(self) -> Callable[..., None]: + def update(event: str, *, done: int = 0, total: int = 0, **_kwargs: object) -> None: + self.set_notice(event, done=done, total=total) + + return update + + def refresh_existing_async(self, progress: Callable[..., None] | None = None) -> bool: + status, _message = self.status() + module = self.module() + if status not in {"ready", "stale"} or module is None: + return False + session = self.session session.code_index_repository = None - shutil.rmtree(os.path.dirname(_code_index_db_path(session)), ignore_errors=True) - try: - repository = _code_index_repository(session, create_index=True) - repository.refresh(progress=_code_index_progress(session)) - session.code_index_repository = repository + session.state.code_index_error = "" + session.state.code_index_refreshing = True session.state.code_index_reload_needed = False - except Exception as error: - session.code_index_repository = None - session.state.code_index_error = str(error) - return "code_index: error\n" + str(error) - session.state.code_index_error = "" - _set_code_index_notice(session, "done", seconds=2) - status, message = _code_index_status(session) - action = "rebuilt" if force else ("initialized" if before == "missing" else "synced") - lines = ["code_index: " + action, "status: " + status, "path: " + _code_index_db_path(session)] - if message: - lines.append("note: " + message) - return "\n".join(lines) + callback = progress or self.progress() + def refresh_progress(event: str, *, done: int = 0, total: int = 0, **kwargs: object) -> None: + callback(event, done=done, total=total, **kwargs) + if {"finish": "done", "done": "done"}.get(event, event) == "done": + session.state.code_index_reload_needed = True -CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT = 20 + try: + module.refresh_async(session.cwd, db_path=self.db_path(), progress=refresh_progress) + except Exception as error: + session.state.code_index_refreshing = False + session.state.code_index_reload_needed = False + session.state.code_index_error = str(error) + return True + def reload_if_ready(self) -> None: + session = self.session + if not session.state.code_index_reload_needed or session.state.code_index_refreshing: + return + try: + self.repository() + session.state.code_index_error = "" + except Exception as error: + session.code_index_repository = None + session.state.code_index_error = str(error) + session.state.code_index_reload_needed = False -def _code_index_update_pending(session: Session, *, limit: int = CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT) -> None: - module = _code_index_module() - if module is None or session.state.code_index_refreshing: - return - try: - status = module.status(session.cwd, db_path=_code_index_db_path(session), check=True, max_pending_files=limit + 1, format="object") - except Exception as error: - session.state.code_index_error = str(error) - return - if str(getattr(status, "status", "")) != "stale": - return - pending_changes = getattr(status, "pending_changes", None) - files = [str(path) for path in getattr(status, "pending_files", ()) if path] - if not files or len(files) > limit or (isinstance(pending_changes, int) and pending_changes > limit): - return - paths = list(dict.fromkeys(path for path in (session.resolve_path(path) for path in files) if session.is_path_in_cwd(path))) - if not paths: - return - try: - _code_index_repository(session).update(paths) + def sync(self, *, force: bool = False) -> str: + session = self.session + before, _message = self.status() + if force: + if self.module() is None: + return "code_index: error\ncode index is unavailable" + session.code_index_repository = None + shutil.rmtree(os.path.dirname(self.db_path()), ignore_errors=True) + try: + repository = self.repository(create_index=True) + repository.refresh(progress=self.progress()) + session.code_index_repository = repository + session.state.code_index_reload_needed = False + except Exception as error: + session.code_index_repository = None + session.state.code_index_error = str(error) + return "code_index: error\n" + str(error) session.state.code_index_error = "" - except Exception as error: - session.state.code_index_error = str(error) + self.set_notice("done", seconds=2) + status, message = self.status() + action = "rebuilt" if force else ("initialized" if before == "missing" else "synced") + lines = ["code_index: " + action, "status: " + status, "path: " + self.db_path()] + if message: + lines.append("note: " + message) + return "\n".join(lines) + + def update_pending(self, *, limit: int = CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT) -> None: + if self.module() is None or self.session.state.code_index_refreshing: + return + try: + status = self.raw_status(check=True, max_pending_files=limit + 1) + except Exception as error: + self.session.state.code_index_error = str(error) + return + if status is None or str(getattr(status, "status", "")) != "stale": + return + pending_changes = getattr(status, "pending_changes", None) + files = [str(path) for path in getattr(status, "pending_files", ()) if path] + if not files or len(files) > limit or (isinstance(pending_changes, int) and pending_changes > limit): + return + paths = list(dict.fromkeys(path for path in (self.session.resolve_path(path) for path in files) if self.session.is_path_in_cwd(path))) + if not paths: + return + try: + self.repository().update(paths) + self.session.state.code_index_error = "" + except Exception as error: + self.session.state.code_index_error = str(error) @dataclass @@ -2997,7 +2998,7 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: if re.search(r"\s", symbol): raise ToolCallArgError("outline symbol filter must be one symbol name or prefix") options["symbol"] = symbol - if not _code_index_available(session): + if not CodeIndex(session).available(): raise ToolCallError("code index is not available") return cls( mode=mode, @@ -3034,7 +3035,7 @@ def preview(self) -> str: def call(self) -> str: if self.session is None: raise ToolCallError("missing session") - repo = _code_index_repository(self.session) + repo = CodeIndex(self.session).repository() if self.mode == "find": text = repo.search_text( self.target, @@ -5713,8 +5714,9 @@ def _format_environment(self) -> str: shell_tools = [name for name in ("find", "rg", "python3", "perl", "sed", "awk", "xargs", "grep", "jq") if shutil.which(name)] if shell_tools: lines.append("- detected-available-shell-commands: " + ", ".join(shell_tools)) - if _code_index_available(self.session): - language_breakdown = _code_index_language_breakdown(self.session) + code_index = CodeIndex(self.session) + if code_index.available(): + language_breakdown = code_index.language_breakdown() if language_breakdown: lines.append("- indexed-language-breakdown: " + language_breakdown) lines.append( @@ -6022,7 +6024,7 @@ def _tool_schemas(self) -> list[Json]: else: action_names = self.ACT_ACTION_TYPES - {"tool", "forget"} tool_classes = tuple(TOOL_REGISTRY.values()) - if not _code_index_available(self.session): + if not CodeIndex(self.session).available(): tool_classes = tuple(tool for tool in tool_classes if tool is not InspectCodeTool) actions = [_state_tool_schema(name) for name in STATE_TOOL_PARAMS if name in action_names] return actions + [tool.tool_schema() for tool in tool_classes] @@ -7375,7 +7377,7 @@ def _status(self, args: str) -> str: else " (empty)" ) checks_status = blackboard.checks.status - code_index_status, code_index_message = _code_index_status(session, check=True) + code_index_status, code_index_message = CodeIndex(session).status(check=True) if session.state.code_index_error: code_index_status = "error" code_index_message = session.state.code_index_error @@ -7441,7 +7443,7 @@ def _index(self, args: str) -> str: value = args.strip() if value not in {"", "force"}: return "Usage: /index [force]" - return self._with_status(lambda: _code_index_sync(self.agent.session, force=value == "force")) + return self._with_status(lambda: CodeIndex(self.agent.session).sync(force=value == "force")) def _context(self, args: str) -> str: value = args.strip() @@ -7832,7 +7834,7 @@ def run(self) -> int: select_provider=self._select_provider, ) while True: - _code_index_reload_if_ready(self.agent.session) + CodeIndex(self.agent.session).reload_if_ready() if self._exit_after_current_turn: return 0 try: @@ -7850,7 +7852,7 @@ def run(self) -> int: continue if not user_input: continue - _code_index_reload_if_ready(self.agent.session) + CodeIndex(self.agent.session).reload_if_ready() try: result = dispatcher.dispatch(user_input) except Exception as error: @@ -7871,10 +7873,12 @@ def _prompt(self) -> str: return "[" + ",".join(labels) + "] > " if labels else "> " def _start_existing_code_index_refresh(self) -> None: + code_index = CodeIndex(self.agent.session) + def progress(event: str, *, done: int = 0, total: int = 0, **_kwargs: object) -> None: - _set_code_index_notice(self.agent.session, event, done=done, total=total) + code_index.set_notice(event, done=done, total=total) - _code_index_refresh_existing_async(self.agent.session, progress=progress) + code_index.refresh_existing_async(progress=progress) def _read_input(self, prompt: str) -> str: if self.prompt_session is None: @@ -8428,7 +8432,7 @@ def _run_agent(self, user_input: str) -> None: self.agent.session.state.manual_model_retry_requested = False if runtime_ui_running: self._stop_runtime_ui() - _code_index_update_pending(self.agent.session) + CodeIndex(self.agent.session).update_pending() self.status_bar.pause() def _run_with_status(self, action: StatusAction) -> str: @@ -8540,7 +8544,7 @@ def _emit(self, message: str) -> None: self._with_status_paused(lambda: self._print_message(message)) def _print_welcome(self) -> None: - index_status, _index_message = _code_index_status(self.agent.session) + index_status, _index_message = CodeIndex(self.agent.session).status() index_tip = ( [("ansibrightblack", " tip: "), ("ansicyan", "/index"), ("ansiwhite", " initializes indexed code tools\n")] if index_status == "missing" else [] ) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index e6d48ca..35068c5 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -803,7 +803,7 @@ def test_act_prompt_uses_first_todo_as_current_focus(tmp_path): def test_inspect_code_tools_is_hidden_until_available(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_available", lambda session: False) + monkeypatch.setattr(nanocode.CodeIndex, "available", lambda self: False) agent = Agent(Session(cwd=str(tmp_path))) tool_names = [schema["function"]["name"] for schema in agent._tool_schemas() if schema.get("type") == "function"] @@ -812,7 +812,7 @@ def test_inspect_code_tools_is_hidden_until_available(tmp_path, monkeypatch): def test_inspect_code_tools_is_visible_when_available(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_available", lambda session: True) + monkeypatch.setattr(nanocode.CodeIndex, "available", lambda self: True) agent = Agent(Session(cwd=str(tmp_path))) tool_names = [schema["function"]["name"] for schema in agent._tool_schemas() if schema.get("type") == "function"] @@ -940,7 +940,7 @@ def status_fn(root, *, db_path=None, check=False, max_pending_files=50, format=" ), ) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: SimpleNamespace(status=status_fn)) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: SimpleNamespace(status=status_fn))) agent = Agent(Session(cwd=str(tmp_path))) prompt = agent.build_user_prompt() diff --git a/tests/test_nanocode_code_index_tools.py b/tests/test_nanocode_code_index_tools.py index e8c89f0..fbbe222 100644 --- a/tests/test_nanocode_code_index_tools.py +++ b/tests/test_nanocode_code_index_tools.py @@ -81,7 +81,7 @@ def reset_fake_repository(): def test_inspect_code_requires_code_index(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_module", lambda: None) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: None)) with pytest.raises(ToolCallError, match="code index is not available"): InspectCodeTool.make(Session(cwd=str(tmp_path)), ["inspect", "Tool"]) @@ -94,7 +94,7 @@ def test_code_index_schema_accepts_expected_args(): def test_inspect_code_rejects_natural_language(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) with pytest.raises(ToolCallArgError, match="do not pass natural language"): InspectCodeTool.make(Session(cwd=str(tmp_path)), ["inspect", "Tool class callers"]) @@ -103,7 +103,7 @@ def test_inspect_code_rejects_natural_language(tmp_path, monkeypatch): def test_inspect_code_rejects_invalid_mode_and_options(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) with pytest.raises(ToolCallArgError, match="mode must be find, inspect, or outline"): InspectCodeTool.make(Session(cwd=str(tmp_path)), ["search", "Tool"]) @@ -113,7 +113,7 @@ def test_inspect_code_rejects_invalid_mode_and_options(tmp_path, monkeypatch): def test_code_index_missing_is_not_initialized_implicitly(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("missing")) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module("missing"))) with pytest.raises(ToolCallError, match="code index is not available"): InspectCodeTool.make(session, ["find", "Tool"]) @@ -127,17 +127,17 @@ def test_code_index_status_formats_checked_pending_files(tmp_path, monkeypatch): def status_fn(root, *, db_path=None, check=False, max_pending_files=50, format="object"): return SimpleNamespace(status="stale", reason="", message="", pending_changes=5, pending_files=("a.py", "b.py", "c.py", "d.py")) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: SimpleNamespace(status=status_fn)) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: SimpleNamespace(status=status_fn))) - assert nanocode._code_index_status(session, check=True) == ("stale", "pending 5 (a.py, b.py, c.py...)") + assert nanocode.CodeIndex(session).status(check=True) == ("stale", "pending 5 (a.py, b.py, c.py...)") def test_code_index_sync_initializes_missing_index_in_project_data(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) module = fake_code_index_module("missing", refresh_status="ready") - monkeypatch.setattr(nanocode, "_code_index_module", lambda: module) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: module)) - result = nanocode._code_index_sync(session) + result = nanocode.CodeIndex(session).sync() db_path = str(tmp_path / "data" / "projects" / session.project_key() / "code-symbol-index" / "index.sqlite") assert ("repo", str(tmp_path), db_path, True) in FakeRepository.events @@ -149,32 +149,34 @@ def test_code_index_sync_initializes_missing_index_in_project_data(tmp_path, mon def test_code_index_force_rebuild_removes_project_index_dir(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) module = fake_code_index_module("ready") - monkeypatch.setattr(nanocode, "_code_index_module", lambda: module) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: module)) index_dir = tmp_path / "data" / "projects" / session.project_key() / "code-symbol-index" index_dir.mkdir(parents=True) (index_dir / "old.sqlite").write_text("old", encoding="utf-8") - result = nanocode._code_index_sync(session, force=True) + code_index = nanocode.CodeIndex(session) + result = code_index.sync(force=True) assert not (index_dir / "old.sqlite").exists() - assert ("repo", str(tmp_path), nanocode._code_index_db_path(session), True) in FakeRepository.events - assert ("refresh", str(tmp_path), nanocode._code_index_db_path(session), True) in FakeRepository.events - assert result == "code_index: rebuilt\nstatus: ready\npath: " + nanocode._code_index_db_path(session) + assert ("repo", str(tmp_path), code_index.db_path(), True) in FakeRepository.events + assert ("refresh", str(tmp_path), code_index.db_path(), True) in FakeRepository.events + assert result == "code_index: rebuilt\nstatus: ready\npath: " + code_index.db_path() def test_code_index_refresh_existing_async_starts_for_ready_index(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("ready")) + code_index = nanocode.CodeIndex(session) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module("ready"))) - assert nanocode._code_index_refresh_existing_async(session) is True + assert code_index.refresh_existing_async() is True - assert ("refresh_async", str(tmp_path), nanocode._code_index_db_path(session), True, {}) in FakeRepository.events + assert ("refresh_async", str(tmp_path), code_index.db_path(), True, {}) in FakeRepository.events assert session.code_index_repository is None assert session.state.status_notice == "index:done 1/1" assert session.state.code_index_refreshing is False assert session.state.code_index_reload_needed is True - nanocode._code_index_reload_if_ready(session) + code_index.reload_if_ready() assert isinstance(session.code_index_repository, FakeRepository) assert session.state.code_index_reload_needed is False @@ -182,26 +184,27 @@ def test_code_index_refresh_existing_async_starts_for_ready_index(tmp_path, monk def test_code_index_update_pending_updates_small_stale_file_set(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("stale", pending_files=("a.py", "pkg/b.py"))) + code_index = nanocode.CodeIndex(session) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module("stale", pending_files=("a.py", "pkg/b.py")))) - nanocode._code_index_update_pending(session, limit=3) + code_index.update_pending(limit=3) - assert ("status", str(tmp_path), nanocode._code_index_db_path(session), True, 4, "object") in FakeRepository.events - assert ("update", (str(tmp_path / "a.py"), str(tmp_path / "pkg" / "b.py")), str(tmp_path), nanocode._code_index_db_path(session), False) in FakeRepository.events + assert ("status", str(tmp_path), code_index.db_path(), True, 4, "object") in FakeRepository.events + assert ("update", (str(tmp_path / "a.py"), str(tmp_path / "pkg" / "b.py")), str(tmp_path), code_index.db_path(), False) in FakeRepository.events def test_code_index_update_pending_skips_large_stale_file_set(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module("stale", pending_changes=4, pending_files=("a.py", "b.py", "c.py"))) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module("stale", pending_changes=4, pending_files=("a.py", "b.py", "c.py")))) - nanocode._code_index_update_pending(session, limit=3) + nanocode.CodeIndex(session).update_pending(limit=3) assert not [event for event in FakeRepository.events if event[0] == "update"] def test_inspect_code_find_uses_search_text(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path), config=nanocode.Config(data_dir=str(tmp_path / "data"))) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) result = InspectCodeTool.make(session, ["find", "Tool", {"limit": 12, "kind": "class", "path": "nanocode.py", "exact_only": True}]).call() @@ -211,7 +214,7 @@ def test_inspect_code_find_uses_search_text(tmp_path, monkeypatch): def test_inspect_code_find_clamps_limit(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) assert InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool", {"limit": 999}]).limit == 80 assert InspectCodeTool.make(Session(cwd=str(tmp_path)), ["find", "Tool", {"limit": 0}]).limit == 1 with pytest.raises(ToolCallArgError, match="limit must be an integer"): @@ -219,7 +222,7 @@ def test_inspect_code_find_clamps_limit(tmp_path, monkeypatch): def test_inspect_code_symbol_rejects_files_directories_and_dotted_module_paths(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) (tmp_path / "orion" / "biz" / "handlers" / "syftpp").mkdir(parents=True) (tmp_path / "code.py").write_text("class Tool:\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) @@ -234,17 +237,19 @@ def test_inspect_code_symbol_rejects_files_directories_and_dotted_module_paths(t def test_inspect_code_inspect_uses_inspect_text(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + code_index = nanocode.CodeIndex(session) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) result = InspectCodeTool.make(session, ["inspect", "Tool", {"path": "nanocode.py", "exact_only": True}]).call() - assert ("inspect_text", "Tool", None, "nanocode.py", True, True, str(tmp_path), nanocode._code_index_db_path(session)) in FakeRepository.events + assert ("inspect_text", "Tool", None, "nanocode.py", True, True, str(tmp_path), code_index.db_path()) in FakeRepository.events assert result == "\nmode: inspect\nsymbol:\n name: Tool\nsource:\n status: full\n" def test_agent_tool_call_preserves_code_index_options_object(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + code_index = nanocode.CodeIndex(session) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) Agent(session).execute_tool_calls( [ @@ -256,23 +261,24 @@ def test_agent_tool_call_preserves_code_index_options_object(tmp_path, monkeypat ] ) - assert ("inspect_text", "Tool", None, "nanocode.py", True, True, str(tmp_path), nanocode._code_index_db_path(session)) in FakeRepository.events + assert ("inspect_text", "Tool", None, "nanocode.py", True, True, str(tmp_path), code_index.db_path()) in FakeRepository.events def test_inspect_code_outline_uses_outline_text(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) filepath = tmp_path / "code.py" filepath.write_text("class Tool:\n pass\n", encoding="utf-8") - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + code_index = nanocode.CodeIndex(session) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) result = InspectCodeTool.make(session, ["outline", "code.py", {"symbol": "Tool"}]).call() - assert ("outline_text", str(filepath), "Tool", str(tmp_path), nanocode._code_index_db_path(session)) in FakeRepository.events + assert ("outline_text", str(filepath), "Tool", str(tmp_path), code_index.db_path()) in FakeRepository.events assert result == "\nmode: outline\nfile: " + str(filepath) + "\noutline:\n class Tool 0:2 class Tool:\n" def test_outline_code_file_rejects_directories_and_symbols(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_module", lambda: fake_code_index_module()) + monkeypatch.setattr(nanocode.CodeIndex, "module", staticmethod(lambda: fake_code_index_module())) (tmp_path / "pkg").mkdir() session = Session(cwd=str(tmp_path)) diff --git a/tests/test_nanocode_commands.py b/tests/test_nanocode_commands.py index e61322f..b43c94c 100644 --- a/tests/test_nanocode_commands.py +++ b/tests/test_nanocode_commands.py @@ -89,7 +89,7 @@ def test_command_dispatcher_updates_config_and_auto_compacts(tmp_path): def test_status_reports_tokens_in_human_readable_format(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_status", lambda session, *, check=False: ("unavailable", "")) + monkeypatch.setattr(nanocode.CodeIndex, "status", lambda self, *, check=False: ("unavailable", "")) session = make_session(tmp_path, model="model") session.state.last_total_tokens = 1200 session.state.last_cached_prompt_tokens = 400 @@ -118,7 +118,7 @@ def test_status_reports_tokens_in_human_readable_format(tmp_path, monkeypatch): def test_index_command_syncs_code_index(tmp_path, monkeypatch): calls = [] - monkeypatch.setattr(nanocode, "_code_index_sync", lambda session, *, force=False: calls.append(force) or "code_index: synced") + monkeypatch.setattr(nanocode.CodeIndex, "sync", lambda self, *, force=False: calls.append(force) or "code_index: synced") dispatcher = CommandDispatcher(Agent(make_session(tmp_path))) result = dispatcher.dispatch("/index") diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py index 084ed1a..7cb010f 100644 --- a/tests/test_nanocode_loop.py +++ b/tests/test_nanocode_loop.py @@ -544,7 +544,7 @@ def run(self, user_input, *, confirm=None, on_auto_approve=None, on_message=None def test_agent_loop_welcome_suggests_index_when_missing(tmp_path, monkeypatch): - monkeypatch.setattr(nanocode, "_code_index_status", lambda session: ("missing", "")) + monkeypatch.setattr(nanocode.CodeIndex, "status", lambda self: ("missing", "")) class FakeAgent: def __init__(self): @@ -559,7 +559,7 @@ def __init__(self): def test_agent_loop_starts_existing_index_refresh_async(tmp_path, monkeypatch): refreshed = [] - def refresh_existing(session, *, progress=None): + def refresh_existing(self, *, progress=None): refreshed.append(progress is not None) if progress is not None: progress("file", done=1, total=2) @@ -570,7 +570,7 @@ def __init__(self): self.session = make_session(tmp_path, model="model") self.blackboard = Blackboard() - monkeypatch.setattr(nanocode, "_code_index_refresh_existing_async", refresh_existing) + monkeypatch.setattr(nanocode.CodeIndex, "refresh_existing_async", refresh_existing) outputs = [] loop = AgentLoop(FakeAgent(), input_fn=lambda prompt: "/exit", output_fn=outputs.append) @@ -619,7 +619,7 @@ def run(self, user_input, **kwargs): monkeypatch.setattr(loop.status_bar, "reset_timer", lambda: calls.append("reset")) monkeypatch.setattr(loop.status_bar, "resume", lambda: calls.append("resume")) monkeypatch.setattr(loop.status_bar, "pause", lambda: calls.append("pause")) - monkeypatch.setattr(nanocode, "_code_index_update_pending", lambda session: calls.append("index")) + monkeypatch.setattr(nanocode.CodeIndex, "update_pending", lambda self: calls.append("index")) loop._run_agent("hello") From 46fc3dcf6565df466a56dabbe4a58414a3388d1f Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:06:26 -0700 Subject: [PATCH 11/29] Clarify observe context tool restrictions --- nanocode.py | 11 +++++++++-- tests/test_nanocode_agent.py | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/nanocode.py b/nanocode.py index 180433d..9c8156c 100644 --- a/nanocode.py +++ b/nanocode.py @@ -4016,6 +4016,9 @@ def _state_tool_schema(name: str) -> Json: --- Output Guide --- Use function tools only. +OBSERVE may only use context tools: Keep, Forget, Recall. +Never use work tools during OBSERVE: Search, Read, Edit, Bash, InspectCode, CreateFile, List, LineCount, Git. +If more investigation is needed, first finish OBSERVE with Keep/Forget/Recall; ACT will continue after context reduction. Keep raw results needed for the next step; forget noise. Preserve important conclusions with SOURCE-backed Facts or Leads. @@ -4027,6 +4030,9 @@ def _state_tool_schema(name: str) -> Json: Use function tools only. No prose. Reduce raw tool results before ACT continues. +Allowed tools in OBSERVE: Keep, Forget, Recall. +Forbidden tools in OBSERVE: Search, Read, Edit, Bash, InspectCode, CreateFile, List, LineCount, Git. +If more work is needed, finish OBSERVE first; ACT will continue after context reduction. Keep only what affects the next step. Forget noise; omitted results are compacted. Preserve durable conclusions as source-backed Facts or Leads. @@ -6767,11 +6773,12 @@ def _handle_observe_response( return gate_result non_context_tool_error = self._non_context_tool_error(ctx.tool_calls) if non_context_tool_error: + detail = non_context_tool_error + " is not available in OBSERVE; use Keep, Forget, or Recall to reduce current results first. ACT may use work tools after OBSERVE completes." return self._reject_result( self._remember_observe_error, on_message, - self._error("observe only accepts context tools: " + non_context_tool_error + ".", "use Keep, Forget, or Recall while observing."), - "Retrying: observe latest results with context tools only.", + self._error(detail, "OBSERVE only accepts context tools: Keep, Forget, Recall."), + "Retrying: OBSERVE only accepts Keep, Forget, or Recall.", "Protocol_Gate: invalid observe tool(s): " + non_context_tool_error + ".", ) context_actions = ctx.actions + self._context_actions_from_tool_calls(ctx.tool_calls) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 35068c5..8b72722 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -1254,6 +1254,24 @@ def test_observe_rejects_invalid_action_and_allows_empty_actions(tmp_path): assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] +def test_observe_rejects_search_with_context_tool_guidance(tmp_path): + agent = Agent(_session(tmp_path, debug=True)) + agent.mode = nanocode.AgentMode.OBSERVE + agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] + messages = [] + + result = agent.handle_response( + {"actions": [{"type": "tool", "name": "Search", "intention": "keep investigating", "args": _search_args("needle")}]}, + on_message=messages.append, + ) + + assert result.done is False + assert agent.mode == nanocode.AgentMode.OBSERVE + assert messages == ["Protocol_Gate: invalid observe tool(s): Search."] + assert any("Search is not available in OBSERVE" in error for error in agent.observe_feedback_errors) + assert any("Keep, Forget, or Recall" in error for error in agent.observe_feedback_errors) + + def test_observe_compacts_unmentioned_result_keys_by_default(tmp_path): agent = Agent(_session(tmp_path, debug=True)) agent.mode = nanocode.AgentMode.OBSERVE From 536ed67157b2cc3e820c4c38887a4960a08e9ddb Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:25:04 -0700 Subject: [PATCH 12/29] Bump version to 0.4.10 --- nanocode.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nanocode.py b/nanocode.py index 9c8156c..a2ba395 100644 --- a/nanocode.py +++ b/nanocode.py @@ -56,7 +56,7 @@ from prompt_toolkit.patch_stdout import patch_stdout from prompt_toolkit.styles import Style -__version__ = "0.4.9" +__version__ = "0.4.10" JsonValue: TypeAlias = Any diff --git a/pyproject.toml b/pyproject.toml index 13a7483..d2d0de8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "nanocode-cli" -version = "0.4.9" +version = "0.4.10" description = "A lightweight terminal-based AI coding assistant" readme = "README.md" requires-python = ">=3.11" From 2fcdd2433deb7d05891e5399f101309e3c9b2860 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:32:55 -0700 Subject: [PATCH 13/29] Map named tool call fields and refine agent prompt --- nanocode.py | 97 ++++++++++++++++++++++++++---------- tests/test_nanocode_agent.py | 24 +++++++++ 2 files changed, 94 insertions(+), 27 deletions(-) diff --git a/nanocode.py b/nanocode.py index a2ba395..19e8512 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1216,6 +1216,19 @@ def cli_token(value: JsonValue) -> str: def signatures(cls) -> tuple[str, ...]: return cls.SIGNATURES or ((cls.SIGNATURE,) if cls.SIGNATURE else ()) + @classmethod + def param_names(cls) -> list[str]: + if cls.PARAM_NAMES: + return list(cls.PARAM_NAMES) + signatures = cls.signatures() + if len(signatures) != 1: + return [] + match = re.search(r"\(([^)]*)\)", signatures[0]) + value = match.group(1) if match else "" + if not value or any(token in value for token in "[]*{}") or "..." in value: + return [] + return [part.strip().split("=", 1)[0].strip() for part in value.split(",") if part.strip()] + @classmethod def schema_description(cls) -> str: return " ".join((*cls.DESCRIPTION, *cls.signatures(), *cls.EXAMPLE)) @@ -3890,34 +3903,65 @@ def _state_tool_schema(name: str) -> Json: # - Keep section names stable; change prompt shape only when the workflow meaning changes. AGENT_SYSTEM_PROMPT = """You are nanocode, a terminal coding agent. -Use assistant text for chat/final answers; use function tools for state/repo work. -Use tool schemas for exact names, capabilities, and arguments. -WHEN THE NEXT USEFUL ACTION IS CLEAR, TAKE IT NOW. +Use assistant text for chat/final answers. Use function tools for state, repo, files, shell, edits, and checks. +Follow tool schemas exactly. When the next useful action is clear, do it now. Priority: latest user request > blocking feedback > user rules > active state > conversation. -Never repeat an old completion. Do not rewrite Goal unless the user changed the task. - -Workflow: -- Chat: answer directly; do not create task state. -- One-shot: use only needed tools, then answer and stop; do not create task state just to report. -- Tracked task: for edits/debugging/checks/multi-step work, set Goal, keep the shortest necessary correct Plan, act on the current step, record Checks after edits or requested checks, finish with goal.complete=true. -Current step: -- Choose the smallest useful action from latest request, feedback, visible results, and Plan. -- Batch clear tool calls in one response. -- Tool calls run in order. If one fails, later tool calls are skipped. -- Use ordered tools for edit-then-check when the check is clear. -- Ask only when blocked. -- Do not stop at state-only updates when a useful tool call is clear. +Core rules: +- Do not repeat an old completion. +- Do not rewrite Goal unless the user changed the task. +- Ask only when blocked by missing intent, missing permission, or destructive risk. +- Prefer small, local, reversible changes. +- Do not invent code structure; inspect before editing. +- Do not overwrite unrelated user changes. +- Do not stop with state-only updates if a useful tool call is available. + +Modes: +- Chat: answer directly; no task state. +- Inspect: read/search only; answer with findings. +- One-shot: use only needed tools; answer and stop. +- Tracked task: for edits, debugging, checks, or multi-step work, maintain Goal, Plan, Facts, Leads, and Checks. State: -- Goal/Plan track work. Plan is the minimal correct path to Goal, not a loose TODO list; update it when Facts change the path. -- Facts are confirmed. Leads are for investigations. Checks are checks. User Rules are future-behavior requests. -- Save only what matters after results disappear; cite tr.N when result-backed; forget raw results when no longer needed. +- Goal stays stable until complete or user changes it. +- Plan is the shortest correct path to Goal, not a loose TODO list. +- Update Plan only when new Facts change the path. +- Facts are confirmed. Leads are unconfirmed. Checks are verification records. User Rules are future behavior. +- Save only what must survive disappearing tool results. Cite tr.N when result-backed. Forget stale raw results. + +Coding workflow: +- Before editing, identify the target file, relevant symbols, expected behavior, and evidence. +- Read only the smallest useful code region, but enough surrounding context to avoid wrong edits. +- Prefer existing project style, APIs, naming, error handling, tests, and workflows. +- Change only files needed for the Goal. +- Avoid broad refactors unless explicitly requested or necessary for correctness. +- If multiple fixes are possible, choose the smallest correct one. +- If editing generated, vendored, lock, or migration files, verify they are meant to be edited. +- After edits, inspect the diff or changed region before claiming success. + +Tool use: +- Batch independent read/search calls. +- Use ordered calls for clear edit-then-check flows. +- If a tool fails, diagnose the failure before retrying. +- Do not repeatedly run the same failing command without a new hypothesis or change. +- Prefer targeted checks first; run broader checks only when useful or requested. +- For long or expensive checks, run the narrowest command that can verify the change. + +Verification: +- A tracked task is not complete until the Goal is satisfied and required Checks are recorded. +- For code edits, verify by tests, typecheck, lint, build, or direct inspection when commands are unavailable. +- If verification cannot be run, state exactly why and what was verified instead. +- Record failed checks and use them to adjust the Plan. + +Finish: +- Set goal.complete=true only after the Goal is satisfied. +- Final answer should include what changed, how it was verified, and any remaining risk. Response: -- Reply in the LANGUAGE of the latest user input unless asked otherwise. Keep output plain and concise. Preserve literals. -- Default Response Format: Text (Not markdown) +- Reply in the language of the latest user input unless asked otherwise. +- Keep output plain, concise, and literal-preserving. +- Plain text by default. """ AGENT_USER_PROMPT_TEMPLATE = """ @@ -4591,7 +4635,10 @@ def _action_from_function_call(self, name: str, arguments: str) -> Json: } args = _json_dict(value) if name in TOOL_REGISTRY: - return {"type": "tool", "name": name, "intention": _json_str(args.get("intention")) or "", "args": _json_list(args.get("args"))} + call_args = _json_list(args.get("args")) + if "args" not in args: + call_args = [args[param] for param in TOOL_REGISTRY[name].param_names() if param in args] + return {"type": "tool", "name": name, "intention": _json_str(args.get("intention")) or "", "args": call_args} action = {"type": name} action.update(args) return action @@ -6299,11 +6346,7 @@ def _format_tool_arg_error(self, execution: ToolCallExecution) -> str: tool_class = TOOL_REGISTRY.get(call.name) if tool_class is None: return execution.output - match = re.search(r"\(([^)]*)\)", tool_class.SIGNATURE) - value = match.group(1) if match else "" - params = list(tool_class.PARAM_NAMES) - if not params and value and not any(token in value for token in "[]*") and "..." not in value: - params = [part.strip().split("=", 1)[0].strip() for part in value.split(",") if part.strip()] + params = tool_class.param_names() if not params or len(call.args) == len(params): return execution.output detail = "got " + str(len(call.args)) + " args, expected " + str(len(params)) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 8b72722..ecde1ee 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -1979,6 +1979,30 @@ def __init__(self, **_kwargs): assert session.state.last_total_tokens == 5 +def test_agent_request_chat_stream_maps_named_tool_arguments(tmp_path, monkeypatch): + class FakeCompletions: + def create(self, **_kwargs): + return iter( + [ + _stream_chunk({"tool_calls": [{"index": 0, "function": {"name": "Bash", "arguments": '{"intention":"check diff",'}}]}), + _stream_chunk({"tool_calls": [{"index": 0, "function": {"arguments": '"command":"git diff -- nanocode.py"}'}}]}), + ] + ) + + class FakeOpenAI: + def __init__(self, **_kwargs): + self.chat = type("FakeChat", (), {"completions": FakeCompletions()})() + + monkeypatch.setattr(nanocode, "OpenAI", FakeOpenAI) + session = _session(tmp_path, api_url="https://example.test/v1", api_key="key", model="model") + + response = Agent(session).request("system", "user", tool_schemas=[nanocode.BashTool.tool_schema()]) + + assert response == { + "actions": [{"type": "tool", "name": "Bash", "intention": "check diff", "args": ["git diff -- nanocode.py"]}], + } + + def test_agent_stream_step_preserves_same_response_tool_batch_in_latest(tmp_path, monkeypatch): (tmp_path / "one.txt").write_text("one\n", encoding="utf-8") (tmp_path / "two.txt").write_text("two\n", encoding="utf-8") From 3586637c387534006bcd4a4689b18d06a87059bb Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:35:46 -0700 Subject: [PATCH 14/29] Bump version from 0.4.10 to 0.4.11 --- CHANGELOG.md | 5 +++++ nanocode.py | 2 +- pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa8e1af..4ae708e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog + +## 0.4.11 - 2026-05-24 + +### Changed +- Bumped version from 0.4.10 to 0.4.11. ## 0.4.8 - 2026-05-23 ### Changed diff --git a/nanocode.py b/nanocode.py index 19e8512..2707ed7 100644 --- a/nanocode.py +++ b/nanocode.py @@ -56,7 +56,7 @@ from prompt_toolkit.patch_stdout import patch_stdout from prompt_toolkit.styles import Style -__version__ = "0.4.10" +__version__ = "0.4.11" JsonValue: TypeAlias = Any diff --git a/pyproject.toml b/pyproject.toml index d2d0de8..a014369 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "nanocode-cli" -version = "0.4.10" +version = "0.4.11" description = "A lightweight terminal-based AI coding assistant" readme = "README.md" requires-python = ">=3.11" From b9123719385cf55d7bca7d852f51152013125fd6 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:43:56 -0700 Subject: [PATCH 15/29] refactor: use projected unreduced context chars for observe budgeting Replace raw_context_chars check in _should_observe_after_tools with _projected_unreduced_context_chars to avoid false positives when large Read blocks are replaced in later steps. --- nanocode.py | 19 ++++++++++++------- tests/test_nanocode_agent.py | 36 +++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/nanocode.py b/nanocode.py index 2707ed7..5907d44 100644 --- a/nanocode.py +++ b/nanocode.py @@ -6271,16 +6271,21 @@ def _should_observe_after_tools(self) -> bool: if not pending: return False budget = self.context_budget() - # Tool failures stay visible to ACT as Latest Tool Results plus feedback. - # Very large failures still trigger observe through raw-context pressure. return ( len(pending) >= budget.observe_after_results - or self.tool_context.raw_context_chars( - self.blackboard.memory_checkpoint_tool_result_counter, - exclude_keys=self.blackboard.referenced_result_keys(), - ) - >= budget.raw_chars + or self._projected_unreduced_context_chars(pending) >= budget.raw_chars + ) + + def _projected_unreduced_context_chars(self, blocks: list[str]) -> int: + budget = self.context_budget() + file_context = ToolResultContext.format_file_context( + blocks, + cwd=self.session.cwd, + max_chars=budget.raw_chars + budget.kept_chars, ) + tool_results = "\n\n".join(ToolResultContext.render_blocks_for_prompt(blocks)) + tool_index = "\n".join(ToolResultContext.compact_block(block) for block in blocks) + return len("\n\n".join(part for part in (file_context, tool_index, tool_results) if part)) def _unreferenced_unreduced_blocks(self) -> list[str]: return self.tool_context.unreduced_blocks( diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index ecde1ee..b1e9ee3 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -1380,20 +1380,38 @@ def test_observe_checkpoint_clears_observe_errors(tmp_path): assert agent.observe_feedback_errors == [] -def test_agent_tool_result_raw_budget_triggers_observe(tmp_path, monkeypatch): +def test_projected_read_context_budget_ignores_replaced_raw_read_blocks(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) agent = Agent(session) - _set_context_budget(monkeypatch, agent, raw_chars=180, observe_after_results=99) + _set_context_budget(monkeypatch, agent, raw_chars=1_000_000, observe_after_results=99) path = tmp_path / "sample.txt" - path.write_text("x" * 400 + "\n", encoding="utf-8") + path.write_text("x" * 2000 + "\n", encoding="utf-8") - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) + for _ in range(4): + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) + assert agent.mode == nanocode.AgentMode.ACT - assert agent.mode == nanocode.AgentMode.OBSERVE - assert agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter) >= agent.context_budget().raw_chars - observe_context = _observe_tool_result_context(agent) - assert "sample.txt" in observe_context - assert "x" * 50 in observe_context + pending = agent._unreferenced_unreduced_blocks() + raw_chars = agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter) + projected_chars = agent._projected_unreduced_context_chars(pending) + assert projected_chars < raw_chars + + _set_context_budget(monkeypatch, agent, raw_chars=(raw_chars + projected_chars) // 2, observe_after_results=99) + + assert raw_chars >= agent.context_budget().raw_chars + assert agent._projected_unreduced_context_chars(pending) < agent.context_budget().raw_chars + assert agent._should_observe_after_tools() is False + + +def test_projected_raw_output_budget_triggers_observe(tmp_path, monkeypatch): + agent = Agent(Session(cwd=str(tmp_path))) + _set_context_budget(monkeypatch, agent, raw_chars=180, observe_after_results=99) + agent.tool_context.latest = ['- ok tool=Bash args=["big"] key=tr.1\n output:\n' + ("x" * 400)] + + pending = agent._unreferenced_unreduced_blocks() + + assert agent._projected_unreduced_context_chars(pending) >= agent.context_budget().raw_chars + assert agent._should_observe_after_tools() is True def test_referenced_raw_context_does_not_force_observe(tmp_path, monkeypatch): From e3d42de90ca226ffc39eb3d2621326e651517c7c Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:55:12 -0700 Subject: [PATCH 16/29] Optimize task feedback and discovery context --- nanocode.py | 104 ++++++++++++++++++++++++++++++++++- tests/test_nanocode_agent.py | 104 ++++++++++++++++++++++++++++++++++- 2 files changed, 202 insertions(+), 6 deletions(-) diff --git a/nanocode.py b/nanocode.py index 5907d44..e88d6fc 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1345,6 +1345,7 @@ class FileContextItem: @dataclass class ToolResultContext: COMPACT_OUTPUT_SUMMARY_CHARS: ClassVar[int] = 120 + DISCOVERY_CONTEXT_BLOCK_CHARS: ClassVar[int] = 4_000 latest: list[str] = field(default_factory=list) recent: list[str] = field(default_factory=list) kept_results: list[str] = field(default_factory=list) @@ -1532,6 +1533,11 @@ def render_blocks_for_prompt(cls, blocks: list[str]) -> list[str]: @classmethod def render_block_for_prompt(cls, block: str) -> str: + if cls._is_discovery_result_block(block): + compact = cls.compact_block(block) + if "\n out: " in compact: + return compact + "; content=discovery_context" + return compact if not cls._is_file_context_result_block(block): return block compact = cls.compact_file_context_block(block) @@ -1623,6 +1629,33 @@ def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int rendered = "\n".join(lines).rstrip() return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered + @classmethod + def format_discovery_context(cls, blocks: list[str], *, max_chars: int) -> str: + lines = [ + "Source Policy:", + "- Built dynamically for this prompt from active Search and InspectCode results.", + "- Treat these as discovery leads, not current source truth.", + "- Use Read before editing exact code.", + "", + ] + seen: set[str] = set() + for block in blocks: + if not cls._is_discovery_result_block(block): + continue + key = cls.result_key(block) + if key and key in seen: + continue + seen.add(key) + header, output = block.split("\n output:\n", 1) + tool_name = cls._block_tool_name(header) + block_budget = min(cls.DISCOVERY_CONTEXT_BLOCK_CHARS, max_chars) if max_chars > 0 else cls.DISCOVERY_CONTEXT_BLOCK_CHARS + excerpt = _bound_tool_output(output, max_chars=block_budget).value + lines.extend(["Source: " + (key or "(unknown)") + " tool=" + tool_name, excerpt.strip(), ""]) + if len(lines) <= 5: + return "" + rendered = "\n".join(lines).rstrip() + return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered + @classmethod def _file_context_items(cls, blocks: list[str]) -> list[FileContextItem]: return [item for block in blocks for item in cls._file_context_block_items(block)] @@ -1750,6 +1783,21 @@ def _is_file_context_result_block(cls, block: str) -> bool: or (re.search(r"\btool=Edit\b", header) and "" in output and ("" in output or "" in output)) ) + @classmethod + def _is_discovery_result_block(cls, block: str) -> bool: + if not cls.is_full_block(block): + return False + header, output = block.split("\n output:\n", 1) + return bool( + (re.search(r"\btool=Search\b", header) and "" in output) + or (re.search(r"\btool=InspectCode\b", header) and "" in output) + ) + + @staticmethod + def _block_tool_name(header: str) -> str: + match = re.search(r"\btool=([A-Za-z][A-Za-z0-9_]*)\b", header) + return match.group(1) if match else "unknown" + @classmethod def recalled_result_blocks(cls, recall_block: str) -> list[str]: if not cls.is_full_block(recall_block): @@ -3988,6 +4036,9 @@ def _state_tool_schema(name: str) -> Json: Tool Result Index: {tool_result_index} +Discovery Context: +{discovery_context} + File Context: {file_context} @@ -4043,6 +4094,9 @@ def _state_tool_schema(name: str) -> Json: --- Tool Context --- +Discovery Context: +{discovery_context} + File Context: {file_context} @@ -5709,10 +5763,16 @@ def apply_context_budget(self) -> None: self.tool_context.prune_recent(max_index_items=budget.index_items, checkpoint=checkpoint) def build_user_prompt(self) -> str: + self._refresh_agent_feedback() tool_result_index, unreduced_tool_results, latest_tool_results = self._format_act_tool_result_context() budget = self.context_budget() + context_blocks = self._act_file_context_blocks() + discovery_context = ToolResultContext.format_discovery_context( + context_blocks, + max_chars=max(1, budget.raw_chars // 3), + ) file_context = ToolResultContext.format_file_context( - self._act_file_context_blocks(), + context_blocks, cwd=self.session.cwd, max_chars=budget.raw_chars + budget.kept_chars, ) @@ -5723,6 +5783,7 @@ def build_user_prompt(self) -> str: user_rules=self.session.state.user_rules.format(), kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", tool_result_index=tool_result_index or "(empty)", + discovery_context=discovery_context or "(empty)", file_context=file_context or "(empty)", unreduced_tool_results=unreduced_tool_results or "(empty)", latest_tool_results=latest_tool_results or "(empty)", @@ -5786,6 +5847,10 @@ def build_observe_prompt(self) -> str: cwd=self.session.cwd, max_chars=budget.raw_chars + budget.kept_chars, ) + discovery_context = ToolResultContext.format_discovery_context( + self.tool_context.kept_results + unreduced_blocks, + max_chars=max(1, budget.raw_chars // 3), + ) unreduced = "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced_blocks)) return AGENT_OBSERVE_USER_PROMPT_TEMPLATE.format( user_rules=self.session.state.user_rules.format(), @@ -5793,6 +5858,7 @@ def build_observe_prompt(self) -> str: plan="\n".join(item.format() for item in current.plan) if current.plan else "(empty)", leads="\n".join(item.format() for item in current.leads) if current.leads else "(empty)", known="\n".join(KnownItem.format_item(item) for item in current.known) if current.known else "(empty)", + discovery_context=discovery_context or "(empty)", file_context=file_context or "(empty)", kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", errors="\n".join("- " + error for error in self.observe_feedback_errors) or "(empty)", @@ -6008,6 +6074,31 @@ def _remember_agent_error(self, text: str) -> None: def _remember_observe_error(self, text: str) -> None: self._remember_feedback_error(self.observe_feedback_errors, text) + def _drop_agent_feedback(self, *markers: str) -> None: + lowered = tuple(marker.lower() for marker in markers if marker) + if not lowered: + return + self.agent_feedback_errors = [ + error for error in self.agent_feedback_errors if not any(marker in error.lower() for marker in lowered) + ] + + def _refresh_agent_feedback(self) -> None: + markers = [] + if not self.task_alignment_required or self.blackboard.task_code != TaskCode.NEW: + markers.append("previous task context is still present") + if self.blackboard.plan: + markers.extend( + [ + "plan is empty after discovery", + "rewrote goal after the task was active", + "changed goal without replacing plan", + "mutating work before plan was set", + ] + ) + if self.blackboard.goal: + markers.append("mutating work before goal/plan was set") + self._drop_agent_feedback(*markers) + def _error(self, text: str, rule: str = "") -> str: return "Error blocked: " + text + ((" Next: " + rule) if rule else "") @@ -6283,9 +6374,13 @@ def _projected_unreduced_context_chars(self, blocks: list[str]) -> int: cwd=self.session.cwd, max_chars=budget.raw_chars + budget.kept_chars, ) + discovery_context = ToolResultContext.format_discovery_context( + blocks, + max_chars=max(1, budget.raw_chars // 3), + ) tool_results = "\n\n".join(ToolResultContext.render_blocks_for_prompt(blocks)) tool_index = "\n".join(ToolResultContext.compact_block(block) for block in blocks) - return len("\n\n".join(part for part in (file_context, tool_index, tool_results) if part)) + return len("\n\n".join(part for part in (discovery_context, file_context, tool_index, tool_results) if part)) def _unreferenced_unreduced_blocks(self) -> list[str]: return self.tool_context.unreduced_blocks( @@ -6744,6 +6839,8 @@ def _gate_after_apply(self, ctx: ResponseContext, on_message: MessageCallback | and not ctx.tool_calls and not ctx.pending_check_requested and not ctx.completion_message + and not ctx.has_goal_action + and not ctx.has_fresh_plan_action and ctx.user_rule_message is None ): self._warn_agent("state update-only turn; include frontier tool, verify, or goal when arguments are known.") @@ -7010,7 +7107,7 @@ def run( # Keep previous task state at a new user turn so short follow-ups like # "continue" can resume. The first response must align with it before work # when the new request does not match the previous goal. - self.task_alignment_required = old_task_context and self._task_text_key(user_input) != self._task_text_key(old_goal) + self.task_alignment_required = old_task_context and not previous_task_done and self._task_text_key(user_input) != self._task_text_key(old_goal) self.blackboard.task_code = TaskCode.NEW self.blackboard.goal_reached = False self.blackboard.checks_required = False @@ -7072,6 +7169,7 @@ def handle_response( DebugTrace.handle_event(self, "handle-applied", ctx, response, extra={"forgotten": forgotten_keys}) self._emit_state_and_text(ctx, on_message) self._emit_tool_context_update([], forgotten_keys, on_message) + self._refresh_agent_feedback() if ctx.has_user_rule_action and not ctx.tool_calls and not ctx.pending_check_requested: message = ctx.user_rule_message or "Rule saved." self.session.append_conversation(AssistantMessage(content=message)) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index b1e9ee3..2d1ccd5 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -980,6 +980,56 @@ def test_act_prompt_includes_kept_tool_results(tmp_path): assert len(agent.tool_context.kept_results) == 1 +def test_act_prompt_projects_search_results_to_discovery_context(tmp_path): + sample = tmp_path / "sample.py" + sample.write_text("class StatusBar:\n def elapsed(self):\n return 1\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + + agent.execute_tool_calls([{"name": "Search", "intention": "find status", "args": _search_args("StatusBar", path="sample.py", context=1)}]) + + prompt = agent.build_user_prompt() + discovery = _prompt_section(prompt, "Discovery Context", "File Context") + latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") + assert "Source: tr.1 tool=Search" in discovery + assert "sample.py" in discovery + assert "StatusBar" in discovery + assert "" not in latest + assert "content=discovery_context" in latest + + +def test_discovery_context_follows_active_result_lifecycle(tmp_path, monkeypatch): + sample = tmp_path / "sample.py" + sample.write_text("class StatusBar:\n pass\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + _set_context_budget(monkeypatch, agent, observe_after_results=1) + + agent.execute_tool_calls([{"name": "Search", "intention": "find status", "args": _search_args("StatusBar", path="sample.py")}]) + assert agent.mode == nanocode.AgentMode.OBSERVE + assert "Source: tr.1 tool=Search" in _prompt_section(agent.build_observe_prompt(), "Discovery Context", "File Context") + + agent.handle_response({"actions": [{"type": "keep", "source": ["tr.1"], "reason": "status symbol location is still useful"}]}) + assert agent.mode == nanocode.AgentMode.ACT + assert "Source: tr.1 tool=Search" in _prompt_section(agent.build_user_prompt(), "Discovery Context", "File Context") + + agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "location no longer needed"}]}) + assert "Discovery Context:\n(empty)" in agent.build_user_prompt() + + +def test_observed_discovery_result_compacts_out_of_discovery_context(tmp_path, monkeypatch): + sample = tmp_path / "sample.py" + sample.write_text("class StatusBar:\n pass\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + _set_context_budget(monkeypatch, agent, observe_after_results=1) + + agent.execute_tool_calls([{"name": "Search", "intention": "find status", "args": _search_args("StatusBar", path="sample.py")}]) + agent.handle_response({"actions": []}) + + prompt = agent.build_user_prompt() + assert "Discovery Context:\n(empty)" in prompt + assert "content=discovery_context" not in prompt + assert "recall=tr.1" in prompt + + def test_kept_tool_results_deduplicate_by_tool_key(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) @@ -3851,7 +3901,29 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert response["actions"][-1]["message_for_complete"] == "done" assert agent.blackboard.goal == "run lint" assert [item.text for item in agent.blackboard.plan] == ["Read sample"] - assert "previous task context is still present" in " ".join(agent.agent_feedback_errors) + assert "previous task context is still present" not in " ".join(agent.agent_feedback_errors) + + +def test_agent_run_does_not_require_alignment_after_completed_task(tmp_path): + class FakeModelClient: + def __init__(self): + self.user_prompts = [] + + def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): + self.user_prompts.append(user_prompt) + return {"actions": [], "_assistant_text": "ok"} + + agent = Agent(Session(cwd=str(tmp_path))) + agent.blackboard.goal = "old task" + agent.blackboard.plan = [nanocode.PlanItem(id="p1", text="Old task", status=nanocode.PlanStatus.DONE, context="done")] + agent.blackboard.task_code = nanocode.TaskCode.DONE + agent.model_client = FakeModelClient() + + agent.run("new task") + + assert agent.task_alignment_required is False + assert "previous task context is still present" not in "\n".join(agent.model_client.user_prompts) + assert "previous task context is still present" not in " ".join(agent.agent_feedback_errors) def test_agent_run_warns_on_goal_rewrite_after_task_is_working(tmp_path): @@ -3872,7 +3944,6 @@ def __init__(self): }, {"actions": [{"type": "goal", "text": "read sample again", "complete": False}]}, {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, { "actions": [ {"type": "plan", "items": [{"id": "p1", "text": "Read sample", "status": "done", "context": "read sample.txt"}]}, @@ -3893,7 +3964,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert response["actions"][-1]["message_for_complete"] == "done" assert [item.text for item in agent.blackboard.plan] == ["Read sample"] assert len(agent.tool_runner.latest_executions) == 1 - assert "rewrote Goal after the task was active" in " ".join(agent.agent_feedback_errors) + assert "rewrote Goal after the task was active" not in " ".join(agent.agent_feedback_errors) def test_agent_allows_plan_with_multiple_doing_items(tmp_path): @@ -4043,6 +4114,33 @@ def test_agent_warns_when_discovery_runs_long_without_plan(tmp_path, monkeypatch assert any("Plan is empty after discovery" in error for error in agent.agent_feedback_errors) +def test_agent_clears_stale_task_feedback_after_fresh_goal_and_plan(tmp_path): + agent = Agent(Session(cwd=str(tmp_path))) + agent.blackboard.goal = "old task" + agent.blackboard.task_code = nanocode.TaskCode.WORKING + agent.task_alignment_required = True + agent.agent_feedback_errors = [ + "Warning blocked: previous task context is still present. Next: emit goal for a new task.", + "Warning blocked: rewrote Goal after the task was active. Next: replace Plan when the task scope changes.", + "Warning blocked: Plan is empty after discovery. Next: set a short Plan before more broad exploration.", + ] + + agent.handle_response( + { + "actions": [ + {"type": "goal", "text": "new task", "complete": False}, + {"type": "plan", "items": [{"id": "p1", "text": "Inspect target", "status": "doing"}]}, + ] + } + ) + + feedback = " ".join(agent.agent_feedback_errors) + assert "previous task context is still present" not in feedback + assert "rewrote Goal after the task was active" not in feedback + assert "Plan is empty after discovery" not in feedback + assert "state update-only turn" not in feedback + + def test_agent_run_reports_continuation_only_when_no_actions(tmp_path): class FakeModelClient: def __init__(self): From 6da7f86c0efc388151919dce9d2d08fc99fef1da Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 04:59:38 -0700 Subject: [PATCH 17/29] Accept multiple Read file args --- nanocode.py | 44 ++++++++++++++++++++++----- tests/test_nanocode_read_tool.py | 51 ++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/nanocode.py b/nanocode.py index e88d6fc..6a9d0c6 100644 --- a/nanocode.py +++ b/nanocode.py @@ -2069,18 +2069,20 @@ class ReadTool(Tool): EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY DESCRIPTION: ClassVar[tuple[str, ...]] = ( "Read one or more UTF-8 files with line:hash anchors.", - "Pass one structured object. Use path for one file, or files for multiple files.", + "Pass one structured object. Use path for one file, files for multiple files, or multiple file objects as args.", "Each file can omit range for the first 600 lines, pass range=[start,end], or ranges=[[start,end],...].", ) SIGNATURES: ClassVar[tuple[str, ...]] = ( "Read({path, range?}) -> selected range or first 600 lines", "Read({path, ranges}) -> selected ranges from one file", "Read({files:[{path, range?|ranges?}, ...]}) -> selected ranges from multiple files", + "Read({path, range?}, {path, range?}) -> selected ranges from multiple files", ) EXAMPLE: ClassVar[tuple[str, ...]] = ( 'Example args: [{"path":"code.py","range":[0,80]}]', 'Example args: [{"path":"code.py","ranges":[[0,80],[160,220]]}]', 'Example args: [{"files":[{"path":"pyproject.toml"},{"path":"uv.lock","range":[0,120]}]}]', + 'Example args: [{"path":"nanocode.py","range":[58,59]},{"path":"pyproject.toml","range":[6,7]}]', ) targets: list[tuple[str, list[tuple[int, int]]]] = field(default_factory=list) @@ -2088,7 +2090,7 @@ class ReadTool(Tool): @classmethod def cli_args(cls, args: list[JsonValue]) -> list[str]: - payload = _json_dict(args[0]) if len(args) == 1 else {} + payload = cls._payload_from_args(args) or {} if not payload: return [cls.cli_token(arg) for arg in args] raw_files = _json_list(payload.get("files")) if "files" in payload else [payload] @@ -2101,7 +2103,7 @@ def cli_args(cls, args: list[JsonValue]) -> list[str]: raw_ranges = [spec.get("range")] if "range" in spec else _json_list(spec.get("ranges")) if "ranges" in spec else [] tokens.append(path) tokens.extend(str(values[0]) + ":" + str(values[1]) for raw_range in raw_ranges if len(values := _json_list(raw_range)) == 2) - return tokens or [cls.cli_token(args[0])] + return tokens or [cls.cli_token(arg) for arg in args] @classmethod def tool_schema(cls) -> Json: @@ -2141,8 +2143,7 @@ def tool_schema(cls) -> Json: "type": "array", "items": read_arg_schema, "minItems": 1, - "maxItems": 1, - "description": "Exactly one structured Read request object.", + "description": "One structured Read request object, or multiple file request objects.", }, }, ["intention", "args"], @@ -2151,11 +2152,38 @@ def tool_schema(cls) -> Json: @classmethod def make(cls, session: Session, args: list[JsonValue]) -> Self: - if len(args) != 1 or not isinstance(args[0], dict): - raise ToolCallArgError('Read args error: expected exactly one object, e.g. [{"path":"nanocode.py","range":[2065,2095]}]') - payload = _json_dict(args[0]) + payload = cls._payload_from_args(args) + if not payload: + raise ToolCallArgError( + 'Read args error: expected one object or multiple file objects, e.g. [{"path":"nanocode.py","range":[2065,2095]}] ' + 'or [{"path":"nanocode.py","range":[58,59]},{"path":"pyproject.toml","range":[6,7]}]' + ) return cls(targets=cls._parse_targets(session, payload), cwd=session.cwd) + @classmethod + def _payload_from_args(cls, args: list[JsonValue]) -> Json | None: + objects = [cls._arg_object(arg) for arg in args] + if len(objects) == 1 and objects[0] is not None: + return objects[0] + if len(objects) > 1: + files = [obj for obj in objects if obj is not None and "files" not in obj] + if len(files) == len(objects): + return {"files": files} + return None + + @classmethod + def _arg_object(cls, value: JsonValue) -> Json | None: + if isinstance(value, dict): + return value + text = _json_str(value) + if not text: + return None + try: + parsed = json.loads(text) + except json.JSONDecodeError: + return None + return parsed if isinstance(parsed, dict) else None + @classmethod def _parse_targets(cls, session: Session, payload: Json) -> list[tuple[str, list[tuple[int, int]]]]: if "files" in payload: diff --git a/tests/test_nanocode_read_tool.py b/tests/test_nanocode_read_tool.py index 38724e5..0b937e5 100644 --- a/tests/test_nanocode_read_tool.py +++ b/tests/test_nanocode_read_tool.py @@ -1,3 +1,5 @@ +import json + import pytest import nanocode @@ -46,7 +48,7 @@ def test_read_tool_reads_requested_line_range(tmp_path): def test_read_tool_rejects_empty_args_with_actionable_error(tmp_path): session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="Read args error: expected exactly one object"): + with pytest.raises(ToolCallError, match="Read args error: expected one object or multiple file objects"): ReadTool.make(session, []) @@ -55,7 +57,7 @@ def test_read_tool_rejects_positional_args(tmp_path): path.write_text("zero\none\ntwo\nthree\nfour\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="Read args error: expected exactly one object"): + with pytest.raises(ToolCallError, match="Read args error: expected one object or multiple file objects"): ReadTool.make(session, ["sample.txt", "1", "2", "3", "5"]) @@ -113,6 +115,51 @@ def test_read_tool_reads_multiple_files_with_independent_ranges(tmp_path): assert "|alpha" not in result +def test_read_tool_reads_multiple_file_objects_as_args(tmp_path): + (tmp_path / "one.txt").write_text("zero\none\ntwo\n", encoding="utf-8") + (tmp_path / "two.txt").write_text("alpha\nbeta\ngamma\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + + tool = ReadTool.make(session, [{"path": "one.txt", "range": [1, 2]}, {"path": "two.txt", "range": [1, 3]}]) + result = tool.call() + + assert _target_paths(tool) == [str(tmp_path / "one.txt"), str(tmp_path / "two.txt")] + assert _hashline(1, "one\n") in result + assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result + assert "|zero" not in result + assert "|alpha" not in result + + +def test_read_tool_reads_stringified_file_objects_as_args(tmp_path): + (tmp_path / "one.txt").write_text("zero\none\ntwo\n", encoding="utf-8") + (tmp_path / "two.txt").write_text("alpha\nbeta\ngamma\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + + tool = ReadTool.make( + session, + [ + json.dumps({"path": "one.txt", "range": [1, 2]}), + json.dumps({"path": "two.txt", "range": [1, 3]}), + ], + ) + result = tool.call() + + assert _target_paths(tool) == [str(tmp_path / "one.txt"), str(tmp_path / "two.txt")] + assert _hashline(1, "one\n") in result + assert _hashline(1, "beta\n") + _hashline(2, "gamma\n") in result + assert "|zero" not in result + assert "|alpha" not in result + + +def test_read_tool_formats_stringified_file_objects_as_readable_cli_args(): + args = [ + json.dumps({"path": "nanocode.py", "range": [58, 59]}), + json.dumps({"path": "pyproject.toml", "range": [6, 7]}), + ] + + assert ReadTool.cli_args(args) == ["nanocode.py", "58:59", "pyproject.toml", "6:7"] + + def test_read_tool_reads_structured_ranges(tmp_path): path = tmp_path / "sample.txt" path.write_text("zero\none\ntwo\nthree\nfour\n", encoding="utf-8") From a97b08ae58e62a15c06888de2f730d1719c6f841 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 05:06:06 -0700 Subject: [PATCH 18/29] Accept multiple Search query args --- nanocode.py | 157 +++++++++++++++++++++++------ tests/test_nanocode_search_tool.py | 60 ++++++++++- 2 files changed, 184 insertions(+), 33 deletions(-) diff --git a/nanocode.py b/nanocode.py index 6a9d0c6..1a9fb6b 100644 --- a/nanocode.py +++ b/nanocode.py @@ -2162,7 +2162,7 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: @classmethod def _payload_from_args(cls, args: list[JsonValue]) -> Json | None: - objects = [cls._arg_object(arg) for arg in args] + objects = [_json_object_arg(arg) for arg in args] if len(objects) == 1 and objects[0] is not None: return objects[0] if len(objects) > 1: @@ -2171,19 +2171,6 @@ def _payload_from_args(cls, args: list[JsonValue]) -> Json | None: return {"files": files} return None - @classmethod - def _arg_object(cls, value: JsonValue) -> Json | None: - if isinstance(value, dict): - return value - text = _json_str(value) - if not text: - return None - try: - parsed = json.loads(text) - except json.JSONDecodeError: - return None - return parsed if isinstance(parsed, dict) else None - @classmethod def _parse_targets(cls, session: Session, payload: Json) -> list[tuple[str, list[tuple[int, int]]]]: if "files" in payload: @@ -2442,15 +2429,19 @@ class SearchTool(Tool): DESCRIPTION: ClassVar[tuple[str, ...]] = ( "Case-insensitive regex search across files; use before Read when location is unknown.", "Returns file:line matches and optional line:hash context anchors.", - "Pass one structured object with pattern, optional path, optional glob, and optional context.", + "Pass one or more structured objects with pattern, optional path, optional glob, and optional context.", "Use InspectCode for symbol structure; use Bash rg/grep for custom shell pipelines.", "Escape regex metacharacters for literal text; use A|B for alternatives and \\n for multiline.", ) - SIGNATURES: ClassVar[tuple[str, ...]] = ("Search({pattern, path?, glob?, context?}) -> matching lines",) + SIGNATURES: ClassVar[tuple[str, ...]] = ( + "Search({pattern, path?, glob?, context?}) -> matching lines", + "Search({pattern, glob?}, {pattern, glob?}) -> matching lines for multiple queries", + ) EXAMPLE: ClassVar[tuple[str, ...]] = ( 'Example args: [{"pattern":"class .*Tool","path":"nanocode.py"}]', 'Example args: [{"pattern":"TODO|FIXME","path":".","glob":"*.py","context":2}]', 'Literal paren args: [{"pattern":"def __init__\\\\(","path":".","glob":"*.py"}]', + 'Example args: [{"pattern":"version","glob":"*.toml"},{"pattern":"version","glob":"*.cfg"}]', ) @dataclass(frozen=True) @@ -2460,18 +2451,35 @@ class Match: text: str context: list[tuple[int, str]] + @dataclass(frozen=True) + class Request: + pattern: str + target_path: str + glob_pattern: str + context_lines: int + pattern: str = "" target_path: str = "" glob_pattern: str = "" context_lines: int = CONTEXT_LINES cwd: str = "" gitignore_patterns: list[str] = field(default_factory=list) + requests: list[Request] = field(default_factory=list) @classmethod def cli_args(cls, args: list[JsonValue]) -> list[str]: - payload = _json_dict(args[0]) if len(args) == 1 else {} - if not payload: + payloads = cls._payloads_from_args(args) or [] + if not payloads: return [cls.cli_token(arg) for arg in args] + tokens: list[str] = [] + for index, payload in enumerate(payloads): + if index: + tokens.append("|") + tokens.extend(cls._cli_payload_tokens(payload)) + return tokens or [cls.cli_token(arg) for arg in args] + + @classmethod + def _cli_payload_tokens(cls, payload: Json) -> list[str]: tokens = [cls.cli_token(payload.get("pattern", ""))] if "path" in payload: tokens.append("path=" + str(payload.get("path") or ".")) @@ -2507,8 +2515,7 @@ def tool_schema(cls) -> Json: "type": "array", "items": search_arg_schema, "minItems": 1, - "maxItems": 1, - "description": "Exactly one structured Search request object.", + "description": "One structured Search request object, or multiple Search request objects.", }, }, ["intention", "args"], @@ -2517,9 +2524,33 @@ def tool_schema(cls) -> Json: @classmethod def make(cls, session: Session, args: list[JsonValue]) -> Self: - if len(args) != 1 or not isinstance(args[0], dict): - raise ToolCallArgError('Search args error: expected exactly one object, e.g. [{"pattern":"class Foo","path":"."}]') - payload = _json_dict(args[0]) + payloads = cls._payloads_from_args(args) + if not payloads: + raise ToolCallArgError( + 'Search args error: expected one object or multiple search objects, e.g. [{"pattern":"class Foo","path":"."}] ' + 'or [{"pattern":"version","glob":"*.toml"},{"pattern":"version","glob":"*.cfg"}]' + ) + requests = [cls._parse_request(session, payload) for payload in payloads] + first = requests[0] + return cls( + pattern=first.pattern, + target_path=first.target_path, + glob_pattern=first.glob_pattern, + context_lines=first.context_lines, + cwd=session.cwd, + gitignore_patterns=cls._load_gitignore_patterns(session.cwd), + requests=requests if len(requests) > 1 else [], + ) + + @classmethod + def _payloads_from_args(cls, args: list[JsonValue]) -> list[Json] | None: + objects = [_json_object_arg(arg) for arg in args] + if not objects or any(obj is None for obj in objects): + return None + return [obj for obj in objects if obj is not None] + + @classmethod + def _parse_request(cls, session: Session, payload: Json) -> Request: unexpected = sorted(set(payload) - {"pattern", "path", "glob", "context"}) if unexpected: raise ToolCallArgError("unexpected search option: " + ", ".join(unexpected)) @@ -2542,19 +2573,22 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: re.compile(pattern) except re.error as error: raise ToolCallArgError("invalid regex: " + str(error)) - return cls( - pattern=pattern, - target_path=session.resolve_path(target_path_arg), - glob_pattern=glob_pattern, - context_lines=context_lines, - cwd=session.cwd, - gitignore_patterns=cls._load_gitignore_patterns(session.cwd), - ) + return cls.Request(pattern, session.resolve_path(target_path_arg), glob_pattern, context_lines) def requires_confirmation(self, session: Session) -> bool: + if self.requests: + return any(not session.is_path_in_cwd(request.target_path) for request in self.requests) return not session.is_path_in_cwd(self.target_path) def preview(self) -> str: + if self.requests: + parts = [] + for request in self.requests[:3]: + glob_text = f', "{request.glob_pattern}"' if request.glob_pattern else "" + parts.append(f'Search("{request.pattern}", {request.target_path}{glob_text})') + if len(self.requests) > 3: + parts.append("...") + return "; ".join(parts) if self.glob_pattern: return f'Search("{self.pattern}", {self.target_path}, "{self.glob_pattern}")' return f'Search("{self.pattern}", {self.target_path})' @@ -2801,7 +2835,48 @@ def _line_matches(self, text: str) -> bool: except re.error as error: raise ToolCallArgError("invalid regex: " + str(error)) - def call(self) -> str: + def _tool_for_request(self, request: Request, *, output_chars: int | None = None) -> Self: + tool = type(self)( + pattern=request.pattern, + target_path=request.target_path, + glob_pattern=request.glob_pattern, + context_lines=request.context_lines, + cwd=self.cwd, + gitignore_patterns=self.gitignore_patterns, + ) + if output_chars is not None: + tool.OUTPUT_CHARS = output_chars + return tool + + @staticmethod + def _result_body(result: str) -> list[str]: + lines = result.splitlines() + if lines and lines[0] == "": + lines = lines[1:] + if lines and lines[-1] == "": + lines = lines[:-1] + return lines + + def _call_batch(self) -> str: + per_query_chars = max(2_000, (self.OUTPUT_CHARS - 1_000) // max(1, len(self.requests))) + lines = ["", f"* query_count: {len(self.requests)}"] + for index, request in enumerate(self.requests, start=1): + tool = self._tool_for_request(request, output_chars=per_query_chars) + section = [f'', f"* pattern: {request.pattern}", f"* path: {tool._relpath(request.target_path)}"] + if request.glob_pattern: + section.append(f"* glob: {request.glob_pattern}") + if request.context_lines: + section.append(f"* context: {request.context_lines}") + section.extend(" " + line for line in self._result_body(tool._call_single())) + section.append("") + if len("\n".join([*lines, *section, ""])) > self.OUTPUT_CHARS: + lines.append("* truncated: true") + break + lines.extend(section) + lines.append("") + return "\n".join(lines) + + def _call_single(self) -> str: if not (os.path.isdir(self.target_path) or os.path.isfile(self.target_path)): if os.path.basename(self.target_path) == "path": raise ToolCallError('not a file or directory: "path" is a placeholder; pass a real file or directory') @@ -2814,6 +2889,11 @@ def call(self) -> str: return self._call_rg(rg) return self._call_python() + def call(self) -> str: + if self.requests: + return self._call_batch() + return self._call_single() + CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT = 20 @@ -9008,6 +9088,19 @@ def _json_str(value: JsonValue) -> str | None: return str(value) +def _json_object_arg(value: JsonValue) -> Json | None: + if isinstance(value, dict): + return value + text = _json_str(value) + if not text: + return None + try: + parsed = json.loads(text) + except json.JSONDecodeError: + return None + return parsed if isinstance(parsed, dict) else None + + def _source_from_json(item: Json) -> tuple[str, ...]: source_values = _json_list(item.get("source")) or _json_list(item.get("sources")) source = [(_json_str(raw) or "").strip() for raw in source_values] diff --git a/tests/test_nanocode_search_tool.py b/tests/test_nanocode_search_tool.py index 21e75e4..1948ca0 100644 --- a/tests/test_nanocode_search_tool.py +++ b/tests/test_nanocode_search_tool.py @@ -1,3 +1,4 @@ +import json import re import nanocode @@ -52,7 +53,7 @@ def test_search_tool_python_backend_finds_or_patterns_and_applies_glob(tmp_path, def test_search_tool_rejects_positional_args(tmp_path): session = Session(cwd=str(tmp_path)) - with pytest.raises(ToolCallError, match="Search args error: expected exactly one object"): + with pytest.raises(ToolCallError, match="Search args error: expected one object or multiple search objects"): SearchTool.make(session, ["class Edit", "class Bash", "class Search", "class Read", "class CreateFile"]) @@ -67,6 +68,63 @@ def test_search_tool_uses_structured_path(tmp_path): assert tool.target_path == str(path) +def test_search_tool_reads_multiple_search_objects_as_args(tmp_path, monkeypatch): + (tmp_path / "pyproject.toml").write_text("version = \"1\"\n", encoding="utf-8") + (tmp_path / "setup.cfg").write_text("version = 1\n", encoding="utf-8") + (tmp_path / "tox.ini").write_text("version = 1\n", encoding="utf-8") + (tmp_path / "skip.py").write_text("version = 1\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") + + tool = SearchTool.make( + session, + [ + {"pattern": "version", "glob": "*.toml"}, + {"pattern": "version", "glob": "*.cfg"}, + {"pattern": "version", "glob": "*.ini"}, + ], + ) + result = tool.call() + + assert [request.glob_pattern for request in tool.requests] == ["*.toml", "*.cfg", "*.ini"] + assert "* query_count: 3" in result + assert "* pyproject.toml:1: version = \"1\"" in result + assert "* setup.cfg:1: version = 1" in result + assert "* tox.ini:1: version = 1" in result + assert "skip.py" not in result + + +def test_search_tool_reads_stringified_search_objects_as_args(tmp_path, monkeypatch): + (tmp_path / "pyproject.toml").write_text("version = \"1\"\n", encoding="utf-8") + (tmp_path / "setup.cfg").write_text("version = 1\n", encoding="utf-8") + session = Session(cwd=str(tmp_path)) + monkeypatch.setattr(nanocode.shutil, "which", lambda name: "") + + tool = SearchTool.make( + session, + [ + json.dumps({"pattern": "version", "glob": "*.toml"}), + json.dumps({"pattern": "version", "glob": "*.cfg"}), + ], + ) + result = tool.call() + + assert [request.glob_pattern for request in tool.requests] == ["*.toml", "*.cfg"] + assert "* query_count: 2" in result + assert "* pyproject.toml:1: version = \"1\"" in result + assert "* setup.cfg:1: version = 1" in result + + +def test_search_tool_formats_stringified_objects_as_readable_cli_args(): + args = [ + json.dumps({"pattern": "version", "glob": "*.toml"}), + json.dumps({"pattern": "version", "glob": "*.cfg"}), + json.dumps({"pattern": "version", "glob": "*.ini"}), + ] + + assert SearchTool.cli_args(args) == ["version", "glob=*.toml", "|", "version", "glob=*.cfg", "|", "version", "glob=*.ini"] + + def test_search_tool_accepts_structured_path_with_regex_and_context(tmp_path, monkeypatch): path = tmp_path / "nanocode.py" path.write_text("class EditTool:\nclass BashTool:\n", encoding="utf-8") From 8d227e88e96b52404796433b1252cea99690e7f8 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 05:14:43 -0700 Subject: [PATCH 19/29] Guide ACT context hygiene --- nanocode.py | 35 +++++++++++++++++++++++++++++++++-- tests/test_nanocode_agent.py | 18 +++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/nanocode.py b/nanocode.py index 1a9fb6b..8456d31 100644 --- a/nanocode.py +++ b/nanocode.py @@ -4086,6 +4086,11 @@ def _state_tool_schema(name: str) -> Json: - Facts are confirmed. Leads are unconfirmed. Checks are verification records. User Rules are future behavior. - Save only what must survive disappearing tool results. Cite tr.N when result-backed. Forget stale raw results. +Context hygiene: +- Before another work tool, clean visible raw results that no longer affect the next action. +- Use Forget(tr.N) for stale, noisy, or already-projected raw results; keys remain recallable. +- Use Keep(tr.N) only for visible raw results that must survive context reduction and are not already captured by Facts, Leads, File Context, or Discovery Context. + Coding workflow: - Before editing, identify the target file, relevant symbols, expected behavior, and evidence. - Read only the smallest useful code region, but enough surrounding context to avoid wrong edits. @@ -4144,6 +4149,9 @@ def _state_tool_schema(name: str) -> Json: Tool Result Index: {tool_result_index} +Context Hygiene: +{context_hygiene} + Discovery Context: {discovery_context} @@ -4175,6 +4183,8 @@ def _state_tool_schema(name: str) -> Json: If Pending User Feedback is not empty, answer it briefly first. Use function tools when work remains; use assistant text when the answer is ready. +Before another work tool, use Forget for stale/noisy visible raw result keys; use Keep only for raw keys needed after context reduction. +Do not Keep raw Read/Search results solely because their File Context or Discovery Context projection is visible. REPLY IN THE LANGUAGE OF LATEST USER REQUEST. YOUR OUTPUT: @@ -5872,7 +5882,7 @@ def apply_context_budget(self) -> None: def build_user_prompt(self) -> str: self._refresh_agent_feedback() - tool_result_index, unreduced_tool_results, latest_tool_results = self._format_act_tool_result_context() + tool_result_index, unreduced_tool_results, latest_tool_results, context_hygiene = self._format_act_tool_result_context() budget = self.context_budget() context_blocks = self._act_file_context_blocks() discovery_context = ToolResultContext.format_discovery_context( @@ -5891,6 +5901,7 @@ def build_user_prompt(self) -> str: user_rules=self.session.state.user_rules.format(), kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", tool_result_index=tool_result_index or "(empty)", + context_hygiene=context_hygiene, discovery_context=discovery_context or "(empty)", file_context=file_context or "(empty)", unreduced_tool_results=unreduced_tool_results or "(empty)", @@ -6128,7 +6139,7 @@ def _finish_current_goal(self) -> None: self.blackboard.checks_required = False self.recent_edits = [] - def _format_act_tool_result_context(self) -> tuple[str, str, str]: + def _format_act_tool_result_context(self) -> tuple[str, str, str, str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter budget = self.context_budget() timeline = self.tool_context.current_timeline_blocks()[-budget.index_items :] @@ -6147,12 +6158,32 @@ def _format_act_tool_result_context(self) -> tuple[str, str, str]: "\n\n".join(sections), "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced)), "\n\n".join(ToolResultContext.render_blocks_for_prompt(latest)), + self._format_context_hygiene(unreduced=unreduced, latest=latest), ) def _act_file_context_blocks(self) -> list[str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter return self.tool_context.kept_results + self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() + def _format_context_hygiene(self, *, unreduced: list[str], latest: list[str]) -> str: + latest_keys = list(ToolResultContext.blocks_by_key(latest)) + latest_key_set = set(latest_keys) + unreduced_keys = [key for key in ToolResultContext.blocks_by_key(unreduced) if key not in latest_key_set] + kept_keys = list(ToolResultContext.blocks_by_key(self.tool_context.kept_results)) + lines = [] + if latest_keys: + lines.append("- latest raw keys: " + ", ".join(latest_keys)) + if unreduced_keys: + lines.append("- unreduced raw keys: " + ", ".join(unreduced_keys)) + if kept_keys: + lines.append("- kept keys: " + ", ".join(kept_keys)) + if not (latest_keys or unreduced_keys): + lines.append("- no visible raw result keys need action now.") + else: + lines.append("- before another work tool: Forget stale/noisy raw keys; Keep only raw keys needed after context reduction.") + lines.append("- do not Keep raw results already represented in Facts, Leads, File Context, or Discovery Context.") + return "\n".join(lines) + def _prune_tool_result_store(self) -> None: keep = self._protected_tool_result_keys() while len(self.session.state.tool_result_store) > self.MAX_COMPLETED_GOAL_TOOL_RESULTS: diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 2d1ccd5..6eb8416 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -468,7 +468,7 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk assert agent.mode == nanocode.AgentMode.ACT assert "key=tr.1" in _blocks_text(agent.tool_context.recent) - index, unreduced, latest = agent._format_act_tool_result_context() + index, unreduced, latest, hygiene = agent._format_act_tool_result_context() assert "one.txt" in unreduced assert "|one" not in unreduced assert "content=file_context" in unreduced @@ -478,6 +478,9 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk assert "recall=tr.1" in index assert "recall=tr.2" in index assert "output:\n" not in index + assert "- latest raw keys: tr.2" in hygiene + assert "- unreduced raw keys: tr.1" in hygiene + assert "Forget stale/noisy raw keys" in hygiene file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") assert "File: one.txt" in file_context assert "|one" in file_context @@ -980,6 +983,19 @@ def test_act_prompt_includes_kept_tool_results(tmp_path): assert len(agent.tool_context.kept_results) == 1 +def test_act_prompt_includes_context_hygiene_guidance(tmp_path): + agent = Agent(Session(cwd=str(tmp_path))) + agent.tool_context.kept_results = ['- ok tool=Read args=["kept.txt"] key=tr.8\n output:\nkept'] + + prompt = agent.build_user_prompt() + hygiene = _prompt_section(prompt, "Context Hygiene", "Discovery Context") + + assert "- kept keys: tr.8" in hygiene + assert "- no visible raw result keys need action now." in hygiene + assert "Before another work tool, use Forget for stale/noisy visible raw result keys" in prompt + assert "Do not Keep raw Read/Search results solely because their File Context or Discovery Context projection is visible." in prompt + + def test_act_prompt_projects_search_results_to_discovery_context(tmp_path): sample = tmp_path / "sample.py" sample.write_text("class StatusBar:\n def elapsed(self):\n return 1\n", encoding="utf-8") From 095aefd62feea34b61262f316ad694f47477fb0e Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 05:19:51 -0700 Subject: [PATCH 20/29] Show visible file ranges in ACT hygiene --- nanocode.py | 94 ++++++++++++++++++++++-------------- tests/test_nanocode_agent.py | 17 +++++++ 2 files changed, 75 insertions(+), 36 deletions(-) diff --git a/nanocode.py b/nanocode.py index 8456d31..f9b19f2 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1565,6 +1565,52 @@ def compact_file_context_block(cls, block: str) -> str: @classmethod def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int) -> str: + segments_by_path, omitted = cls._current_file_context_segments(blocks, cwd=cwd) + if not segments_by_path and not omitted: + return "" + + lines = [ + "Source Policy:", + "- Built dynamically for this prompt from active raw Read and Edit results.", + "- Overlapping lines use the newest active Read/Edit result.", + "- Edit results can clear stale older lines when edits shift line numbers.", + "- If file stat changed, projected lines are hash-checked against the current file and stale lines are omitted.", + "", + ] + for path in sorted(segments_by_path): + segments = segments_by_path[path] + lines.extend(["File: " + path, "Ranges:"]) + for start, end, source, _segment_lines in segments: + lines.append("- " + str(start) + ":" + str(end) + " source=" + source) + lines.append("Content:") + for start, end, source, segment_lines in segments: + lines.append("@@ " + str(start) + ":" + str(end) + " source=" + source) + lines.extend(segment_lines) + lines.append("") + if omitted: + lines.append("Omitted stale content:") + for path in sorted(omitted): + for source in sorted(omitted[path], key=cls._result_key_counter): + lines.append("- " + path + " source=" + source + " stale_lines=" + str(omitted[path][source])) + lines.append("") + + rendered = "\n".join(lines).rstrip() + return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered + + @classmethod + def format_file_context_index(cls, blocks: list[str], *, cwd: str = "", max_chars: int) -> str: + segments_by_path, _omitted = cls._current_file_context_segments(blocks, cwd=cwd) + if not segments_by_path: + return "" + lines = [] + for path in sorted(segments_by_path): + ranges = [str(start) + ":" + str(end) + " source=" + source for start, end, source, _segment_lines in segments_by_path[path]] + lines.append("- " + path + ": " + "; ".join(ranges)) + rendered = "\n".join(lines) + return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered + + @classmethod + def _current_file_context_segments(cls, blocks: list[str], *, cwd: str) -> tuple[dict[str, list[tuple[int, int, str, list[str]]]], dict[str, dict[str, int]]]: files: dict[str, dict[int, tuple[str, str]]] = {} omitted: dict[str, dict[str, int]] = {} items = sorted(cls._file_context_items(blocks), key=lambda item: (item.order, item.phase, item.path, item.start)) @@ -1596,38 +1642,8 @@ def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int omitted[path][source] += 1 continue file_lines[start] = (source, line) - if not any(files.values()) and not omitted: - return "" - - lines = [ - "Source Policy:", - "- Built dynamically for this prompt from active raw Read and Edit results.", - "- Overlapping lines use the newest active Read/Edit result.", - "- Edit results can clear stale older lines when edits shift line numbers.", - "- If file stat changed, projected lines are hash-checked against the current file and stale lines are omitted.", - "", - ] - for path in sorted(files): - segments = cls._file_context_segments(files[path]) - if not segments: - continue - lines.extend(["File: " + path, "Ranges:"]) - for start, end, source, _segment_lines in segments: - lines.append("- " + str(start) + ":" + str(end) + " source=" + source) - lines.append("Content:") - for start, end, source, segment_lines in segments: - lines.append("@@ " + str(start) + ":" + str(end) + " source=" + source) - lines.extend(segment_lines) - lines.append("") - if omitted: - lines.append("Omitted stale content:") - for path in sorted(omitted): - for source in sorted(omitted[path], key=cls._result_key_counter): - lines.append("- " + path + " source=" + source + " stale_lines=" + str(omitted[path][source])) - lines.append("") - - rendered = "\n".join(lines).rstrip() - return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered + segments = {path: path_segments for path in sorted(files) if (path_segments := cls._file_context_segments(files[path]))} + return segments, omitted @classmethod def format_discovery_context(cls, blocks: list[str], *, max_chars: int) -> str: @@ -5882,9 +5898,9 @@ def apply_context_budget(self) -> None: def build_user_prompt(self) -> str: self._refresh_agent_feedback() - tool_result_index, unreduced_tool_results, latest_tool_results, context_hygiene = self._format_act_tool_result_context() budget = self.context_budget() context_blocks = self._act_file_context_blocks() + tool_result_index, unreduced_tool_results, latest_tool_results, context_hygiene = self._format_act_tool_result_context(context_blocks=context_blocks) discovery_context = ToolResultContext.format_discovery_context( context_blocks, max_chars=max(1, budget.raw_chars // 3), @@ -6139,12 +6155,13 @@ def _finish_current_goal(self) -> None: self.blackboard.checks_required = False self.recent_edits = [] - def _format_act_tool_result_context(self) -> tuple[str, str, str, str]: + def _format_act_tool_result_context(self, *, context_blocks: list[str] | None = None) -> tuple[str, str, str, str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter budget = self.context_budget() timeline = self.tool_context.current_timeline_blocks()[-budget.index_items :] unreduced = self.tool_context.unreduced_recent_blocks(checkpoint) latest = self.tool_context.latest_raw_blocks() + context_blocks = context_blocks if context_blocks is not None else self._act_file_context_blocks() visible_keys = set(ToolResultContext.blocks_by_key(timeline + unreduced + latest + self.tool_context.kept_results)) archived_limit = max(0, budget.index_items - len(timeline)) archived = [item.format(result_key=key) for key, item in self.session.state.tool_result_store.items() if key not in visible_keys] @@ -6158,18 +6175,19 @@ def _format_act_tool_result_context(self) -> tuple[str, str, str, str]: "\n\n".join(sections), "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced)), "\n\n".join(ToolResultContext.render_blocks_for_prompt(latest)), - self._format_context_hygiene(unreduced=unreduced, latest=latest), + self._format_context_hygiene(unreduced=unreduced, latest=latest, context_blocks=context_blocks), ) def _act_file_context_blocks(self) -> list[str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter return self.tool_context.kept_results + self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() - def _format_context_hygiene(self, *, unreduced: list[str], latest: list[str]) -> str: + def _format_context_hygiene(self, *, unreduced: list[str], latest: list[str], context_blocks: list[str]) -> str: latest_keys = list(ToolResultContext.blocks_by_key(latest)) latest_key_set = set(latest_keys) unreduced_keys = [key for key in ToolResultContext.blocks_by_key(unreduced) if key not in latest_key_set] kept_keys = list(ToolResultContext.blocks_by_key(self.tool_context.kept_results)) + file_context_index = ToolResultContext.format_file_context_index(context_blocks, cwd=self.session.cwd, max_chars=1_200) lines = [] if latest_keys: lines.append("- latest raw keys: " + ", ".join(latest_keys)) @@ -6177,6 +6195,10 @@ def _format_context_hygiene(self, *, unreduced: list[str], latest: list[str]) -> lines.append("- unreduced raw keys: " + ", ".join(unreduced_keys)) if kept_keys: lines.append("- kept keys: " + ", ".join(kept_keys)) + if file_context_index: + lines.append("- visible file ranges already available:") + lines.extend(" " + line for line in file_context_index.splitlines()) + lines.append("- use visible File Context line anchors before Read; Read only missing ranges or after file changes.") if not (latest_keys or unreduced_keys): lines.append("- no visible raw result keys need action now.") else: diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 6eb8416..ef143a3 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -480,6 +480,10 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk assert "output:\n" not in index assert "- latest raw keys: tr.2" in hygiene assert "- unreduced raw keys: tr.1" in hygiene + assert "- visible file ranges already available:" in hygiene + assert "one.txt: 0:1 source=tr.1" in hygiene + assert "two.txt: 0:1 source=tr.2" in hygiene + assert "use visible File Context line anchors before Read" in hygiene assert "Forget stale/noisy raw keys" in hygiene file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") assert "File: one.txt" in file_context @@ -996,6 +1000,19 @@ def test_act_prompt_includes_context_hygiene_guidance(tmp_path): assert "Do not Keep raw Read/Search results solely because their File Context or Discovery Context projection is visible." in prompt +def test_act_prompt_context_hygiene_lists_visible_file_ranges(tmp_path): + (tmp_path / "sample.txt").write_text("alpha\nbeta\ngamma\n", encoding="utf-8") + agent = Agent(Session(cwd=str(tmp_path))) + + agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[1, 3])}]) + + prompt = agent.build_user_prompt() + hygiene = _prompt_section(prompt, "Context Hygiene", "Discovery Context") + assert "- visible file ranges already available:" in hygiene + assert "sample.txt: 1:3 source=tr.1" in hygiene + assert "use visible File Context line anchors before Read" in hygiene + + def test_act_prompt_projects_search_results_to_discovery_context(tmp_path): sample = tmp_path / "sample.py" sample.write_text("class StatusBar:\n def elapsed(self):\n return 1\n", encoding="utf-8") From e05b00cd0629656c7ff7f6c43b723cbbcbc19994 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 06:00:53 -0700 Subject: [PATCH 21/29] Merge observe reduction into compact --- nanocode.py | 742 +++++++------------------------- tests/test_nanocode_agent.py | 705 ++---------------------------- tests/test_nanocode_commands.py | 25 +- tests/test_nanocode_loop.py | 15 +- 4 files changed, 189 insertions(+), 1298 deletions(-) diff --git a/nanocode.py b/nanocode.py index f9b19f2..c79542f 100644 --- a/nanocode.py +++ b/nanocode.py @@ -596,14 +596,14 @@ class ContextBudget: kept_chars: int kept_block_chars: int index_items: int - observe_after_results: int + prompt_chars: int planless_discovery_tool_calls: int CONTEXT_BUDGETS: dict[str, ContextBudget] = { - "low": ContextBudget(36_000, 16_000, 4_000, 20, 6, 6), - "medium": ContextBudget(72_000, 32_000, 6_000, 30, 10, 8), - "high": ContextBudget(120_000, 64_000, 8_000, 60, 16, 12), + "low": ContextBudget(36_000, 16_000, 4_000, 20, 80_000, 6), + "medium": ContextBudget(72_000, 32_000, 6_000, 30, 160_000, 8), + "high": ContextBudget(120_000, 64_000, 8_000, 60, 240_000, 12), } @@ -615,7 +615,7 @@ class ContextBudget: @dataclass class RuntimeSettings: shell_timeout: int = 60 - compact_at: int = 50 + compact_at: int = 80 max_agent_steps: int = 100 auto_clean_recent: str = "1d" context_budget: str = "medium" @@ -627,7 +627,7 @@ def from_dict(cls, data: Json, *, yolo: bool = False, debug: bool = False) -> "R runtime = Config.table(data, "runtime") return cls( shell_timeout=Config.int(runtime, "shell_timeout", 60), - compact_at=Config.int(runtime, "compact_at", 50), + compact_at=cls.clean_compact_at(Config.int(runtime, "compact_at", 80)), max_agent_steps=max(1, Config.int(runtime, "max_agent_steps", 100) or 0), auto_clean_recent=cls.clean_retention(Config.str(runtime, "auto_clean_recent", "1d")), context_budget=cls.clean_context_budget(Config.str(runtime, "context_budget", "medium")), @@ -644,6 +644,14 @@ def clean_retention(value: str) -> str: raise ConfigError("runtime.auto_clean_recent must be off or a duration like 30m, 12h, 3d") return value + @staticmethod + def clean_compact_at(value: int) -> int: + if value <= 0: + return 0 + if value > 100: + raise ConfigError("runtime.compact_at must be a context percent from 1 to 100, or 0 to disable") + return value + @staticmethod def clean_retention_seconds(value: str) -> int: value = RuntimeSettings.clean_retention(value) @@ -783,7 +791,7 @@ class ConfigFile: [runtime] shell_timeout = 60 -compact_at = 50 +compact_at = 80 max_agent_steps = 100 context_budget = "medium" # Automatically delete inactive session directories older than this. Use "off" to disable. @@ -827,7 +835,6 @@ def load(cls, path: str | None = None) -> Json: class AgentMode(StrEnum): ACT = "act" - OBSERVE = "observe" @dataclass @@ -839,6 +846,8 @@ class AgentRunResult: @dataclass class RuntimeState: debug_prompt_count: int = 0 + last_context_chars: int = 0 + last_context_percent: int = 0 last_prompt_tokens: int = 0 last_completion_tokens: int = 0 last_total_tokens: int = 0 @@ -1348,60 +1357,8 @@ class ToolResultContext: DISCOVERY_CONTEXT_BLOCK_CHARS: ClassVar[int] = 4_000 latest: list[str] = field(default_factory=list) recent: list[str] = field(default_factory=list) - kept_results: list[str] = field(default_factory=list) reactivated_keys: set[str] = field(default_factory=set) - def forget_results(self, keys: list[str]) -> list[str]: - wanted = set(keys) - if not wanted: - return [] - removed = [] - self.reactivated_keys.difference_update(wanted) - - def update(blocks: list[str], *, compact: bool) -> list[str]: - updated = [] - for block in blocks: - key = self.result_key(block) - if key in wanted: - removed.append(key) - if compact: - updated.append(self.compact_block(block)) - else: - updated.append(block) - return updated - - self.kept_results = update(self.kept_results, compact=False) - self.latest = update(self.latest, compact=True) - self.recent = update(self.recent, compact=True) - return list(dict.fromkeys(removed)) - - def keep_results(self, actions: list[Json], observed_blocks: list[str], *, max_chars: int, max_block_chars: int) -> list[str]: - wanted = [] - for action in actions: - if _json_str(action.get("type")) == "keep": - wanted.extend(key for key in _source_from_json(action) if key.startswith("tr.")) - wanted = list(dict.fromkeys(wanted)) - return self.keep_result_keys(wanted, observed_blocks, max_chars=max_chars, max_block_chars=max_block_chars) - - def keep_result_keys(self, keys: list[str], observed_blocks: list[str], *, max_chars: int, max_block_chars: int) -> list[str]: - wanted = list(dict.fromkeys(keys)) - if not wanted: - return [] - by_key = self.blocks_by_key(observed_blocks) - selected = {key: self.bound_block(by_key[key], max_chars=max_block_chars) for key in wanted if key in by_key} - if not selected: - return [] - existing = self.blocks_by_key(self.kept_results) - self.kept_results = [block for key, block in existing.items() if key not in selected] + [selected[key] for key in wanted if key in selected] - self.bound_kept(max_chars=max_chars, max_block_chars=max_block_chars) - retained = self.blocks_by_key(self.kept_results) - return [key for key in wanted if key in selected and key in retained] - - def bound_kept(self, *, max_chars: int, max_block_chars: int) -> None: - self.kept_results = [self.bound_block(block, max_chars=max_block_chars) for block in self.kept_results] - while self.kept_results and len("\n\n".join(self.kept_results)) > max_chars: - del self.kept_results[0] - def append_latest(self, executions: list[ToolCallExecution], *, max_index_items: int, checkpoint: int, append: bool = False) -> None: if not executions: return @@ -1597,18 +1554,6 @@ def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int rendered = "\n".join(lines).rstrip() return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered - @classmethod - def format_file_context_index(cls, blocks: list[str], *, cwd: str = "", max_chars: int) -> str: - segments_by_path, _omitted = cls._current_file_context_segments(blocks, cwd=cwd) - if not segments_by_path: - return "" - lines = [] - for path in sorted(segments_by_path): - ranges = [str(start) + ":" + str(end) + " source=" + source for start, end, source, _segment_lines in segments_by_path[path]] - lines.append("- " + path + ": " + "; ".join(ranges)) - rendered = "\n".join(lines) - return _shorten(rendered, max_chars) if max_chars > 0 and len(rendered) > max_chars else rendered - @classmethod def _current_file_context_segments(cls, blocks: list[str], *, cwd: str) -> tuple[dict[str, list[tuple[int, int, str, list[str]]]], dict[str, dict[str, int]]]: files: dict[str, dict[int, tuple[str, str]]] = {} @@ -1919,15 +1864,6 @@ def _result_key_counter(key: str) -> int: def max_counter(cls, blocks: list[str]) -> int: return max((cls.result_counter(block) for block in blocks), default=0) - @staticmethod - def forget_result_keys_from_actions(actions: list[Json]) -> list[str]: - keys: list[str] = [] - for action in actions: - if _json_str(action.get("type")) == "forget": - keys.extend(key for key in _source_from_json(action) if key.startswith("tr.")) - return list(dict.fromkeys(keys)) - - ConfirmationResult: TypeAlias = bool | str ConfirmCallback: TypeAlias = Callable[[ParsedToolCall, Tool], ConfirmationResult] ToolDisplayCallback: TypeAlias = Callable[[ParsedToolCall, Tool], None] @@ -3848,74 +3784,6 @@ def _content(self, item: ToolResultItem) -> str: return "\n".join(chunks) -def _tool_result_keys_from_args(args: list[JsonValue]) -> list[str]: - keys: list[str] = [] - values: list[JsonValue] = [] - for arg in args: - values.extend(arg if isinstance(arg, list) else [arg]) - for value in values: - key = str(value).strip() - if not re.fullmatch(r"tr\.\d+", key): - raise ToolCallArgError("invalid result key: use tr.N") - keys.append(key) - keys = list(dict.fromkeys(keys)) - if not keys: - raise ToolCallArgError("requires at least one tr.N key") - return keys - - -@dataclass -class ForgetTool(Tool): - NAME: ClassVar[str] = "Forget" - EFFECT: ClassVar[ToolEffect] = ToolEffect.OTHER - DESCRIPTION: ClassVar[tuple[str, ...]] = ( - "Remove visible tool result keys from active context; keys remain recallable.", - "This is the inverse of Recall for active context membership.", - "Does not create a new result key.", - ) - SIGNATURE: ClassVar[str] = "Forget(key[, key...]) -> remove active context entries" - EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["tr.1", "tr.2"]',) - REQUIRES_CONFIRMATION: ClassVar[bool | None] = False - - keys: list[str] - - @classmethod - def make(cls, session: Session, args: list[JsonValue]) -> Self: - return cls(keys=_tool_result_keys_from_args(args)) - - def preview(self) -> str: - return "Forget " + ", ".join(self.keys) - - def call(self) -> str: - return "\n* requested: " + ", ".join(self.keys) + "\n" - - -@dataclass -class KeepTool(Tool): - NAME: ClassVar[str] = "Keep" - EFFECT: ClassVar[ToolEffect] = ToolEffect.OTHER - DESCRIPTION: ClassVar[tuple[str, ...]] = ( - "Keep visible raw tool result keys in active context.", - "Use during observe or when a visible result should survive context reduction.", - "Does not create a new result key.", - ) - SIGNATURE: ClassVar[str] = "Keep(key[, key...]) -> keep active context entries" - EXAMPLE: ClassVar[tuple[str, ...]] = ('Example args: ["tr.1", "tr.2"]',) - REQUIRES_CONFIRMATION: ClassVar[bool | None] = False - - keys: list[str] - - @classmethod - def make(cls, session: Session, args: list[JsonValue]) -> Self: - return cls(keys=_tool_result_keys_from_args(args)) - - def preview(self) -> str: - return "Keep " + ", ".join(self.keys) - - def call(self) -> str: - return "\n* requested: " + ", ".join(self.keys) + "\n" - - ############################ # Tool Registry ############################ @@ -3932,11 +3800,9 @@ def call(self) -> str: BashTool.NAME: BashTool, GitTool.NAME: GitTool, ToolResultTool.NAME: ToolResultTool, - ForgetTool.NAME: ForgetTool, - KeepTool.NAME: KeepTool, } -CONTEXT_TOOL_NAMES: frozenset[str] = frozenset({ToolResultTool.NAME, ForgetTool.NAME, KeepTool.NAME}) -CONTEXT_TOOL_CLASSES: tuple[ToolClass, ...] = (ToolResultTool, ForgetTool, KeepTool) +CONTEXT_TOOL_NAMES: frozenset[str] = frozenset({ToolResultTool.NAME}) +REMOVED_CONTEXT_TOOL_NAMES: frozenset[str] = frozenset({"Forget", "Keep"}) def _canonical_tool_name(name: str | None) -> str: @@ -4017,11 +3883,6 @@ def _canonical_tool_name(name: str | None) -> str: {"text": TOOL_STRING_SCHEMA, "message": TOOL_STRING_SCHEMA}, ["text", "message"], ), - "forget": ( - "Remove visible tool result keys from active context; keys remain recallable.", - {"source": TOOL_STRING_LIST_SCHEMA, "reason": TOOL_STRING_SCHEMA}, - ["source", "reason"], - ), "verify": ( "Record a concrete check result or blocker.", { @@ -4032,11 +3893,6 @@ def _canonical_tool_name(name: str | None) -> str: }, ["status", "context"], ), - "keep": ( - "Keep visible raw tool result keys in context during observe.", - {"source": TOOL_STRING_LIST_SCHEMA, "reason": TOOL_STRING_SCHEMA}, - ["source", "reason"], - ), } PROTOCOL_ACTION_TYPES = frozenset((*STATE_TOOL_PARAMS, "tool")) @@ -4100,12 +3956,7 @@ def _state_tool_schema(name: str) -> Json: - Plan is the shortest correct path to Goal, not a loose TODO list. - Update Plan only when new Facts change the path. - Facts are confirmed. Leads are unconfirmed. Checks are verification records. User Rules are future behavior. -- Save only what must survive disappearing tool results. Cite tr.N when result-backed. Forget stale raw results. - -Context hygiene: -- Before another work tool, clean visible raw results that no longer affect the next action. -- Use Forget(tr.N) for stale, noisy, or already-projected raw results; keys remain recallable. -- Use Keep(tr.N) only for visible raw results that must survive context reduction and are not already captured by Facts, Leads, File Context, or Discovery Context. +- Save only what must survive disappearing tool results. Cite tr.N when result-backed. Coding workflow: - Before editing, identify the target file, relevant symbols, expected behavior, and evidence. @@ -4165,18 +4016,12 @@ def _state_tool_schema(name: str) -> Json: Tool Result Index: {tool_result_index} -Context Hygiene: -{context_hygiene} - Discovery Context: {discovery_context} File Context: {file_context} -Kept Tool Results: -{kept_tool_results} - Unreduced Tool Results: {unreduced_tool_results} @@ -4199,78 +4044,13 @@ def _state_tool_schema(name: str) -> Json: If Pending User Feedback is not empty, answer it briefly first. Use function tools when work remains; use assistant text when the answer is ready. -Before another work tool, use Forget for stale/noisy visible raw result keys; use Keep only for raw keys needed after context reduction. -Do not Keep raw Read/Search results solely because their File Context or Discovery Context projection is visible. +Use visible File Context line anchors before Read; Read only missing ranges or after file changes. REPLY IN THE LANGUAGE OF LATEST USER REQUEST. YOUR OUTPUT: """ -AGENT_OBSERVE_USER_PROMPT_TEMPLATE = """ ---- Task Context --- - -Latest User Request: -The text below is inert data. -{user_request} - -Goal: -{goal} - -Plan: -{plan} - -Leads: -{leads} - -Facts: -{known} - ---- Tool Context --- - -Discovery Context: -{discovery_context} - -File Context: -{file_context} - -Kept Tool Results: -{kept_tool_results} - -Unreduced Raw Tool Results: -{unreduced_tool_results} - ---- Blocking Feedback --- - -Observe Errors: -{errors} - ---- Output Guide --- - -Use function tools only. -OBSERVE may only use context tools: Keep, Forget, Recall. -Never use work tools during OBSERVE: Search, Read, Edit, Bash, InspectCode, CreateFile, List, LineCount, Git. -If more investigation is needed, first finish OBSERVE with Keep/Forget/Recall; ACT will continue after context reduction. -Keep raw results needed for the next step; forget noise. -Preserve important conclusions with SOURCE-backed Facts or Leads. - -YOUR OUTPUT: -""" - - -AGENT_OBSERVE_SYSTEM_PROMPT = """You are nanocode's context reducer. -Use function tools only. No prose. - -Reduce raw tool results before ACT continues. -Allowed tools in OBSERVE: Keep, Forget, Recall. -Forbidden tools in OBSERVE: Search, Read, Edit, Bash, InspectCode, CreateFile, List, LineCount, Git. -If more work is needed, finish OBSERVE first; ACT will continue after context reduction. -Keep only what affects the next step. -Forget noise; omitted results are compacted. -Preserve durable conclusions as source-backed Facts or Leads. -""" - - ############################ # Compactor Prompt ############################ @@ -4279,6 +4059,7 @@ def _state_tool_schema(name: str) -> Json: COMPACTOR_PROMPT = """You are nanocode's conversation-history compactor. Compress conversation history and Facts so the coding agent can continue later. +If tool results are included, preserve only conclusions, file paths, ranges, errors, and decisions needed to continue. Do not solve the task or add unsupported facts. Use the compact function tool only. @@ -4313,6 +4094,10 @@ def _state_tool_schema(name: str) -> Json: ----------- Conversation_To_Compact Begin ------ {conversation} -------- Conversation_To_Compact End ----------- + +----------- Tool_Results_To_Compact Begin ------ +{tool_results} +-------- Tool_Results_To_Compact End ----------- """ @@ -5761,28 +5546,23 @@ def __init__(self, session: Session, model_client: ModelClient, blackboard: Blac self.model_client = model_client self.blackboard = blackboard - def compact(self) -> int: + def compact(self, *, tool_results: str = "") -> int: count = len(self.session.state.conversation) - if count <= self.KEEP_RECENT: + tool_results = tool_results.strip() + if count <= self.KEEP_RECENT and not tool_results: return 0 - old_items = self.session.state.conversation[: -self.KEEP_RECENT] - keep_items = self.session.state.conversation[-self.KEEP_RECENT :] - summary, known = self._summarize(old_items) + old_items = self.session.state.conversation[: -self.KEEP_RECENT] if count > self.KEEP_RECENT else [] + keep_items = self.session.state.conversation[-self.KEEP_RECENT :] if count > self.KEEP_RECENT else list(self.session.state.conversation) + summary, known = self._summarize(old_items, tool_results=tool_results) self.session.state.conversation = [AssistantMessage(content="Conversation compact summary:\n" + summary)] + keep_items self.blackboard.known = known - return count - - def maybe_compact(self) -> bool: - if self.session.settings.compact_at <= 0: - return False - if len(self.session.state.conversation) <= self.session.settings.compact_at: - return False - return self.compact() > 0 + return count + (1 if tool_results else 0) - def _summarize(self, items: list[ConversationItem]) -> tuple[str, list[KnownItem]]: + def _summarize(self, items: list[ConversationItem], *, tool_results: str = "") -> tuple[str, list[KnownItem]]: user_prompt = COMPACT_USER_PROMPT_TEMPLATE.format( known="\n".join(KnownItem.format_item(item) for item in self.blackboard.known) or "(empty)", - conversation="\n\n".join(item.format() for item in items), + conversation="\n\n".join(item.format() for item in items) or "(empty)", + tool_results=tool_results or "(empty)", ).strip() response = self.model_client.request( COMPACTOR_PROMPT.strip(), user_prompt, activity="compact", tool_schemas=[COMPACT_TOOL_SCHEMA], required_tool="compact" @@ -5839,13 +5619,10 @@ class Agent: MAX_AGENT_FEEDBACK_ERRORS: ClassVar[int] = 8 MAX_AGENT_FEEDBACK_ERROR_LEN: ClassVar[int] = 220 MODEL_TIMEOUT_RETRY_DELAYS: ClassVar[tuple[int, ...]] = (3, 10, 20, 30, 60, 120) - ACT_ACTION_TYPES: ClassVar[set[str]] = {"goal", "plan", "lead", "known", "tool", "verify", "user_rule", "forget"} - OBSERVE_ACTION_TYPES: ClassVar[set[str]] = {"keep", "lead", "known", "forget", "tool"} + ACT_ACTION_TYPES: ClassVar[set[str]] = {"goal", "plan", "lead", "known", "tool", "verify", "user_rule"} COMPLETED_PLAN_STATUSES: ClassVar[set[PlanStatus]] = {PlanStatus.DONE, PlanStatus.BLOCKED} MAX_COMPLETED_GOAL_TOOL_RESULTS: ClassVar[int] = 50 RECENT_EDITS: ClassVar[int] = 20 - RULE_VISIBLE_RESULTS: ClassVar[str] = "use visible tool result keys only." - RULE_CLOSE_SOURCE: ClassVar[str] = "close or update state that depends on the result before forgetting its source." RULE_CHANGE_FAILED_TOOL: ClassVar[str] = "change args or switch tools; after edit failures use a smaller batch and reread only stale ranges." RULE_GOAL_PLAN_FIRST: ClassVar[str] = "set goal and a short plan before mutating tools or verify." RULE_VERIFY_DIRECTLY: ClassVar[str] = 'run checks, then report verify status="passed"|"failed"|"blocked".' @@ -5878,9 +5655,6 @@ def __init__(self, session: Session): self.failed_tool_call_key: tuple[str, tuple[str, ...]] | None = None self.failed_tool_call_count = 0 self.agent_feedback_errors: list[str] = [] - self.observe_feedback_errors: list[str] = [] - self.latest_context_tool_kept: list[str] = [] - self.latest_context_tool_forgotten: list[str] = [] self.latest_context_tool_recalled: list[str] = [] self.task_alignment_required = False self.incomplete_task_context_at_turn_start = False @@ -5893,14 +5667,13 @@ def context_budget(self) -> ContextBudget: def apply_context_budget(self) -> None: budget = self.context_budget() checkpoint = self.blackboard.memory_checkpoint_tool_result_counter - self.tool_context.bound_kept(max_chars=budget.kept_chars, max_block_chars=budget.kept_block_chars) self.tool_context.prune_recent(max_index_items=budget.index_items, checkpoint=checkpoint) def build_user_prompt(self) -> str: self._refresh_agent_feedback() budget = self.context_budget() context_blocks = self._act_file_context_blocks() - tool_result_index, unreduced_tool_results, latest_tool_results, context_hygiene = self._format_act_tool_result_context(context_blocks=context_blocks) + tool_result_index, unreduced_tool_results, latest_tool_results = self._format_act_tool_result_context() discovery_context = ToolResultContext.format_discovery_context( context_blocks, max_chars=max(1, budget.raw_chars // 3), @@ -5915,9 +5688,7 @@ def build_user_prompt(self) -> str: environment=self._format_environment(), conversation_history="\n\n".join(item.format() for item in conversation) if conversation else "(empty)", user_rules=self.session.state.user_rules.format(), - kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", tool_result_index=tool_result_index or "(empty)", - context_hygiene=context_hygiene, discovery_context=discovery_context or "(empty)", file_context=file_context or "(empty)", unreduced_tool_results=unreduced_tool_results or "(empty)", @@ -5973,34 +5744,6 @@ def _format_environment(self) -> str: ) return "\n".join(lines) - def build_observe_prompt(self) -> str: - current = self.blackboard - unreduced_blocks = self._unreferenced_unreduced_blocks() - budget = self.context_budget() - file_context = ToolResultContext.format_file_context( - self.tool_context.kept_results + unreduced_blocks, - cwd=self.session.cwd, - max_chars=budget.raw_chars + budget.kept_chars, - ) - discovery_context = ToolResultContext.format_discovery_context( - self.tool_context.kept_results + unreduced_blocks, - max_chars=max(1, budget.raw_chars // 3), - ) - unreduced = "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced_blocks)) - return AGENT_OBSERVE_USER_PROMPT_TEMPLATE.format( - user_rules=self.session.state.user_rules.format(), - goal=current.goal or "(empty)", - plan="\n".join(item.format() for item in current.plan) if current.plan else "(empty)", - leads="\n".join(item.format() for item in current.leads) if current.leads else "(empty)", - known="\n".join(KnownItem.format_item(item) for item in current.known) if current.known else "(empty)", - discovery_context=discovery_context or "(empty)", - file_context=file_context or "(empty)", - kept_tool_results="\n\n".join(ToolResultContext.render_blocks_for_prompt(self.tool_context.kept_results)) or "(empty)", - errors="\n".join("- " + error for error in self.observe_feedback_errors) or "(empty)", - unreduced_tool_results=unreduced or "(empty)", - user_request=self._format_user_request(), - ).strip() - def _system_prompt(self, template: str | None = None) -> str: return (template or AGENT_SYSTEM_PROMPT).strip() @@ -6053,6 +5796,54 @@ def _set_status_notice(self, text: str, ttl: float = 5.0) -> None: def compact_history(self) -> int: return self.compactor.compact() + def compact_context(self) -> int: + observed_blocks = self.tool_context.unreduced_blocks(self.blackboard.memory_checkpoint_tool_result_counter) + tool_results = _shorten( + "\n\n".join(observed_blocks), + self.context_budget().raw_chars, + ) + compacted_conversation = self.compactor.compact(tool_results=tool_results) + observed_counter = ToolResultContext.max_counter(observed_blocks) + if observed_blocks: + self.tool_context.compact_observed(observed_blocks) + self._mark_memory_checkpoint(observed_counter) + self.apply_context_budget() + return compacted_conversation + len(observed_blocks) + + def _prompt_context_chars(self, system_prompt: str, user_prompt: str, tool_schemas: list[Json]) -> int: + schema_chars = len(json.dumps(tool_schemas, ensure_ascii=False, sort_keys=True, separators=(",", ":"))) if tool_schemas else 0 + return len(system_prompt) + len(user_prompt) + schema_chars + + def _context_percent(self, chars: int) -> int: + budget_chars = max(1, self.context_budget().prompt_chars) + if chars <= 0: + return 0 + return max(1, (chars * 100 + budget_chars - 1) // budget_chars) + + def _record_context_size(self, system_prompt: str, user_prompt: str, tool_schemas: list[Json]) -> int: + chars = self._prompt_context_chars(system_prompt, user_prompt, tool_schemas) + percent = self._context_percent(chars) + self.session.state.last_context_chars = chars + self.session.state.last_context_percent = percent + return percent + + def _should_compact_context(self, percent: int) -> bool: + threshold = self.session.settings.compact_at + return threshold > 0 and percent >= threshold + + def _prepare_request_context(self) -> tuple[str, str, str, list[Json]]: + for _attempt in range(2): + system_prompt, user_prompt, activity = self._step_prompts() + tool_schemas = self._tool_schemas() + percent = self._record_context_size(system_prompt, user_prompt, tool_schemas) + if activity == "agent" and self._should_compact_context(percent) and self.compact_context() > 0: + continue + return system_prompt, user_prompt, activity, tool_schemas + system_prompt, user_prompt, activity = self._step_prompts() + tool_schemas = self._tool_schemas() + self._record_context_size(system_prompt, user_prompt, tool_schemas) + return system_prompt, user_prompt, activity, tool_schemas + def cancel_current_goal(self) -> None: self._finish_current_goal() @@ -6128,9 +5919,8 @@ def run_stream_loop( raise def _remember_format_gate(self, format_error: str) -> None: - remember_error = self._remember_observe_error if self.mode == AgentMode.OBSERVE else self._remember_agent_error rule = self.RULE_VALID_TOOL_JSON if "invalid tool arguments" in format_error else self.RULE_FUNCTION_TOOLS - remember_error(self._format_gate_user_message("Error: invalid function/tool response", format_error) + " Next: " + rule) + self._remember_agent_error(self._format_gate_user_message("Error: invalid function/tool response", format_error) + " Next: " + rule) def _handle_format_gate(self, response: Json, format_error: str, consecutive_errors: int, on_message: MessageCallback | None) -> None: self._set_status_notice("err:format") @@ -6155,14 +5945,13 @@ def _finish_current_goal(self) -> None: self.blackboard.checks_required = False self.recent_edits = [] - def _format_act_tool_result_context(self, *, context_blocks: list[str] | None = None) -> tuple[str, str, str, str]: + def _format_act_tool_result_context(self) -> tuple[str, str, str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter budget = self.context_budget() timeline = self.tool_context.current_timeline_blocks()[-budget.index_items :] unreduced = self.tool_context.unreduced_recent_blocks(checkpoint) latest = self.tool_context.latest_raw_blocks() - context_blocks = context_blocks if context_blocks is not None else self._act_file_context_blocks() - visible_keys = set(ToolResultContext.blocks_by_key(timeline + unreduced + latest + self.tool_context.kept_results)) + visible_keys = set(ToolResultContext.blocks_by_key(timeline + unreduced + latest)) archived_limit = max(0, budget.index_items - len(timeline)) archived = [item.format(result_key=key) for key, item in self.session.state.tool_result_store.items() if key not in visible_keys] archived = archived[-archived_limit:] if archived_limit > 0 else archived @@ -6175,36 +5964,11 @@ def _format_act_tool_result_context(self, *, context_blocks: list[str] | None = "\n\n".join(sections), "\n\n".join(ToolResultContext.render_blocks_for_prompt(unreduced)), "\n\n".join(ToolResultContext.render_blocks_for_prompt(latest)), - self._format_context_hygiene(unreduced=unreduced, latest=latest, context_blocks=context_blocks), ) def _act_file_context_blocks(self) -> list[str]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter - return self.tool_context.kept_results + self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() - - def _format_context_hygiene(self, *, unreduced: list[str], latest: list[str], context_blocks: list[str]) -> str: - latest_keys = list(ToolResultContext.blocks_by_key(latest)) - latest_key_set = set(latest_keys) - unreduced_keys = [key for key in ToolResultContext.blocks_by_key(unreduced) if key not in latest_key_set] - kept_keys = list(ToolResultContext.blocks_by_key(self.tool_context.kept_results)) - file_context_index = ToolResultContext.format_file_context_index(context_blocks, cwd=self.session.cwd, max_chars=1_200) - lines = [] - if latest_keys: - lines.append("- latest raw keys: " + ", ".join(latest_keys)) - if unreduced_keys: - lines.append("- unreduced raw keys: " + ", ".join(unreduced_keys)) - if kept_keys: - lines.append("- kept keys: " + ", ".join(kept_keys)) - if file_context_index: - lines.append("- visible file ranges already available:") - lines.extend(" " + line for line in file_context_index.splitlines()) - lines.append("- use visible File Context line anchors before Read; Read only missing ranges or after file changes.") - if not (latest_keys or unreduced_keys): - lines.append("- no visible raw result keys need action now.") - else: - lines.append("- before another work tool: Forget stale/noisy raw keys; Keep only raw keys needed after context reduction.") - lines.append("- do not Keep raw results already represented in Facts, Leads, File Context, or Discovery Context.") - return "\n".join(lines) + return self.tool_context.unreduced_recent_blocks(checkpoint) + self.tool_context.latest_raw_blocks() def _prune_tool_result_store(self) -> None: keep = self._protected_tool_result_keys() @@ -6215,9 +5979,7 @@ def _prune_tool_result_store(self) -> None: self.session.state.tool_result_store.pop(key) def _protected_tool_result_keys(self) -> set[str]: - keys = self.blackboard.referenced_result_keys() - keys.update(ToolResultContext.blocks_by_key(self.tool_context.kept_results)) - return keys + return self.blackboard.referenced_result_keys() def _remember_feedback_error(self, errors: list[str], text: str) -> None: text = " ".join(text.split()) @@ -6232,9 +5994,6 @@ def _remember_feedback_error(self, errors: list[str], text: str) -> None: def _remember_agent_error(self, text: str) -> None: self._remember_feedback_error(self.agent_feedback_errors, text) - def _remember_observe_error(self, text: str) -> None: - self._remember_feedback_error(self.observe_feedback_errors, text) - def _drop_agent_feedback(self, *markers: str) -> None: lowered = tuple(marker.lower() for marker in markers if marker) if not lowered: @@ -6312,31 +6071,19 @@ def _format_gate_debug_details(self, response: Json, format_error: str) -> str: return _shorten(format_error, 180) + "\nFull bad output:\n" + bad_output def _step_prompts(self) -> tuple[str, str, str]: - if self.mode == AgentMode.OBSERVE: - system_prompt = self._system_prompt(AGENT_OBSERVE_SYSTEM_PROMPT) - user_prompt = self.build_observe_prompt() - activity = "observe" - else: - system_prompt = self._system_prompt() - user_prompt = self.build_user_prompt() - activity = "agent" - return system_prompt, user_prompt, activity + return self._system_prompt(), self.build_user_prompt(), "agent" def _tool_schemas(self) -> list[Json]: - if self.mode == AgentMode.OBSERVE: - action_names = self.OBSERVE_ACTION_TYPES - {"tool", "keep", "forget"} - tool_classes: Iterable[ToolClass] = CONTEXT_TOOL_CLASSES - else: - action_names = self.ACT_ACTION_TYPES - {"tool", "forget"} - tool_classes = tuple(TOOL_REGISTRY.values()) - if not CodeIndex(self.session).available(): - tool_classes = tuple(tool for tool in tool_classes if tool is not InspectCodeTool) + action_names = self.ACT_ACTION_TYPES - {"tool", "forget"} + tool_classes: Iterable[ToolClass] = tuple(TOOL_REGISTRY.values()) + if not CodeIndex(self.session).available(): + tool_classes = tuple(tool for tool in tool_classes if tool is not InspectCodeTool) actions = [_state_tool_schema(name) for name in STATE_TOOL_PARAMS if name in action_names] return actions + [tool.tool_schema() for tool in tool_classes] def step(self, *, on_message: MessageCallback | None = None) -> Json: - system_prompt, user_prompt, activity = self._step_prompts() - response = self.request(system_prompt, user_prompt, activity=activity, on_message=on_message, tool_schemas=self._tool_schemas()) + system_prompt, user_prompt, activity, tool_schemas = self._prepare_request_context() + response = self.request(system_prompt, user_prompt, activity=activity, on_message=on_message, tool_schemas=tool_schemas) if _json_str(response.get("_format_error")): return response invalid_response = self._validate_action_response(response) @@ -6394,16 +6141,16 @@ def on_stream_action(action: Json) -> bool: return True if is_tool and any(execution.outcome != "success" for execution in self.tool_runner.latest_executions): return True - return self.mode == AgentMode.OBSERVE + return False - system_prompt, user_prompt, activity = self._step_prompts() + system_prompt, user_prompt, activity, tool_schemas = self._prepare_request_context() response = self.request( system_prompt, user_prompt, activity=activity, on_message=on_message, on_stream_action=on_stream_action, - tool_schemas=self._tool_schemas(), + tool_schemas=tool_schemas, ) if committed: return latest_result, response, True @@ -6415,7 +6162,7 @@ def on_stream_action(action: Json) -> bool: return self.handle_response(response, confirm=confirm, on_auto_approve=on_auto_approve, on_message=on_message), response, False def _can_stream_tools(self) -> bool: - return self.mode == AgentMode.ACT and isinstance(self.model_client, ModelClient) and self.session.config.provider.stream is not False + return isinstance(self.model_client, ModelClient) and self.session.config.provider.stream is not False def apply_response(self, response: Json) -> list[str]: actions = self._response_actions(response) @@ -6424,13 +6171,11 @@ def apply_response(self, response: Json) -> list[str]: response = {**response, "actions": [action for action in actions if not self._is_pending_check_action(action)]} actions = self._response_actions(response) if self._goal_changes_task(actions): - self.tool_context.kept_results = [] self.tool_context.compact_observed(self.tool_context.recent + self.tool_context.latest) self._mark_memory_checkpoint() self.blackboard.leads = [] self.state_updater.apply(response) - forgotten = self.tool_context.forget_results(ToolResultContext.forget_result_keys_from_actions(actions)) - return forgotten + return [] def _goal_changes_task(self, actions: list[Json]) -> bool: if not self.blackboard.goal: @@ -6454,11 +6199,8 @@ def execute_tool_calls( confirm: ConfirmCallback | None = None, on_auto_approve: ToolDisplayCallback | None = None, append_to_latest: bool = False, - context_keep_blocks: list[str] | None = None, ) -> str: self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) - self.latest_context_tool_kept = [] - self.latest_context_tool_forgotten = [] self.latest_context_tool_recalled = [] regular_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name not in CONTEXT_TOOL_NAMES] if regular_executions: @@ -6471,14 +6213,11 @@ def execute_tool_calls( self._apply_context_tool_executions( self.tool_runner.latest_executions, append_to_latest=append_to_latest or bool(regular_executions), - keep_source_blocks=context_keep_blocks, ) self.session.state.turn_tool_calls += len(self.tool_runner.latest_executions) self.session.state.session_tool_calls += len(self.tool_runner.latest_executions) for execution in self.tool_runner.latest_executions: self._after_tool_execution(execution) - if self._should_observe_after_tools(): - self.mode = AgentMode.OBSERVE return "\n\n".join(self.tool_context.latest) def _apply_context_tool_executions( @@ -6486,7 +6225,6 @@ def _apply_context_tool_executions( executions: list[ToolCallExecution], *, append_to_latest: bool, - keep_source_blocks: list[str] | None, ) -> None: for execution in executions: if execution.outcome != "success": @@ -6501,47 +6239,6 @@ def _apply_context_tool_executions( append=append_to_latest or bool(self.tool_context.latest), ) ) - elif execution.call.name == ForgetTool.NAME: - self.latest_context_tool_forgotten.extend(self.tool_context.forget_results(_tool_result_keys_from_args(execution.call.args))) - elif execution.call.name == KeepTool.NAME: - source_blocks = keep_source_blocks if keep_source_blocks is not None else self._visible_raw_tool_result_blocks() - self.latest_context_tool_kept.extend( - self.tool_context.keep_result_keys( - _tool_result_keys_from_args(execution.call.args), - source_blocks, - max_chars=self.context_budget().kept_chars, - max_block_chars=self.context_budget().kept_block_chars, - ) - ) - - def _visible_raw_tool_result_blocks(self) -> list[str]: - checkpoint = self.blackboard.memory_checkpoint_tool_result_counter - return self.tool_context.unreduced_blocks(checkpoint) + self.tool_context.latest_raw_blocks() + self.tool_context.kept_results - - def _should_observe_after_tools(self) -> bool: - pending = self._unreferenced_unreduced_blocks() - if not pending: - return False - budget = self.context_budget() - return ( - len(pending) >= budget.observe_after_results - or self._projected_unreduced_context_chars(pending) >= budget.raw_chars - ) - - def _projected_unreduced_context_chars(self, blocks: list[str]) -> int: - budget = self.context_budget() - file_context = ToolResultContext.format_file_context( - blocks, - cwd=self.session.cwd, - max_chars=budget.raw_chars + budget.kept_chars, - ) - discovery_context = ToolResultContext.format_discovery_context( - blocks, - max_chars=max(1, budget.raw_chars // 3), - ) - tool_results = "\n\n".join(ToolResultContext.render_blocks_for_prompt(blocks)) - tool_index = "\n".join(ToolResultContext.compact_block(block) for block in blocks) - return len("\n\n".join(part for part in (discovery_context, file_context, tool_index, tool_results) if part)) def _unreferenced_unreduced_blocks(self) -> list[str]: return self.tool_context.unreduced_blocks( @@ -6684,38 +6381,6 @@ def _normalize_action(action: Json) -> Json: normalized["name"] = tool_name return normalized - def _context_actions_from_tool_calls(self, tool_calls: list[JsonValue]) -> list[Json]: - actions: list[Json] = [] - for value in tool_calls: - try: - call = self.tool_runner.parse_tool_call(value) - except ToolCallArgError: - continue - if call.name == ForgetTool.NAME: - try: - keys = _tool_result_keys_from_args(call.args) - except ToolCallArgError: - continue - actions.append({"type": "forget", "source": keys, "reason": call.intention or "context tool"}) - elif call.name == KeepTool.NAME: - try: - keys = _tool_result_keys_from_args(call.args) - except ToolCallArgError: - continue - actions.append({"type": "keep", "source": keys, "reason": call.intention or "context tool"}) - return actions - - def _non_context_tool_error(self, tool_calls: list[JsonValue]) -> str: - invalid = [] - for value in tool_calls: - try: - call = self.tool_runner.parse_tool_call(value) - except ToolCallArgError: - continue - if call.name not in CONTEXT_TOOL_NAMES: - invalid.append(call.name) - return ", ".join(dict.fromkeys(invalid)) - def _gate_action_types( self, actions: list[Json], @@ -6863,7 +6528,7 @@ def _build_response_context(self, response: Json) -> ResponseContext: tool_calls or pending_check_requested or (assistant_text and actions and not completion_message) - or action_types & {"goal", "plan", "forget", "lead", "known"} + or action_types & {"goal", "plan", "lead", "known"} ), ) @@ -6885,7 +6550,6 @@ def _ingest_queued_user_input(self, poll_user_input: UserInputPoller | None, on_ while user_input := poll_user_input(): self.blackboard.user_input = user_input self.session.state.pending_user_feedback = user_input - self.mode = AgentMode.ACT self.session.append_conversation(UserMessage(content=user_input)) if on_message is not None: on_message("sent: " + user_input) @@ -6903,8 +6567,10 @@ def _gate_protocol_actions(self, ctx: ResponseContext, on_message: MessageCallba ) def _gate_tool_actions(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool: - context_actions = ctx.actions + self._context_actions_from_tool_calls(ctx.tool_calls) - if self._gate_forget_actions(context_actions, on_message, self._remember_agent_error) is not None: + unavailable = sorted({_json_str(_json_dict(call).get("name")) for call in ctx.tool_calls if _json_str(_json_dict(call).get("name")) in REMOVED_CONTEXT_TOOL_NAMES}) + if unavailable: + self._remember_agent_error(self._error("context tools are no longer available in ACT: " + ", ".join(unavailable) + ".")) + self._report_gate(on_message, "Retrying: use Recall or continue work directly.", "Protocol_Gate: unavailable context tool(s): " + ", ".join(unavailable) + ".") return True repeated_tool_retry_error = self._repeated_tool_retry_error(ctx.tool_calls) if repeated_tool_retry_error: @@ -7040,143 +6706,15 @@ def _run_tool_actions( if report: on_message(report) self._emit_tool_context_update( - [*self.latest_context_tool_recalled, *self.latest_context_tool_kept], - self.latest_context_tool_forgotten, + self.latest_context_tool_recalled, + [], on_message, ) if self.session.settings.debug and self.tool_runner.skipped_after_failure_count: on_message(f"Tool Calls Skipped: {self.tool_runner.skipped_after_failure_count} after {self.tool_runner.skipped_after_failure_key} failed") - self.compactor.maybe_compact() + self.apply_context_budget() return True - def _handle_observe_response( - self, - ctx: ResponseContext, - response: Json, - *, - on_message: MessageCallback | None, - ) -> AgentRunResult: - if ctx.pending_check_requested: - self._remember_observe_error(self._warning('ignored verify status="pending".', "observe must keep or forget latest results first.")) - repeated_tool_retry_error = self._repeated_tool_retry_error(ctx.tool_calls) - if repeated_tool_retry_error: - return self._reject_result( - self._remember_observe_error, - on_message, - self._error("repeated failed tool call: " + repeated_tool_retry_error + ".", "observe latest results, then change args or switch tools."), - "Retrying: change the failed tool call instead of repeating it.", - "ToolRetry_Gate: " + repeated_tool_retry_error + ".", - ) - gate_result = self._gate_action_types( - ctx.actions, - allowed=self.OBSERVE_ACTION_TYPES, - on_message=on_message, - retry_message="Retrying: observe latest results.", - feedback_message=self._error("latest results must be observed before more work."), - remember_error=self._remember_observe_error, - ) - if gate_result is not None: - return gate_result - non_context_tool_error = self._non_context_tool_error(ctx.tool_calls) - if non_context_tool_error: - detail = non_context_tool_error + " is not available in OBSERVE; use Keep, Forget, or Recall to reduce current results first. ACT may use work tools after OBSERVE completes." - return self._reject_result( - self._remember_observe_error, - on_message, - self._error(detail, "OBSERVE only accepts context tools: Keep, Forget, Recall."), - "Retrying: OBSERVE only accepts Keep, Forget, or Recall.", - "Protocol_Gate: invalid observe tool(s): " + non_context_tool_error + ".", - ) - context_actions = ctx.actions + self._context_actions_from_tool_calls(ctx.tool_calls) - forget_gate = self._gate_forget_actions(context_actions, on_message, self._remember_observe_error) - if forget_gate is not None: - return forget_gate - observed_blocks = self._unreferenced_unreduced_blocks() - observed_counter = ToolResultContext.max_counter(observed_blocks) - forgotten_keys = self.apply_response(response) - self._emit_state_and_text(ctx, on_message) - if ctx.tool_calls: - self.execute_tool_calls(ctx.tool_calls, context_keep_blocks=observed_blocks) - forgotten_keys.extend(self.latest_context_tool_forgotten) - self.mode = AgentMode.ACT - kept_keys = self.tool_context.keep_results( - ctx.actions, - observed_blocks, - max_chars=self.context_budget().kept_chars, - max_block_chars=self.context_budget().kept_block_chars, - ) - kept_keys.extend(self.latest_context_tool_kept) - self.tool_context.compact_observed(observed_blocks) - self._mark_memory_checkpoint(observed_counter) - self.observe_feedback_errors = [] - self._warn_weak_observe_memory(context_actions) - self._emit_tool_context_update(kept_keys, forgotten_keys, on_message) - self._promote_required_checks(ctx) - return AgentRunResult() - - def _warn_weak_observe_memory(self, actions: list[Json]) -> None: - if any(_json_str(action.get("type")) in {"keep", "forget", "lead"} for action in actions): - return - known_actions = [action for action in actions if _json_str(action.get("type")) == "known"] - if not known_actions: - return - for action in known_actions: - for raw in _json_list(action.get("items")): - item = KnownItem.from_json(raw) - if item is not None and KnownItem.source_of(item): - return - self._remember_observe_error( - self._warning( - "weak observe memory: known facts need source tr.N or keep/forget coverage.", "use source-backed Facts/Leads or keep important raw results." - ) - ) - - def _forget_tool_result_error(self, actions: list[Json]) -> str: - keys = ToolResultContext.forget_result_keys_from_actions(actions) - if not any(_json_str(action.get("type")) == "forget" for action in actions): - return "" - if not keys: - return "missing tr.* source" - visible_keys = set(ToolResultContext.blocks_by_key(self.tool_context.kept_results + self.tool_context.latest + self.tool_context.recent)) - missing = [key for key in keys if key not in visible_keys] - return "not in visible tool results: " + ", ".join(missing) if missing else "" - - def _gate_forget_actions( - self, - actions: list[Json], - on_message: MessageCallback | None, - remember_error: Callable[[str], None], - ) -> AgentRunResult | None: - forget_error = self._forget_tool_result_error(actions) - if forget_error: - return self._reject_result( - remember_error, - on_message, - self._error("invalid forget: " + forget_error + ".", self.RULE_VISIBLE_RESULTS), - "Retrying: forget only visible tool result keys.", - "ToolResult_Gate: " + forget_error + ".", - ) - forgotten = set(ToolResultContext.forget_result_keys_from_actions(actions)) - released = set() - for action in actions: - values = _json_list(action.get("items")) if _json_str(action.get("type")) == "lead" else [] - for raw in values: - item = Lead.from_json(raw) - if item is not None and item.status != LeadStatus.ACTIVE: - released.update(key for key in item.source if key.startswith("tr.")) - protected = self.blackboard.protected_result_sources() - conflict = sorted((forgotten & set(protected)) - released) - forget_protected_error = "protected source: " + ", ".join(key + " (" + protected[key] + ")" for key in conflict) if conflict else "" - if forget_protected_error: - return self._reject_result( - remember_error, - on_message, - self._error("forget conflicts with protected result source: " + forget_protected_error + ".", self.RULE_CLOSE_SOURCE), - "Retrying: close dependent state before forgetting its source result.", - "ToolResult_Gate: " + forget_protected_error + ".", - ) - return None - def _emit_tool_context_update(self, kept: list[str], forgotten: list[str], on_message: MessageCallback | None) -> None: if on_message is None or not (kept or forgotten): return @@ -7254,9 +6792,6 @@ def run( checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, ) self._prune_tool_result_store() - self.mode = AgentMode.ACT - self.latest_context_tool_kept = [] - self.latest_context_tool_forgotten = [] self.latest_context_tool_recalled = [] self.session.state.turn_tool_calls = 0 self.session.state.turn_model_calls = 0 @@ -7272,9 +6807,8 @@ def run( self.blackboard.task_code = TaskCode.NEW self.blackboard.goal_reached = False self.blackboard.checks_required = False - self.observe_feedback_errors = [] self.blackboard.checks.reset() - self.compactor.maybe_compact() + self.apply_context_budget() self.session.append_conversation(UserMessage(content=user_input)) def before_step(_index: int, _max_steps: int) -> None: @@ -7314,9 +6848,6 @@ def handle_response( ctx = self._build_response_context(response) feedback_checkpoint = len(self.agent_feedback_errors) DebugTrace.handle_event(self, "handle-start", ctx, response) - if self.mode == AgentMode.OBSERVE: - return self._handle_observe_response(ctx, response, on_message=on_message) - if self._gate_protocol_actions(ctx, on_message) or self._gate_tool_actions(ctx, on_message) or self._gate_task_state(ctx, on_message): DebugTrace.handle_event(self, "handle-gated-before-apply", ctx, response) return AgentRunResult() @@ -7716,9 +7247,11 @@ def _status(self, args: str) -> str: + self._format_bool(session.settings.yolo) + " compact_at=" + str(session.settings.compact_at) + + "%" + " context_budget=" + session.settings.context_budget, - "conversation: " + str(len(session.state.conversation)) + "/" + str(session.settings.compact_at), + "context: " + str(session.state.last_context_percent) + "% (" + str(session.state.last_context_chars) + " chars)", + "conversation: " + str(len(session.state.conversation)) + " item(s)", "tool_calls: turn=" + str(session.state.turn_tool_calls) + " session=" + str(session.state.session_tool_calls), "tools: code_index=" + code_index, "tokens: last=" + _format_count(session.state.last_total_tokens) + " session=" + _format_count(session.state.session_total_tokens), @@ -7740,18 +7273,23 @@ def _compact(self, args: str) -> str: if args: return "Usage: /compact" - def compact_history() -> str: - before = len(self.agent.session.state.conversation) - count = self.agent.compact_history() + def compact_context() -> str: + before_conversation = len(self.agent.session.state.conversation) + before_raw = len(self.agent.tool_context.unreduced_blocks(self.agent.blackboard.memory_checkpoint_tool_result_counter)) + count = self.agent.compact_context() if count: - return "Compacted conversation history: " + str(count) + " item(s) -> " + str(len(self.agent.session.state.conversation)) + " item(s)" + return "Compacted context: " + str(count) + " item(s)" return ( - "Conversation history is empty" - if before == 0 - else "Nothing to compact: " + str(before) + " item(s), keeping recent " + str(ConversationCompactor.KEEP_RECENT) + "." + "Context is empty" + if before_conversation == 0 and before_raw == 0 + else "Nothing to compact: conversation=" + + str(before_conversation) + + " item(s), raw_results=" + + str(before_raw) + + "." ) - return self._with_status(compact_history) + return self._with_status(compact_context) def _index(self, args: str) -> str: value = args.strip() @@ -7778,7 +7316,7 @@ def _format_context_budget(self) -> str: "kept_chars: " + str(budget.kept_chars), "kept_block_chars: " + str(budget.kept_block_chars), "index_items: " + str(budget.index_items), - "observe_after_results: " + str(budget.observe_after_results), + "prompt_chars: " + str(budget.prompt_chars), ] ) @@ -7809,7 +7347,7 @@ def _config(self, args: str) -> str: "paths.project_dir: " + session.project_dir(), "paths.session_dir: " + session.session_dir(), "paths.history: " + session.history_path(), - "runtime.compact_at: " + str(session.settings.compact_at), + "runtime.compact_at: " + str(session.settings.compact_at) + "%", "runtime.shell_timeout: " + str(session.settings.shell_timeout), "runtime.max_agent_steps: " + str(session.settings.max_agent_steps), "runtime.context_budget: " + session.settings.context_budget, @@ -7833,8 +7371,9 @@ def _set(self, args: str) -> str: return error suffix = "" if key == "runtime.compact_at": - compacted = self._with_status(lambda: "yes" if self.agent.compactor.maybe_compact() else "") == "yes" - suffix = " and compacted history" if compacted else "" + should_compact = self.agent._should_compact_context(self.agent.session.state.last_context_percent) + compacted = should_compact and self._with_status(lambda: "yes" if self.agent.compact_context() else "") == "yes" + suffix = " and compacted context" if compacted else "" return "Set " + key + " = " + self._config_value(key) + suffix def _config_value(self, key: str) -> str: @@ -7846,6 +7385,8 @@ def _config_value(self, key: str) -> str: return value or "(empty)" if key == "provider.temperature": return self._format_optional(value) + if key == "runtime.compact_at": + return str(value) + "%" return str(value) def _apply_config_value(self, key: str, value: str) -> str: @@ -7881,6 +7422,15 @@ def _apply_config_value(self, key: str, value: str) -> str: if key == "runtime.context_budget": self.agent.apply_context_budget() return "" + if key == "runtime.compact_at": + raw_percent = value.removesuffix("%") + try: + parsed_int = int(raw_percent) + parsed_int = RuntimeSettings.clean_compact_at(parsed_int) + except (ValueError, ConfigError): + return "Usage: /set runtime.compact_at <0-100[%]>" + setattr(target, attr, parsed_int) + return "" if key in CONFIG_INT_KEYS: try: parsed_int = int(value) @@ -8002,7 +7552,7 @@ def _format_line(self, turn_elapsed: float, *, now: float, show_elapsed: bool) - model = active_model.rsplit("/", 1)[-1] or active_model or "(no model)" reasoning = session.state.current_model_call_reasoning_label or (session.config.provider.reasoning) modes = " | yolo" if session.settings.yolo else "" - context = str(len(session.state.conversation)) + "/" + str(session.settings.compact_at) + context = str(session.state.last_context_percent) + "%" last_tokens = _format_count(session.state.last_total_tokens) session_tokens = _format_count(session.state.session_total_tokens) rate = session.state.last_model_call_rate @@ -8013,7 +7563,7 @@ def _format_line(self, turn_elapsed: float, *, now: float, show_elapsed: bool) - if show_elapsed: parts.append(f"turn:{turn_elapsed:.1f}s") if session.state.current_model_call_started_at > 0: - activity = {"compact": "compacting", "observe": "observing"}.get(session.state.current_model_call_activity, "working") + activity = {"compact": "compacting"}.get(session.state.current_model_call_activity, "working") if session.state.current_model_call_has_content: activity += "*" elapsed = max(0.0, now - session.state.current_model_call_started_at) diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index ef143a3..b720214 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -352,7 +352,7 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path, monkeypat (tmp_path / name).write_text(name + "\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) agent = Agent(session) - _set_context_budget(monkeypatch, agent, index_items=2, observe_after_results=4) + _set_context_budget(monkeypatch, agent, index_items=2) for name in ["one.txt", "two.txt", "three.txt", "four.txt"]: agent.execute_tool_calls([{"name": "Read", "intention": "read " + name, "args": _read_args(name, line_range=[0, 1])}]) @@ -367,7 +367,6 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path, monkeypat assert "" in latest assert "" in recent assert len(agent.tool_context.recent) == 3 - assert agent.mode == nanocode.AgentMode.OBSERVE context = _observe_tool_result_context(agent) assert "one.txt" in context assert "two.txt" in context @@ -377,48 +376,11 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path, monkeypat assert len(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) == 4 -def test_agent_observes_full_latest_result_when_it_becomes_recent(tmp_path, monkeypatch): - (tmp_path / "one.txt").write_text("one\n", encoding="utf-8") - (tmp_path / "two.txt").write_text("two\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2) - - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) - - context = _observe_tool_result_context(agent) - assert agent.mode == nanocode.AgentMode.OBSERVE - assert "one.txt" in context - assert "" in context - assert "one\n" in context - assert "two.txt" in context - recent = _blocks_text(agent.tool_context.recent) - assert "key=tr.1" in recent - assert "" in recent - assert agent.blackboard.memory_checkpoint_tool_result_counter == 0 - - agent.handle_response( - { - "actions": [ - {"type": "keep", "source": ["tr.1"], "reason": "one.txt remains useful"}, - {"type": "forget", "source": ["tr.2"], "reason": "two.txt is not needed"}, - ] - } - ) - - assert agent.blackboard.memory_checkpoint_tool_result_counter == 2 - assert agent.mode == nanocode.AgentMode.ACT - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - assert "recall=tr.1" in _blocks_text(agent.tool_context.recent) - assert "" not in _blocks_text(agent.tool_context.recent) - assert "recall=tr.2" in _blocks_text(agent.tool_context.latest) - - -def test_referenced_unreduced_results_do_not_count_toward_observe_threshold(tmp_path, monkeypatch): +def test_referenced_unreduced_results_are_excluded_from_pending_context(tmp_path, monkeypatch): for name in ["one.txt", "two.txt", "three.txt"]: (tmp_path / name).write_text(name + "\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2) + _set_context_budget(monkeypatch, agent, raw_chars=10_000) agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) agent.apply_response({"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "one.txt was inspected."}]}]}) @@ -431,28 +393,20 @@ def test_referenced_unreduced_results_do_not_count_toward_observe_threshold(tmp_ agent.execute_tool_calls([{"name": "Read", "intention": "read three", "args": _read_args("three.txt", line_range=[0, 1])}]) - assert agent.mode == nanocode.AgentMode.OBSERVE - observe_prompt = agent.build_observe_prompt() - file_context = _prompt_section(observe_prompt, "File Context", "Kept Tool Results") - observe_raw = observe_prompt.split("Unreduced Raw Tool Results:\n", 1)[1].split("\n--- Blocking Feedback ---", 1)[0] - assert "one.txt" not in file_context + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Unreduced Tool Results") + assert "one.txt" in file_context assert "two.txt" in file_context assert "three.txt" in file_context - assert "" not in observe_raw - assert "content=file_context" in observe_raw -def test_unsourced_known_does_not_cover_unreduced_result(tmp_path, monkeypatch): +def test_unsourced_known_does_not_cover_unreduced_result(tmp_path): (tmp_path / "one.txt").write_text("one\n", encoding="utf-8") (tmp_path / "two.txt").write_text("two\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=2) - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) agent.apply_response({"actions": [{"type": "known", "items": ["one.txt was inspected."]}]}) agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) - assert agent.mode == nanocode.AgentMode.OBSERVE assert agent.blackboard.memory_checkpoint_tool_result_counter == 0 assert [nanocode.ToolResultContext.result_key(block) for block in agent._unreferenced_unreduced_blocks()] == ["tr.1", "tr.2"] @@ -468,7 +422,7 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk assert agent.mode == nanocode.AgentMode.ACT assert "key=tr.1" in _blocks_text(agent.tool_context.recent) - index, unreduced, latest, hygiene = agent._format_act_tool_result_context() + index, unreduced, latest = agent._format_act_tool_result_context() assert "one.txt" in unreduced assert "|one" not in unreduced assert "content=file_context" in unreduced @@ -478,14 +432,7 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk assert "recall=tr.1" in index assert "recall=tr.2" in index assert "output:\n" not in index - assert "- latest raw keys: tr.2" in hygiene - assert "- unreduced raw keys: tr.1" in hygiene - assert "- visible file ranges already available:" in hygiene - assert "one.txt: 0:1 source=tr.1" in hygiene - assert "two.txt: 0:1 source=tr.2" in hygiene - assert "use visible File Context line anchors before Read" in hygiene - assert "Forget stale/noisy raw keys" in hygiene - file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Unreduced Tool Results") assert "File: one.txt" in file_context assert "|one" in file_context assert "File: two.txt" in file_context @@ -503,7 +450,7 @@ def test_act_prompt_file_context_replaces_overlapping_read_lines(tmp_path, monke agent.execute_tool_calls([{"name": "Read", "intention": "read overlap", "args": _read_args("sample.txt", line_range=[1, 3])}]) prompt = agent.build_user_prompt() - file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + file_context = _prompt_section(prompt, "File Context", "Unreduced Tool Results") latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") unreduced = _prompt_section(prompt, "Unreduced Tool Results", "Latest Tool Results") assert "File: sample.txt" in file_context @@ -534,7 +481,7 @@ def test_act_prompt_file_context_uses_edit_result_as_newest_file_content(tmp_pat ) prompt = agent.build_user_prompt() - file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + file_context = _prompt_section(prompt, "File Context", "Unreduced Tool Results") latest = _prompt_section(prompt, "Latest Tool Results", "Current Input") assert path.read_text(encoding="utf-8") == "old0\nnew1\nold2\n" assert "File: sample.txt" in file_context @@ -560,7 +507,7 @@ def test_act_prompt_file_context_omits_stale_read_lines_after_external_change(tm agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 2])}]) path.write_text("changed0\nchanged1\n", encoding="utf-8") - file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Unreduced Tool Results") assert "File: sample.txt" not in file_context assert "|old0" not in file_context assert "|old1" not in file_context @@ -577,7 +524,7 @@ def test_act_prompt_file_context_keeps_matching_lines_after_external_stat_change agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) path.write_text("alpha\nBETA changed\n", encoding="utf-8") - file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Kept Tool Results") + file_context = _prompt_section(agent.build_user_prompt(), "File Context", "Unreduced Tool Results") assert "File: sample.txt" in file_context assert "0:1 source=tr.1" in file_context assert "|alpha" in file_context @@ -620,7 +567,7 @@ def test_recall_read_reactivates_original_result_for_file_context(tmp_path): assert list(session.state.tool_result_store) == ["tr.1"] assert session.state.tool_result_counter == 1 prompt = agent.build_user_prompt() - file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + file_context = _prompt_section(prompt, "File Context", "Unreduced Tool Results") latest_results = _prompt_section(prompt, "Latest Tool Results", "Current Input") assert "File: sample.txt" in file_context assert "0:1 source=tr.1" in file_context @@ -645,7 +592,7 @@ def test_recalled_read_does_not_override_newer_read(tmp_path): assert list(session.state.tool_result_store) == ["tr.1", "tr.2"] assert session.state.tool_result_counter == 2 prompt = agent.build_user_prompt() - file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") + file_context = _prompt_section(prompt, "File Context", "Unreduced Tool Results") latest_results = _prompt_section(prompt, "Latest Tool Results", "Current Input") assert "0:1 source=tr.2" in file_context assert "|new" in file_context @@ -653,74 +600,6 @@ def test_recalled_read_does_not_override_newer_read(tmp_path): assert "tool=Recall" not in latest_results -def test_forget_tool_removes_visible_result_without_new_key(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - session = Session(cwd=str(tmp_path)) - agent = Agent(session) - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - latest = agent.execute_tool_calls([{"name": "Forget", "intention": "drop sample", "args": ["tr.1"]}]) - - assert session.state.tool_result_counter == 1 - assert list(session.state.tool_result_store) == ["tr.1"] - assert "tool=Forget" not in latest - assert "recall=tr.1" in latest - assert "" not in latest - prompt = agent.build_user_prompt() - assert "File Context:\n(empty)" in prompt - - -def test_recall_tool_reactivates_forgotten_result_without_new_key(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - session = Session(cwd=str(tmp_path)) - agent = Agent(session) - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.execute_tool_calls([{"name": "Forget", "intention": "drop sample", "args": ["tr.1"]}]) - agent.execute_tool_calls([{"name": "Recall", "intention": "recall sample", "args": ["tr.1"]}]) - - assert session.state.tool_result_counter == 1 - assert list(session.state.tool_result_store) == ["tr.1"] - prompt = agent.build_user_prompt() - file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") - latest_results = _prompt_section(prompt, "Latest Tool Results", "Current Input") - assert "File: sample.txt" in file_context - assert "|alpha" in file_context - assert "tool=Recall" not in latest_results - - -def test_observe_keep_tool_keeps_result_without_new_key(tmp_path, monkeypatch): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - session = Session(cwd=str(tmp_path)) - agent = Agent(session) - _set_context_budget(monkeypatch, agent, observe_after_results=1) - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - assert agent.mode == nanocode.AgentMode.OBSERVE - agent.handle_response({"actions": [{"type": "tool", "name": "Keep", "intention": "keep sample", "args": ["tr.1"]}]}) - - assert agent.mode == nanocode.AgentMode.ACT - assert session.state.tool_result_counter == 1 - assert list(session.state.tool_result_store) == ["tr.1"] - assert "key=tr.1" in _blocks_text(agent.tool_context.kept_results) - - -def test_empty_observe_compacts_unreduced_tool_results(tmp_path, monkeypatch): - (tmp_path / "one.txt").write_text("one\n", encoding="utf-8") - (tmp_path / "two.txt").write_text("two\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, raw_chars=300, observe_after_results=2) - - agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) - agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) - - agent.handle_response({"actions": [], "_assistant_text": "checking result"}) - - assert agent.blackboard.memory_checkpoint_tool_result_counter == 2 - assert agent.mode == nanocode.AgentMode.ACT - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - - def test_assistant_text_does_not_mark_memory_checkpoint(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") agent = Agent(Session(cwd=str(tmp_path))) @@ -742,47 +621,6 @@ def test_known_action_accepts_source_references(tmp_path): assert "[tr.1, tr.2] Router setup lives in app.py." in agent.build_user_prompt() -def test_observe_prompt_uses_narrow_context(tmp_path): - session = Session(cwd=str(tmp_path)) - session.state.conversation.append(nanocode.UserMessage(content="old conversation text")) - session.state.tool_result_store["tr.9"] = nanocode.ToolResultItem(description="stored result", value="stored raw text") - session.state.user_rules.add("always run tests") - agent = Agent(session) - agent.mode = nanocode.AgentMode.OBSERVE - agent.blackboard.user_input = "fix bug" - agent.blackboard.goal = "fix bug goal" - agent.blackboard.plan = [nanocode.PlanItem(id="p1", text="inspect failing path", status=nanocode.PlanStatus.DOING)] - agent.blackboard.leads = [nanocode.Lead(id="h1", text="cache branch", status=nanocode.LeadStatus.ACTIVE, source=("tr.1",))] - agent.blackboard.known = ["known fact"] - agent.tool_context.kept_results = ['- ok tool=Read args=["old.py"] key=tr.1\n output:\nselected result'] - agent.recent_edits = ["- sample.py: old edit"] - agent.agent_feedback_errors = ["act error"] - agent.observe_feedback_errors = ["observe error"] - agent.tool_context.latest = ['- ok tool=Read args=["sample.py"] key=tr.2\n output:\nraw alpha'] - - prompt = agent.build_observe_prompt() - - assert "fix bug" in prompt - assert "always run tests" not in prompt - assert "fix bug goal" in prompt - assert "inspect failing path" in prompt - assert "cache branch" in prompt - assert "known fact" in prompt - assert "selected result" in prompt - assert "raw alpha" in prompt - assert "Observe Errors" in prompt - assert "observe error" in prompt - assert "act error" not in prompt - assert "Conversation History" not in prompt - assert "old conversation text" not in prompt - assert "Tool Result Index" not in prompt - assert "Archived Recall Index" not in prompt - assert "stored raw text" not in prompt - assert "Kept Tool Results" in prompt - assert "Recent Edits" not in prompt - assert "old edit" not in prompt - - def test_act_prompt_includes_current_focus_from_doing_plan_item(tmp_path): agent = Agent(Session(cwd=str(tmp_path))) agent.blackboard.plan = [ @@ -955,64 +793,6 @@ def status_fn(root, *, db_path=None, check=False, max_pending_files=50, format=" assert "- indexed-language-breakdown: python 80 files (62.5%), typescript 48 files (37.5%)" in prompt -def test_act_prompt_includes_kept_tool_results(tmp_path): - (tmp_path / "sample.txt").write_text("alpha unique\n", encoding="utf-8") - (tmp_path / "other.txt").write_text("beta unique\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - - agent.execute_tool_calls( - [ - {"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, - {"name": "Read", "intention": "read other", "args": _read_args("other.txt", line_range=[0, 1])}, - ] - ) - agent.mode = nanocode.AgentMode.OBSERVE - agent.handle_response( - { - "actions": [ - {"type": "keep", "source": ["tr.1"], "reason": "sample has alpha"}, - {"type": "forget", "source": ["tr.2"], "reason": "other.txt is not needed"}, - ] - } - ) - - prompt = agent.build_user_prompt() - assert "Kept Tool Results:" in prompt - file_context = _prompt_section(prompt, "File Context", "Kept Tool Results") - kept = _prompt_section(prompt, "Kept Tool Results", "Unreduced Tool Results") - assert "alpha unique" in file_context - assert "" not in kept - assert "content=file_context" in kept - assert "beta unique" not in prompt - assert len(agent.tool_context.kept_results) == 1 - - -def test_act_prompt_includes_context_hygiene_guidance(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - agent.tool_context.kept_results = ['- ok tool=Read args=["kept.txt"] key=tr.8\n output:\nkept'] - - prompt = agent.build_user_prompt() - hygiene = _prompt_section(prompt, "Context Hygiene", "Discovery Context") - - assert "- kept keys: tr.8" in hygiene - assert "- no visible raw result keys need action now." in hygiene - assert "Before another work tool, use Forget for stale/noisy visible raw result keys" in prompt - assert "Do not Keep raw Read/Search results solely because their File Context or Discovery Context projection is visible." in prompt - - -def test_act_prompt_context_hygiene_lists_visible_file_ranges(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\nbeta\ngamma\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[1, 3])}]) - - prompt = agent.build_user_prompt() - hygiene = _prompt_section(prompt, "Context Hygiene", "Discovery Context") - assert "- visible file ranges already available:" in hygiene - assert "sample.txt: 1:3 source=tr.1" in hygiene - assert "use visible File Context line anchors before Read" in hygiene - - def test_act_prompt_projects_search_results_to_discovery_context(tmp_path): sample = tmp_path / "sample.py" sample.write_text("class StatusBar:\n def elapsed(self):\n return 1\n", encoding="utf-8") @@ -1030,92 +810,6 @@ def test_act_prompt_projects_search_results_to_discovery_context(tmp_path): assert "content=discovery_context" in latest -def test_discovery_context_follows_active_result_lifecycle(tmp_path, monkeypatch): - sample = tmp_path / "sample.py" - sample.write_text("class StatusBar:\n pass\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, observe_after_results=1) - - agent.execute_tool_calls([{"name": "Search", "intention": "find status", "args": _search_args("StatusBar", path="sample.py")}]) - assert agent.mode == nanocode.AgentMode.OBSERVE - assert "Source: tr.1 tool=Search" in _prompt_section(agent.build_observe_prompt(), "Discovery Context", "File Context") - - agent.handle_response({"actions": [{"type": "keep", "source": ["tr.1"], "reason": "status symbol location is still useful"}]}) - assert agent.mode == nanocode.AgentMode.ACT - assert "Source: tr.1 tool=Search" in _prompt_section(agent.build_user_prompt(), "Discovery Context", "File Context") - - agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "location no longer needed"}]}) - assert "Discovery Context:\n(empty)" in agent.build_user_prompt() - - -def test_observed_discovery_result_compacts_out_of_discovery_context(tmp_path, monkeypatch): - sample = tmp_path / "sample.py" - sample.write_text("class StatusBar:\n pass\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, observe_after_results=1) - - agent.execute_tool_calls([{"name": "Search", "intention": "find status", "args": _search_args("StatusBar", path="sample.py")}]) - agent.handle_response({"actions": []}) - - prompt = agent.build_user_prompt() - assert "Discovery Context:\n(empty)" in prompt - assert "content=discovery_context" not in prompt - assert "recall=tr.1" in prompt - - -def test_kept_tool_results_deduplicate_by_tool_key(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.mode = nanocode.AgentMode.OBSERVE - agent.handle_response( - { - "actions": [ - {"type": "keep", "source": ["tr.1", "tr.1"], "reason": "sample contains alpha"}, - {"type": "known", "items": [{"source": ["tr.1"], "text": "sample.txt was inspected."}]}, - ] - } - ) - - assert len(agent.tool_context.kept_results) == 1 - assert agent.tool_context.kept_results[0].count("key=tr.1") == 1 - - -def test_observe_reports_kept_tool_result_keys(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.mode = nanocode.AgentMode.OBSERVE - messages = [] - - agent.handle_response( - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "sample contains alpha"}]}, - on_message=messages.append, - ) - - assert "Tool Result Context: +tr.1" in messages - - -def test_forget_removes_kept_tool_result_but_keeps_known_source(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - _seed_plan(agent, "debug branch") - agent.tool_context.kept_results = [ - '- ok tool=Read args=["a"] key=tr.1\n output:\na', - '- ok tool=Read args=["b"] key=tr.2\n output:\nb', - ] - agent.blackboard.known = [nanocode.KnownItem(text="a was ruled out.", source=("tr.1",))] - messages = [] - - result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "branch ruled out"}]}, on_message=messages.append) - - assert result.done is False - assert "tr.1" not in _blocks_text(agent.tool_context.kept_results) - assert "tr.2" in _blocks_text(agent.tool_context.kept_results) - assert nanocode.KnownItem.source_of(agent.blackboard.known[0]) == ("tr.1",) - assert messages == ["Tool Result Context: -tr.1"] - - def test_lead_action_updates_blackboard_and_report(tmp_path): agent = Agent(Session(cwd=str(tmp_path))) _seed_plan(agent, "debug branch") @@ -1154,130 +848,6 @@ def test_lead_action_updates_blackboard_and_report(tmp_path): assert messages == ["Leads Updated\n 1. [active] h1: admin filtering drops history events [tr.1] context: feed search"] -def test_forget_rejects_active_lead_source(tmp_path): - agent = Agent(_session(tmp_path, debug=True)) - _seed_plan(agent, "debug branch") - agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - agent.blackboard.leads = [nanocode.Lead(text="branch still possible", source=("tr.1",))] - messages = [] - - result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "branch ruled out"}]}, on_message=messages.append) - - assert result.done is False - assert "tr.1" in _blocks_text(agent.tool_context.kept_results) - assert any("protected source: tr.1 (active lead)" in error for error in agent.agent_feedback_errors) - assert messages == ["ToolResult_Gate: protected source: tr.1 (active lead)."] - - -def test_forget_allows_source_when_lead_is_closed_same_response(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - _seed_plan(agent, "debug branch") - agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - agent.blackboard.leads = [nanocode.Lead(id="h1", text="branch still possible", source=("tr.1",))] - messages = [] - - result = agent.handle_response( - { - "actions": [ - { - "type": "lead", - "items": [{"id": "h1", "text": "branch ruled out", "status": "ruled_out", "source": ["tr.1"]}], - }, - {"type": "forget", "source": ["tr.1"], "reason": "branch ruled out"}, - ] - }, - on_message=messages.append, - ) - - assert result.done is False - assert agent.blackboard.leads[0].status == nanocode.LeadStatus.RULED_OUT - assert "tr.1" not in _blocks_text(agent.tool_context.kept_results) - assert messages == [ - "Leads Updated\n 1. [ruled_out] h1: branch ruled out [tr.1]", - "Tool Result Context: -tr.1", - ] - - -def test_forget_allows_source_when_lead_is_dropped_same_response(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - _seed_plan(agent, "debug branch") - agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - agent.blackboard.leads = [nanocode.Lead(id="h1", text="branch lost priority", source=("tr.1",))] - messages = [] - - result = agent.handle_response( - { - "actions": [ - {"type": "lead", "items": [{"id": "h1", "text": "branch no longer matters", "status": "dropped", "source": ["tr.1"]}]}, - {"type": "forget", "source": ["tr.1"], "reason": "branch no longer matters"}, - ] - }, - on_message=messages.append, - ) - - assert result.done is False - assert agent.blackboard.leads[0].status == nanocode.LeadStatus.DROPPED - assert "tr.1" not in _blocks_text(agent.tool_context.kept_results) - assert messages == [ - "Leads Updated\n 1. [dropped] h1: branch no longer matters [tr.1]", - "Tool Result Context: -tr.1", - ] - - -def test_forget_rejects_missing_or_unknown_tool_result_key(tmp_path): - agent = Agent(_session(tmp_path, debug=True)) - _seed_plan(agent, "debug branch") - agent.tool_context.kept_results = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - messages = [] - - result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.2"], "reason": "branch ruled out"}]}, on_message=messages.append) - - assert result.done is False - assert "tr.1" in _blocks_text(agent.tool_context.kept_results) - assert any("not in visible tool results: tr.2" in error for error in agent.agent_feedback_errors) - assert messages == ["ToolResult_Gate: not in visible tool results: tr.2."] - - -def test_observe_forget_does_not_cover_latest_result_key(tmp_path): - agent = Agent(_session(tmp_path, debug=True)) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.kept_results = ['- ok tool=Read args=["old"] key=tr.1\n output:\nold'] - agent.tool_context.latest = ['- ok tool=Read args=["new"] key=tr.2\n output:\nnew'] - messages = [] - - result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "old branch ruled out"}]}, on_message=messages.append) - - assert result.done is False - assert agent.mode == nanocode.AgentMode.ACT - assert "tr.1" not in _blocks_text(agent.tool_context.kept_results) - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - assert messages == ["Tool Result Context: -tr.1"] - - -def test_observe_can_forget_old_kept_result_while_forgetting_latest(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.kept_results = ['- ok tool=Read args=["old"] key=tr.1\n output:\nold'] - agent.tool_context.latest = ['- ok tool=Read args=["new"] key=tr.2\n output:\nnew'] - messages = [] - - result = agent.handle_response( - { - "actions": [ - {"type": "forget", "source": ["tr.1"], "reason": "old branch ruled out"}, - {"type": "forget", "source": ["tr.2"], "reason": "new result is not useful"}, - ] - }, - on_message=messages.append, - ) - - assert result.done is False - assert agent.mode == nanocode.AgentMode.ACT - assert agent.tool_context.kept_results == [] - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - assert messages == ["Tool Result Context: -tr.1 -tr.2"] - - def test_pending_user_feedback_does_not_rewrite_goal_by_default(tmp_path): agent = Agent(Session(cwd=str(tmp_path))) _seed_plan(agent, "implement demo") @@ -1291,26 +861,7 @@ def test_pending_user_feedback_does_not_rewrite_goal_by_default(tmp_path): assert any("Pending User Feedback is not a new task" in error for error in agent.agent_feedback_errors) -def test_keep_tool_results_ignore_non_tool_sources(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.mode = nanocode.AgentMode.OBSERVE - agent.handle_response( - { - "actions": [ - {"type": "keep", "source": ["note.1"], "reason": "invalid source is ignored"}, - {"type": "forget", "source": ["tr.1"], "reason": "invalid source is ignored"}, - ] - } - ) - - assert agent.tool_context.kept_results == [] - assert "alpha\n" not in agent.build_user_prompt() - - -def test_keep_action_is_observe_only(tmp_path): +def test_keep_action_is_invalid(tmp_path): agent = Agent(Session(cwd=str(tmp_path))) _seed_plan(agent, "answer") @@ -1320,200 +871,6 @@ def test_keep_action_is_observe_only(tmp_path): assert any("Invalid action(s): keep" in error for error in agent.agent_feedback_errors) -def test_observe_rejects_invalid_action_and_allows_empty_actions(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.mode = nanocode.AgentMode.OBSERVE - - agent.handle_response({"actions": [{"type": "goal", "text": "answer", "complete": False}]}) - assert any("latest results must be observed" in error for error in agent.observe_feedback_errors) - assert agent.mode == nanocode.AgentMode.OBSERVE - - agent.handle_response({"actions": []}) - - assert agent.mode == nanocode.AgentMode.ACT - assert agent.observe_feedback_errors == [] - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - - -def test_observe_rejects_search_with_context_tool_guidance(tmp_path): - agent = Agent(_session(tmp_path, debug=True)) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - messages = [] - - result = agent.handle_response( - {"actions": [{"type": "tool", "name": "Search", "intention": "keep investigating", "args": _search_args("needle")}]}, - on_message=messages.append, - ) - - assert result.done is False - assert agent.mode == nanocode.AgentMode.OBSERVE - assert messages == ["Protocol_Gate: invalid observe tool(s): Search."] - assert any("Search is not available in OBSERVE" in error for error in agent.observe_feedback_errors) - assert any("Keep, Forget, or Recall" in error for error in agent.observe_feedback_errors) - - -def test_observe_compacts_unmentioned_result_keys_by_default(tmp_path): - agent = Agent(_session(tmp_path, debug=True)) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.latest = [ - '- ok tool=Read args=["a"] key=tr.1\n output:\na', - '- ok tool=Read args=["b"] key=tr.2\n output:\nb', - ] - messages = [] - - result = agent.handle_response( - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "a matters"}]}, - on_message=messages.append, - ) - - assert result.done is False - assert agent.mode == nanocode.AgentMode.ACT - assert "tr.1" in _blocks_text(agent.tool_context.kept_results) - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - assert messages == ["Tool Result Context: +tr.1"] - - -def test_observe_forget_source_covers_result_key(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - - result = agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "not useful"}]}) - - assert result.done is False - assert agent.mode == nanocode.AgentMode.ACT - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - assert agent.tool_context.kept_results == [] - - -def test_observe_known_source_compacts_result_key_by_default(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - - agent.handle_response({"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "a exists"}]}]}) - - assert agent.mode == nanocode.AgentMode.ACT - assert [nanocode.KnownItem.format_item(item) for item in agent.blackboard.known] == ["[tr.1] a exists"] - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - - -def test_observe_warns_on_weak_known_without_source_or_coverage(tmp_path): - agent = Agent(Session(cwd=str(tmp_path))) - agent.mode = nanocode.AgentMode.OBSERVE - agent.tool_context.latest = ['- ok tool=Read args=["a"] key=tr.1\n output:\na'] - - agent.handle_response({"actions": [{"type": "known", "items": ["a exists"]}]}) - - assert agent.mode == nanocode.AgentMode.ACT - assert any("weak observe memory" in error for error in agent.observe_feedback_errors) - assert agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter) == [] - - -def test_kept_tool_results_respect_char_budget(tmp_path, monkeypatch): - agent = Agent(Session(cwd=str(tmp_path))) - agent.mode = nanocode.AgentMode.OBSERVE - _set_context_budget(monkeypatch, agent, kept_chars=100) - agent.tool_context.latest = [ - '- ok tool=Read args=["a"] key=tr.1\n output:\n' + ("a" * 30), - '- ok tool=Read args=["b"] key=tr.2\n output:\n' + ("b" * 30), - ] - - agent.handle_response( - { - "actions": [ - {"type": "keep", "source": ["tr.1", "tr.2"], "reason": "both results matter"} - ] - } - ) - - context = _blocks_text(agent.tool_context.kept_results) - assert "key=tr.1" not in context - assert "key=tr.2" in context - - -def test_kept_tool_results_respect_per_block_char_budget(tmp_path, monkeypatch): - agent = Agent(Session(cwd=str(tmp_path))) - agent.mode = nanocode.AgentMode.OBSERVE - _set_context_budget(monkeypatch, agent, kept_chars=10_000, kept_block_chars=300) - agent.tool_context.latest = [ - '- ok tool=Read args=["large.py"] key=tr.1\n output:\n' + ("head\n" + ("x" * 2000) + "\ntail") - ] - - agent.handle_response({"actions": [{"type": "keep", "source": ["tr.1"], "reason": "large output matters"}]}) - - assert len(agent.tool_context.kept_results[0]) <= agent.context_budget().kept_block_chars - assert "key=tr.1" in agent.tool_context.kept_results[0] - assert "[tool result excerpt]" in agent.tool_context.kept_results[0] - - -def test_observe_checkpoint_clears_observe_errors(tmp_path): - (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") - agent = Agent(Session(cwd=str(tmp_path))) - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.mode = nanocode.AgentMode.OBSERVE - agent.observe_feedback_errors = ["old observe error"] - - agent.handle_response({"actions": [{"type": "keep", "source": ["tr.1"], "reason": "sample.txt contains alpha"}]}) - - assert agent.mode == nanocode.AgentMode.ACT - assert agent.observe_feedback_errors == [] - - -def test_projected_read_context_budget_ignores_replaced_raw_read_blocks(tmp_path, monkeypatch): - session = Session(cwd=str(tmp_path)) - agent = Agent(session) - _set_context_budget(monkeypatch, agent, raw_chars=1_000_000, observe_after_results=99) - path = tmp_path / "sample.txt" - path.write_text("x" * 2000 + "\n", encoding="utf-8") - - for _ in range(4): - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - assert agent.mode == nanocode.AgentMode.ACT - - pending = agent._unreferenced_unreduced_blocks() - raw_chars = agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter) - projected_chars = agent._projected_unreduced_context_chars(pending) - assert projected_chars < raw_chars - - _set_context_budget(monkeypatch, agent, raw_chars=(raw_chars + projected_chars) // 2, observe_after_results=99) - - assert raw_chars >= agent.context_budget().raw_chars - assert agent._projected_unreduced_context_chars(pending) < agent.context_budget().raw_chars - assert agent._should_observe_after_tools() is False - - -def test_projected_raw_output_budget_triggers_observe(tmp_path, monkeypatch): - agent = Agent(Session(cwd=str(tmp_path))) - _set_context_budget(monkeypatch, agent, raw_chars=180, observe_after_results=99) - agent.tool_context.latest = ['- ok tool=Bash args=["big"] key=tr.1\n output:\n' + ("x" * 400)] - - pending = agent._unreferenced_unreduced_blocks() - - assert agent._projected_unreduced_context_chars(pending) >= agent.context_budget().raw_chars - assert agent._should_observe_after_tools() is True - - -def test_referenced_raw_context_does_not_force_observe(tmp_path, monkeypatch): - session = Session(cwd=str(tmp_path)) - agent = Agent(session) - _set_context_budget(monkeypatch, agent, raw_chars=10_000, observe_after_results=99) - path = tmp_path / "sample.txt" - path.write_text("x" * 400 + "\n", encoding="utf-8") - - agent.execute_tool_calls([{"name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]) - agent.apply_response( - {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "sample.txt content was inspected."}]}]} - ) - _set_context_budget(monkeypatch, agent, raw_chars=180, observe_after_results=99) - - assert agent.tool_context.raw_context_chars(agent.blackboard.memory_checkpoint_tool_result_counter, exclude_keys=agent.blackboard.referenced_result_keys()) == 0 - assert agent._should_observe_after_tools() is False - - def test_agent_tool_result_index_has_count_limit(tmp_path, monkeypatch): session = Session(cwd=str(tmp_path)) agent = Agent(session) @@ -1565,13 +922,11 @@ def test_agent_prunes_tool_result_store_but_keeps_referenced_result_keys(tmp_pat for index in range(52): key = "tr." + str(index + 1) session.state.tool_result_store[key] = nanocode.ToolResultItem(description=key, value="value") - agent.tool_context.kept_results = ['- ok tool=Read args=["sample.txt"] key=tr.1\n output:\nvalue'] agent.blackboard.leads = [nanocode.Lead(id="h1", text="kept branch", source=("tr.2",))] agent._prune_tool_result_store() assert len(session.state.tool_result_store) == 50 - assert "tr.1" in session.state.tool_result_store assert "tr.2" in session.state.tool_result_store assert "tr.3" not in session.state.tool_result_store assert "tr.52" in session.state.tool_result_store @@ -2969,10 +2324,9 @@ def test_agent_accepts_goal_without_plan_for_new_task(tmp_path): assert messages == ["Goal Updated\n change map"] -def test_new_goal_clears_task_local_kept_results_only(tmp_path): +def test_new_goal_compacts_task_local_raw_results(tmp_path): agent = Agent(Session(cwd=str(tmp_path))) agent.blackboard.goal = "old goal" - agent.tool_context.kept_results = ['- ok tool=Read args=["old.py"] key=tr.1\n output:\nselected result'] agent.tool_context.latest = ['- ok tool=Read args=["latest.py"] key=tr.3\n output:\nlatest raw'] agent.tool_context.recent = ['- ok tool=Read args=["recent.py"] key=tr.4\n out: 3 lines, 12 chars; recall=tr.4'] @@ -2988,16 +2342,14 @@ def test_new_goal_clears_task_local_kept_results_only(tmp_path): } ) - assert agent.tool_context.kept_results == [] assert "latest.py" in _blocks_text(agent.tool_context.latest) assert "latest raw" not in _blocks_text(agent.tool_context.latest) assert "recent.py" in _blocks_text(agent.tool_context.recent) -def test_same_goal_keeps_task_local_tool_results(tmp_path): +def test_same_goal_keeps_task_local_raw_results(tmp_path): agent = Agent(Session(cwd=str(tmp_path))) agent.blackboard.goal = "same goal" - agent.tool_context.kept_results = ['- ok tool=Read args=["old.py"] key=tr.1\n output:\nselected result'] agent.tool_context.latest = ['- ok tool=Read args=["new.py"] key=tr.2\n output:\npending raw'] agent.apply_response( @@ -3012,7 +2364,6 @@ def test_same_goal_keeps_task_local_tool_results(tmp_path): } ) - assert "selected result" in _blocks_text(agent.tool_context.kept_results) assert "pending raw" in _blocks_text(agent.tool_context.latest) @@ -3296,7 +2647,6 @@ def test_agent_blocks_repeated_identical_failed_tool_call(tmp_path): action = {"type": "tool", "name": "Read", "intention": "bad range", "args": _read_args("sample.txt", line_range=["bad", 1])} agent.handle_response({"actions": [action]}) - agent.handle_response({"actions": [{"type": "forget", "source": ["tr.1"], "reason": "failed read has no useful result"}]}) agent.handle_response({"actions": [action]}) result = agent.handle_response({"actions": [action]}) @@ -3383,7 +2733,7 @@ def __init__(self): { "actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}] }, - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, + {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "Read sample.txt and found alpha."}]}]}, { "actions": [ { @@ -3421,7 +2771,6 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert "alpha" in fake_client.user_prompts[1] assert "" not in fake_client.user_prompts[1] assert "alpha" in fake_client.user_prompts[2] - assert "Kept Tool Results:" in fake_client.user_prompts[2] assert "" not in fake_client.user_prompts[2] assert 'tool=Read args=[{"path":"sample.txt","range":[0,1]}]' in _blocks_text(agent.tool_context.latest) assert agent.tool_context.recent == [] @@ -3496,7 +2845,7 @@ def test_agent_normalizes_protocol_action_type_case(tmp_path): } ) - assert [action["type"] for action in ctx.actions] == ["goal", "plan", "known", "lead", "verify", "user_rule", "forget", "keep", "tool"] + assert [action["type"] for action in ctx.actions] == ["goal", "plan", "known", "lead", "verify", "user_rule", "FORGET", "KEEP", "tool"] def test_agent_accepts_capitalized_goal_action_type(tmp_path): @@ -3605,7 +2954,7 @@ def __init__(self): }, ] }, - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, + {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "Read sample.txt and found alpha."}]}]}, {"actions": [{"type": "goal", "text": "change sample", "complete": True, "message_for_complete": "done"}]}, { "actions": [ @@ -3689,7 +3038,7 @@ def __init__(self): self.responses = [ {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, {"_format_error": "Invalid function-tool response: plain answer", "actions": []}, - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, + {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "Read sample.txt and found alpha."}]}]}, {"actions": _final_actions("read sample")}, ] @@ -3699,7 +3048,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): session = Session(cwd=str(tmp_path)) agent = Agent(session) - _set_context_budget(monkeypatch, agent, observe_after_results=1) + _set_context_budget(monkeypatch, agent) _seed_plan(agent, "read sample") agent.model_client = FakeModelClient() @@ -3713,7 +3062,6 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert "File Context:" in agent.model_client.user_prompts[2] assert "alpha" in agent.model_client.user_prompts[2] assert "" not in agent.model_client.user_prompts[2] - assert "Kept Tool Results:" in agent.model_client.user_prompts[3] assert "alpha" in agent.model_client.user_prompts[3] assert "" not in agent.model_client.user_prompts[3] assert 'tool=Read args=[{"path":"sample.txt","range":[0,1]}]' in _blocks_text(agent.tool_context.latest) @@ -3733,8 +3081,7 @@ def __init__(self): for index in range(51) ] }, - {"actions": [{"type": "forget", "source": ["tr." + str(index) for index in range(1, 52)], "reason": "bulk sample reads are not needed after execution"}]}, - {"actions": _final_actions("read samples")}, + {"actions": _final_actions("read samples")}, ] def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): @@ -3775,7 +3122,7 @@ def __init__(self): self.user_prompts = [] self.responses = [ {"actions": [{"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}]}, - {"actions": [{"type": "forget", "source": ["tr.1"], "reason": "sample content is not needed"}]}, + {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "sample content was inspected."}]}]}, {"actions": _final_actions("read sample", "done too early")}, ] @@ -3866,7 +3213,7 @@ def __init__(self): {"type": "tool", "name": "Read", "intention": "read sample", "args": _read_args("sample.txt", line_range=[0, 1])}, ] }, - {"actions": [{"type": "keep", "source": ["tr.1"], "reason": "keep useful result"}]}, + {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "Read sample.txt and found alpha."}]}]}, { "actions": [ {"type": "plan", "items": [{"id": "p1", "text": "Read sample", "status": "done", "context": "read sample.txt"}]}, @@ -4798,7 +4145,7 @@ def __init__(self): ], "_assistant_text": "reading sample", }, - {"actions": [{"type": "forget", "source": ["tr.1"], "reason": "progress-only read result is not needed"}]}, + {"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "sample.txt was read."}]}]}, {"actions": _final_actions()}, ] diff --git a/tests/test_nanocode_commands.py b/tests/test_nanocode_commands.py index b43c94c..8489e44 100644 --- a/tests/test_nanocode_commands.py +++ b/tests/test_nanocode_commands.py @@ -34,7 +34,7 @@ def __init__(self, **kwargs): return seen -def make_session(tmp_path, *, model: str = "", stream: bool | None = None, compact_at: int = 50) -> Session: +def make_session(tmp_path, *, model: str = "", stream: bool | None = None, compact_at: int = 80) -> Session: provider: dict[str, object] = {"model": model} if stream is not None: provider["stream"] = stream @@ -79,7 +79,7 @@ def test_command_dispatcher_updates_config_and_auto_compacts(tmp_path): assert session.config.provider.first_token_timeout == 6 assert yolo_result.message == "Set runtime.yolo = on" assert session.settings.yolo is True - assert compact_result.message == "Set runtime.compact_at = 2" + assert compact_result.message == "Set runtime.compact_at = 2%" assert session.settings.compact_at == 2 assert context_result.message == "Set runtime.context_budget = low" assert session.settings.context_budget == "low" @@ -106,7 +106,7 @@ def test_status_reports_tokens_in_human_readable_format(tmp_path, monkeypatch): assert "cache: last=400 session=400 rate=40%" in result.message assert "model: model api=chat(auto) reasoning=medium(off) stream=on" in result.message assert "session: " + session.session_id in result.message - assert "runtime: yolo=off compact_at=50" in result.message + assert "runtime: yolo=off compact_at=80%" in result.message assert "models:" in result.message assert "model: calls=2 tokens=2m cached=400" in result.message assert "tool_calls: turn=0 session=0" in result.message @@ -201,7 +201,6 @@ def test_plan_runtime_config_keys_are_removed(tmp_path): def test_context_command_shows_and_sets_budget(tmp_path): session = make_session(tmp_path) agent = Agent(session) - agent.tool_context.kept_results = ['- ok tool=Read args=["large.py"] key=tr.1\n output:\n' + ("x" * 10_000)] dispatcher = CommandDispatcher(agent) show_result = dispatcher.dispatch("/context") @@ -210,10 +209,9 @@ def test_context_command_shows_and_sets_budget(tmp_path): invalid_result = dispatcher.dispatch("/context tiny") assert "context_budget: medium" in show_result.message - assert "observe_after_results: 10" in show_result.message + assert "prompt_chars: 160000" in show_result.message assert set_result.message.startswith("Set runtime.context_budget = low\ncontext_budget: low") assert session.settings.context_budget == "high" - assert len(agent.tool_context.kept_results[0]) <= agent.context_budget().kept_block_chars assert alias_result.message.startswith("Set runtime.context_budget = high\ncontext_budget: high") assert invalid_result.message == "Usage: /context [low|medium|high]" @@ -491,10 +489,9 @@ def test_command_dispatcher_auto_compacts_only_when_history_exceeds_keep_recent( result = dispatcher.dispatch("/set runtime.compact_at 2") - assert result.message == "Set runtime.compact_at = 2 and compacted history" + assert result.message == "Set runtime.compact_at = 2%" assert len(session.state.conversation) == 6 - assert session.state.conversation[0].content == "Conversation compact summary:\nLLM compact summary" - assert session.state.conversation[1].content == "keep 1" + assert session.state.conversation[0].content == "old" def test_command_dispatcher_runs_compact_with_status_runner(tmp_path): @@ -520,7 +517,7 @@ def run_with_status(action): result = dispatcher.dispatch("/compact") assert result.status == CommandStatus.HANDLED - assert result.message == "Compacted conversation history: 6 item(s) -> 6 item(s)" + assert result.message == "Compacted context: 6 item(s)" assert status_calls == ["run"] assert session.state.conversation[0].content == "Conversation compact summary:\nLLM compact summary" @@ -534,7 +531,7 @@ def test_compact_command_reports_short_history(tmp_path): result = dispatcher.dispatch("/compact") assert result.status == CommandStatus.HANDLED - assert result.message == "Nothing to compact: 2 item(s), keeping recent 5." + assert result.message == "Nothing to compact: conversation=2 item(s), raw_results=0." assert len(session.state.conversation) == 2 @@ -555,9 +552,9 @@ def test_command_dispatcher_auto_compact_uses_status_runner(tmp_path): result = dispatcher.dispatch("/set runtime.compact_at 2") - assert result.message == "Set runtime.compact_at = 2 and compacted history" - assert status_calls == ["run"] - assert session.state.conversation[0].content == "Conversation compact summary:\nLLM compact summary" + assert result.message == "Set runtime.compact_at = 2%" + assert status_calls == [] + assert session.state.conversation[0].content == "old" def test_command_dispatcher_reports_unhandled_input(tmp_path): diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py index 7cb010f..727cfd2 100644 --- a/tests/test_nanocode_loop.py +++ b/tests/test_nanocode_loop.py @@ -6,7 +6,7 @@ from nanocode import AgentLoop, CommandLexer, Config, ConfigFile, Blackboard, ParsedToolCall, RuntimeSettings, Session, StatusBar, ToolCallDisplayFormatter -def make_session(tmp_path, *, model: str = "", compact_at: int = 50, yolo: bool = False) -> Session: +def make_session(tmp_path, *, model: str = "", compact_at: int = 80, yolo: bool = False) -> Session: data = { "provider": {"active": "default", "default": {"model": model}}, "paths": {"data_dir": str(tmp_path / ".nanocode")}, @@ -87,7 +87,7 @@ def test_init_config_file_writes_default_toml(tmp_path): assert "chat_reasoning" not in config["provider"]["default"] assert config["provider"]["default"]["timeout"] == 180 assert config["provider"]["default"]["first_token_timeout"] == 90 - assert config["runtime"]["compact_at"] == 50 + assert config["runtime"]["compact_at"] == 80 assert config["runtime"]["context_budget"] == "medium" assert config["runtime"]["auto_clean_recent"] == "1d" assert config["runtime"]["yolo"] is False @@ -168,14 +168,14 @@ def test_status_bar_text_has_visible_sweep_marker(tmp_path): assert ">" not in text assert "model (medium)" in text - assert "ctx:0/9" in text + assert "ctx:0%" in text assert "tool:3" in text assert "tok:last:42 sess:1k" in text assert "turn:1.2s" in text assert all(style.startswith("#") for style, _ in fragments) assert len({style for style, _ in fragments}) > 3 snapshot = _status_text(bar) - assert snapshot == "model (medium) | ctx:0/9 | tool:3 | tok:last:42 sess:1k" + assert snapshot == "model (medium) | ctx:0% | tool:3 | tok:last:42 sess:1k" assert ">" not in snapshot @@ -199,9 +199,6 @@ def test_status_bar_shows_current_model_call_number(tmp_path): assert "10t/s" in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) session.state.current_model_call_has_content = False - session.state.current_model_call_activity = "observe" - assert "observing(2):0.6s" in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) - session.state.current_model_call_activity = "compact" assert "compacting(2):0.6s" in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) @@ -210,7 +207,7 @@ def test_status_bar_shows_active_modes(tmp_path): session = make_session(tmp_path, model="provider/model", yolo=True) bar = StatusBar(session) - assert _status_text(bar) == "model (medium) | yolo | ctx:0/50 | tool:0 | tok:last:- sess:-" + assert _status_text(bar) == "model (medium) | yolo | ctx:0% | tool:0 | tok:last:- sess:-" def test_status_bar_shows_recent_status_notice(tmp_path): @@ -997,5 +994,5 @@ def __init__(self): assert kwargs["refresh_interval"] == StatusBar.INTERVAL assert callable(kwargs["bottom_toolbar"]) assert "".join(text for _, text in kwargs["bottom_toolbar"]()) == ( - "model (medium) | ctx:0/50 | tool:0 | tok:last:- sess:-" + "model (medium) | ctx:0% | tool:0 | tok:last:- sess:-" ) From 9eabfd80c7203c732795b56f77c31ca18e5a9941 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 06:12:14 -0700 Subject: [PATCH 22/29] Use token-based context sizing in status bar --- nanocode.py | 57 ++++++++++++++++++++------------- tests/test_nanocode_commands.py | 2 +- tests/test_nanocode_loop.py | 14 ++++---- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/nanocode.py b/nanocode.py index c79542f..0cb36a7 100644 --- a/nanocode.py +++ b/nanocode.py @@ -596,14 +596,14 @@ class ContextBudget: kept_chars: int kept_block_chars: int index_items: int - prompt_chars: int + prompt_tokens: int planless_discovery_tool_calls: int CONTEXT_BUDGETS: dict[str, ContextBudget] = { - "low": ContextBudget(36_000, 16_000, 4_000, 20, 80_000, 6), - "medium": ContextBudget(72_000, 32_000, 6_000, 30, 160_000, 8), - "high": ContextBudget(120_000, 64_000, 8_000, 60, 240_000, 12), + "low": ContextBudget(36_000, 16_000, 4_000, 20, 64_000, 6), + "medium": ContextBudget(72_000, 32_000, 6_000, 30, 128_000, 8), + "high": ContextBudget(120_000, 64_000, 8_000, 60, 256_000, 12), } @@ -846,7 +846,7 @@ class AgentRunResult: @dataclass class RuntimeState: debug_prompt_count: int = 0 - last_context_chars: int = 0 + last_context_tokens: int = 0 last_context_percent: int = 0 last_prompt_tokens: int = 0 last_completion_tokens: int = 0 @@ -4873,6 +4873,10 @@ def _record_usage(self, usage: Json, config: ProviderConfig, *, elapsed: float = if completion_tokens > 0 and elapsed > 0: self.session.state.last_model_call_rate = completion_tokens / elapsed self.session.state.last_prompt_tokens = prompt_tokens + if prompt_tokens > 0: + budget = CONTEXT_BUDGETS[self.session.settings.context_budget] + self.session.state.last_context_tokens = prompt_tokens + self.session.state.last_context_percent = _ceil_percent(prompt_tokens, budget.prompt_tokens) self.session.state.last_completion_tokens = completion_tokens self.session.state.last_total_tokens = total_tokens self.session.state.last_cached_prompt_tokens = cached_prompt_tokens @@ -5810,20 +5814,17 @@ def compact_context(self) -> int: self.apply_context_budget() return compacted_conversation + len(observed_blocks) - def _prompt_context_chars(self, system_prompt: str, user_prompt: str, tool_schemas: list[Json]) -> int: + def _prompt_context_tokens(self, system_prompt: str, user_prompt: str, tool_schemas: list[Json]) -> int: schema_chars = len(json.dumps(tool_schemas, ensure_ascii=False, sort_keys=True, separators=(",", ":"))) if tool_schemas else 0 - return len(system_prompt) + len(user_prompt) + schema_chars + return _estimate_prompt_tokens(len(system_prompt) + len(user_prompt) + schema_chars) - def _context_percent(self, chars: int) -> int: - budget_chars = max(1, self.context_budget().prompt_chars) - if chars <= 0: - return 0 - return max(1, (chars * 100 + budget_chars - 1) // budget_chars) + def _context_percent(self, tokens: int) -> int: + return _ceil_percent(tokens, self.context_budget().prompt_tokens) def _record_context_size(self, system_prompt: str, user_prompt: str, tool_schemas: list[Json]) -> int: - chars = self._prompt_context_chars(system_prompt, user_prompt, tool_schemas) - percent = self._context_percent(chars) - self.session.state.last_context_chars = chars + tokens = self._prompt_context_tokens(system_prompt, user_prompt, tool_schemas) + percent = self._context_percent(tokens) + self.session.state.last_context_tokens = tokens self.session.state.last_context_percent = percent return percent @@ -7250,7 +7251,7 @@ def _status(self, args: str) -> str: + "%" + " context_budget=" + session.settings.context_budget, - "context: " + str(session.state.last_context_percent) + "% (" + str(session.state.last_context_chars) + " chars)", + "context: " + str(session.state.last_context_percent) + "% (" + str(session.state.last_context_tokens) + " tokens)", "conversation: " + str(len(session.state.conversation)) + " item(s)", "tool_calls: turn=" + str(session.state.turn_tool_calls) + " session=" + str(session.state.session_tool_calls), "tools: code_index=" + code_index, @@ -7316,7 +7317,7 @@ def _format_context_budget(self) -> str: "kept_chars: " + str(budget.kept_chars), "kept_block_chars: " + str(budget.kept_block_chars), "index_items: " + str(budget.index_items), - "prompt_chars: " + str(budget.prompt_chars), + "prompt_tokens: " + str(budget.prompt_tokens), ] ) @@ -7468,6 +7469,22 @@ def _format_count(value: int) -> str: return str(value) +def _estimate_prompt_tokens(chars: int) -> int: + return 0 if chars <= 0 else (chars + 3) // 4 + + +def _ceil_percent(value: int, total: int) -> int: + if value <= 0 or total <= 0: + return 0 + return max(1, (value * 100 + total - 1) // total) + + +def _format_duration(value: float) -> str: + seconds = max(0, int(value)) + minutes, seconds = divmod(seconds, 60) + return (str(minutes) + "m" if minutes else "") + str(seconds) + "s" + + def _format_percent(value: int, total: int) -> str: return "-" if value <= 0 or total <= 0 else str(round(value * 100 / total)) + "%" @@ -7561,15 +7578,11 @@ def _format_line(self, turn_elapsed: float, *, now: float, show_elapsed: bool) - if session.state.status_notice and session.state.status_notice_until > now: parts.insert(1, session.state.status_notice) if show_elapsed: - parts.append(f"turn:{turn_elapsed:.1f}s") + parts.append("turn:" + _format_duration(turn_elapsed)) if session.state.current_model_call_started_at > 0: - activity = {"compact": "compacting"}.get(session.state.current_model_call_activity, "working") - if session.state.current_model_call_has_content: - activity += "*" elapsed = max(0.0, now - session.state.current_model_call_started_at) if session.state.current_model_call_has_content and elapsed > 0: rate = session.state.current_model_call_streaming_chars / 4 / elapsed - parts.append(activity + "(" + str(session.state.turn_model_calls) + "):" + f"{elapsed:.1f}s") if rate > 0: parts[3] += " " + _format_count(int(rate)) + "t/s" return " | ".join(parts) diff --git a/tests/test_nanocode_commands.py b/tests/test_nanocode_commands.py index 8489e44..789b489 100644 --- a/tests/test_nanocode_commands.py +++ b/tests/test_nanocode_commands.py @@ -209,7 +209,7 @@ def test_context_command_shows_and_sets_budget(tmp_path): invalid_result = dispatcher.dispatch("/context tiny") assert "context_budget: medium" in show_result.message - assert "prompt_chars: 160000" in show_result.message + assert "prompt_tokens: 128000" in show_result.message assert set_result.message.startswith("Set runtime.context_budget = low\ncontext_budget: low") assert session.settings.context_budget == "high" assert alias_result.message.startswith("Set runtime.context_budget = high\ncontext_budget: high") diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py index 727cfd2..a33ebe6 100644 --- a/tests/test_nanocode_loop.py +++ b/tests/test_nanocode_loop.py @@ -171,7 +171,7 @@ def test_status_bar_text_has_visible_sweep_marker(tmp_path): assert "ctx:0%" in text assert "tool:3" in text assert "tok:last:42 sess:1k" in text - assert "turn:1.2s" in text + assert "turn:1s" in text assert all(style.startswith("#") for style, _ in fragments) assert len({style for style, _ in fragments}) > 3 snapshot = _status_text(bar) @@ -179,7 +179,7 @@ def test_status_bar_text_has_visible_sweep_marker(tmp_path): assert ">" not in snapshot -def test_status_bar_shows_current_model_call_number(tmp_path): +def test_status_bar_hides_current_model_call_timer(tmp_path): session = make_session(tmp_path, model="provider/model") session.state.turn_model_calls = 2 session.state.current_model_call_started_at = 0.4 @@ -191,16 +191,18 @@ def test_status_bar_shows_current_model_call_number(tmp_path): text = "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) assert "active-model (low)" in text - assert "working(2):0.6s" in text + assert "working(" not in text session.state.current_model_call_has_content = True session.state.current_model_call_streaming_chars = 24 - assert "working*(2):0.6s" in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) - assert "10t/s" in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) + streamed = "".join(text for _, text in bar._fragments(74.2, now=1.0, show_sweep=True, show_elapsed=True)) + assert "working" not in streamed + assert "10t/s" in streamed + assert "turn:1m14s" in streamed session.state.current_model_call_has_content = False session.state.current_model_call_activity = "compact" - assert "compacting(2):0.6s" in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) + assert "compacting(" not in "".join(text for _, text in bar._fragments(0.0, now=1.0, show_sweep=True, show_elapsed=True)) def test_status_bar_shows_active_modes(tmp_path): From b5ac3010f05e8e7632e62b1ee31a377bcd9b6589 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 06:32:29 -0700 Subject: [PATCH 23/29] Rework compact into working context snapshots --- design.md | 442 ++++++++++++++++++------------- nanocode.py | 284 ++++++++++++-------- tests/test_nanocode_agent.py | 20 +- tests/test_nanocode_commands.py | 8 +- tests/test_nanocode_compactor.py | 73 ++++- tests/test_nanocode_loop.py | 17 +- 6 files changed, 505 insertions(+), 339 deletions(-) diff --git a/design.md b/design.md index 8627dc7..b303d81 100644 --- a/design.md +++ b/design.md @@ -2,263 +2,340 @@ ## Agent Model -nanocode uses one agent. The agent is responsible for: +nanocode uses one primary agent plus a maintenance compactor. -- understanding the user request -- maintaining goal, plan, hypotheses, and memory -- calling tools -- verifying work +The primary agent is responsible for: + +- understanding the latest user request +- maintaining Goal, Plan, Facts, Leads, Checks, and User Rules +- calling repository, shell, edit, and context tools +- verifying work before completion - deciding when the task is complete -The agent has a work path and a cleanup path: +Runtime activities: -- `ACT`: normal work. It plans, investigates, edits, verifies, and answers. -- `OBSERVE`: tool-result reducer. It decides which unreduced raw tool results stay in context and which are compacted away. +- `agent`: normal work. It plans, investigates, edits, verifies, and answers. +- `compact`: maintenance. It rebuilds a minimal Working Context Snapshot from + old prompt conversation, blackboard state, and raw tool evidence when context + pressure is high or `/compact` is requested. -Conversation compaction is a background maintenance path. It summarizes old conversation history when the conversation list grows too large. +There is no separate result-reducer mode and no manual result-retention tools. +Tool-result cleanup is handled by context compaction and by dynamic prompt projection. ## Model Output Protocol Model decisions use function tools: -- state tools update goal, plan, hypotheses, known facts, verification, and result retention -- repository tools read, search, edit, run commands, and recall stored results -- compaction uses a dedicated `compact` function tool +- state actions update Goal, Plan, Facts, Leads, Checks, and User Rules +- repository tools read, search, inspect symbols, edit, run commands, and inspect + git state +- `Recall` retrieves stored tool results by `tr.N` key +- compaction uses a dedicated JSON response contract in the compact activity + +Assistant text is user-facing. It must not replace the next useful function +tool when work remains. Completing tracked work still requires a goal action +with `complete=true` after checks are settled. -Assistant text is optional user-facing text. It must not replace the next useful -function tool. Completing work still requires a `goal` function tool call with -`complete=true`. +Function-tool arguments are structured JSON. The CLI display is a separate +human-readable rendering, so a JSON call such as `Read({"path":"a.py"})` can be +shown as `Read a.py`. ## Task State -The main task state lives in the blackboard: +The active task state lives in the blackboard: - latest user request -- current task code -- goal +- task code +- goal and completion flag - plan -- hypotheses -- known facts: settled facts for the current task -- verification state -- recent edits - -New user input keeps the previous task state available for follow-ups like "continue". - -Old task state is cleared only when the model explicitly sets a different goal. When that happens, transient investigation state such as hypotheses and selected tool-result context is reset, while durable knowledge is kept. +- leads: unconfirmed findings, usually source-backed +- facts: confirmed knowledge, usually source-backed +- memory checkpoint for compacted tool results +- check requirement and check result -## New Goal Handling +Recent edits and feedback errors live in the agent runtime. User Rules are +durable session rules. -New user input does not immediately clear the previous task. This keeps short -follow-ups such as "continue" usable. - -When the model outputs `goal` with a different current-task goal: - -- goal and plan are replaced -- hypotheses are cleared -- verification is reset -- kept tool results are cleared -- visible raw tool results are compacted into summaries -- full tool logs remain available through `Recall tr.N` -- known facts remain available +New user input keeps previous task state available so follow-ups like +`continue` can resume. The agent must realign the state when the latest request +changes the task. When the model sets a different non-complete goal, current +raw tool context is compacted, the memory checkpoint advances, and active Leads +are cleared. Facts remain available unless explicitly changed by state updates. ## Context Construction -ACT mode receives a working context: - -- goal, plan, hypotheses, verification -- environment, including whether local symbol inspection is available -- Tool Result Index -- Kept Tool Results -- Unreduced Tool Results -- Latest Tool Results -- errors -- recent edits -- known facts -- conversation history -- latest user request - -OBSERVE receives a smaller cleanup context: - -- latest user request -- goal, plan, hypotheses -- known facts -- kept tool results -- observe errors -- unreduced raw tool results selected from recent/latest storage - -OBSERVE reduces tool-result noise before ACT continues. - -The code navigation tool is environment-gated. `InspectCode` is shown only when -the built-in code index is available. It supports `find`, `inspect`, and -`outline` modes for symbol queries or file paths, not natural-language -questions. The index is created explicitly with `/index`, rebuilt with -`/index force`, and lightly updated at startup when it already exists. - -Context layout: - -Layout rules: - -- Lower means closer to `YOUR OUTPUT`. -- Put stable background and lookup-only indexes higher. -- Put newer, authoritative, decision-driving context lower. -- Keep large evidence blocks above the final decision area. -- Apply the same ordering inside each section. +Agent prompts are built from stable context toward volatile decision context: ```text -ACT user prompt, top -> bottom +agent user prompt, top -> bottom +--------------------------------------------------+------------------------------+ -| Context section | Budget / control | +| Section | Main control | +--------------------------------------------------+------------------------------+ -| Background | compact_at | +| Stable Context | provider prefix cache | | - Environment | | | - User Rules | | -| - Conversation History | | -+--------------------------------------------------+------------------------------+ -| Tool Result Index | TOOL_RESULT_INDEX_ITEMS | -| - Archived Recall Index | | -| - Current Task Timeline | | +| - Conversation History | compact activity | +--------------------------------------------------+------------------------------+ -| Kept Tool Results | KEPT_TOOL_RESULT_CHARS | -| - kept_results | | +| Task State | blackboard | +| - Goal / Facts / Leads / Plan / Focus / Checks | | +| - Recent Edits | RECENT_EDITS | +--------------------------------------------------+------------------------------+ -| Unreduced Tool Results | TOOL_RESULT_RAW_CHARS trigger| -| - unreduced recent | OBSERVE_AFTER_PENDING... | +| Tool Context | context budget | +| - Tool Result Index | index_items | +| - Discovery Context | raw_chars / 3 | +| - File Context | raw_chars + kept_chars | +| - Unreduced Tool Results | compact checkpoint | +| - Latest Tool Results | latest batch | +--------------------------------------------------+------------------------------+ -| Latest Tool Results | TOOL_RESULT_RAW_CHARS trigger| -| - latest | MAX_TOOL_OUTPUT_CHARS/item | -+--------------------------------------------------+------------------------------+ -| Current Decision | section-local limits | -| - Recent Edits | | -| - Known | | -| - Current Phase / Work Mode | | -| - Goal / Plan / Hypotheses / Verify | | -| - Errors | | +| Current Input | latest user request | +| - Blocking Feedback | | +| - Pending User Feedback | | | - Latest User Request | | -| - Output Instructions | | ++--------------------------------------------------+------------------------------+ +| Output Guide | final steering | +--------------------------------------------------+------------------------------+ ``` -Bounded raw output means the original tool output after per-result truncation. -Compact summaries keep only execution metadata, size, and `recall=tr.N`. +Layout rules: + +- Put stable context higher to preserve provider prefix cache hits. +- Put current user input, blocking feedback, and output rules closest to + `YOUR OUTPUT`. +- Keep large evidence blocks in Tool Context, above the final decision area. +- Prefer dynamic projections over repeating raw tool outputs. + +## Tool Result Storage + +Every non-context tool call gets a result key such as `tr.12`. + +For regular tools: + +- full output is written to the session log directory +- bounded output is stored in the active `tool_result_store` +- prompt context receives bounded raw output, compact summaries, or projections +- full detail can be retrieved later with `Recall tr.N` -Raw tool result content is de-duplicated by `tr.N`. Timeline summaries may keep -duplicate keys as compact index entries, especially for kept results, so the -model can still see result ordering without rereading raw content. +Conversation has the same split: -Tool result context budgets: +- `conversation_log` is append-only audit state for the session +- `conversation` is prompt context and may be replaced by Working Context + Snapshot plus recent turns -- `MAX_TOOL_OUTPUT_CHARS` bounds each raw tool result before it enters context. -- `KEPT_TOOL_RESULT_CHARS` limits `Kept Tool Results`. -- `TOOL_RESULT_RAW_CHARS` triggers OBSERVE when `Unreduced Tool Results + Latest Tool Results` grow too large. It is not a pre-observe truncation limit. -- `TOOL_RESULT_INDEX_ITEMS` limits compact index/timeline entries; current-task timeline entries take priority over archived entries. +`Recall` is a context tool. It does not receive a new ordinary result key and +does not add its own raw block to the normal tool-result index. On success, the +stored results it returns are reconstructed as their original result blocks and +reactivated in the current tool context. + +Tool-result storage is bounded: + +- the runner keeps at most `MAX_TOOL_RESULT_STORE_ITEMS` entries during normal + storage pressure +- at the start of a user turn, completed-goal storage is pruned toward + `MAX_COMPLETED_GOAL_TOOL_RESULTS` +- result keys referenced by active state are protected from this pruning ## Tool Result Context -Internal tool-result storage has three fields: +`ToolResultContext` keeps only two active prompt lists: + +- `latest`: bounded raw output from the most recent regular tool batch +- `recent`: older blocks, either still raw or already compacted -- `latest`: raw bounded output from the most recent tool batch -- `kept_results`: useful raw results selected by OBSERVE and retained for ACT -- `recent`: older visible results, usually compact summaries +There is no `kept_results` bucket. Raw blocks remain visible until they are +covered by `compact_context()` or by the memory checkpoint. -Prompt layout renders those fields as Tool Result Index, Kept Tool Results, -Unreduced Tool Results, and Latest Tool Results. Recent raw results that have -not been reduced yet remain visible as Unreduced Tool Results until OBSERVE -covers them. +After each regular tool batch: -ACT should render tool context in this order: +1. previous `latest` moves to `recent` +2. the new batch becomes `latest` +3. `recent` is pruned so compact timeline entries fit the current budget +4. the next prompt renders timeline summaries plus active raw/projection blocks -1. Tool Result Index: - - archived recallable summaries, separated from the current task timeline - - current task timeline summaries -2. Kept Tool Results: kept raw results -3. Unreduced Tool Results: unreduced older raw results -4. Latest Tool Results: latest raw results +The Tool Result Index has two parts: -This keeps the newest and most actionable tool output closest to the model's -next decision while preserving a compact timeline above it. +- `Archived Recall Index`: recallable stored results not otherwise visible +- `Current Task Timeline`: compact summaries for current `recent + latest` -## Tool Result Storage +Raw content is de-duplicated by result key when rendering unreduced blocks and +timeline entries. -Every tool call gets a result key such as `tr.12`. +## File Context -The full tool output is written to the session log directory. The model sees -bounded output or compact summaries in context and can fetch full output later -with `Recall tr.N`. +File Context is a dynamic prompt projection built before each model request. +It is not separate persistent storage. -This separates storage from context: +Inputs: -- logs keep the full result -- context keeps active raw evidence and compact recall indexes -- `Recall` restores detail on demand -- the active store keeps up to `MAX_COMPLETED_GOAL_TOOL_RESULTS` completed-goal - results, inside the lower-level `MAX_TOOL_RESULT_STORE_ITEMS` cap +- active raw `Read` results +- active raw `Edit` results +- successful `Recall` results after they are reactivated into their original + blocks -Tool result lifetime: +Projection policy: -- full output is always stored under `tr.N` and can be restored with `Recall` -- active context starts with bounded raw output in `Latest Tool Results` -- after another tool batch, older raw output becomes `Unreduced Tool Results` -- OBSERVE either keeps raw output in `Kept Tool Results` or compacts it into - `Tool Result Index` -- kept results may still have compact timeline entries in `Tool Result Index` -- old timeline summaries may move under `Archived Recall Index` +- Read and Edit outputs carry `source=tr.N`. +- The rendered `Ranges` list and each `@@` content block show the nearest + source key. +- Lines are merged by file path and line number. +- Newer active Read/Edit results overwrite older lines. +- Edit results invalidate stale old ranges and add the edited replacement + ranges. +- `replace_all` invalidates the whole file projection for that source. -From the model's view: +Freshness policy: -1. every tool result gets a `tr.N` key and full log entry -2. ACT sees bounded raw output in Latest Tool Results and Unreduced Tool Results -3. ACT also sees Kept Tool Results selected by OBSERVE -4. OBSERVE sees unreduced raw results selected from `latest` and `recent` -5. OBSERVE must `keep` useful results or `forget` noisy ones -6. forgotten results leave active context, but full logs remain available through `Recall tr.N` +- Read/Edit outputs include file stat and `line:hash|content` anchors. +- If the current file stat still matches the tool result stat, projected lines + are accepted without rereading the file. +- If file stat changed, only projected line numbers are reread and their hashes + are checked. +- Stale or missing lines are omitted and reported under `Omitted stale content`. -After each tool batch: +This prevents Bash or other out-of-band file changes from silently keeping stale +File Context lines in prompt. The slow path only reads lines that are already +being projected. -1. the previous `latest` moves into `recent` -2. the new batch becomes `latest` -3. unreduced raw results render as Unreduced Tool Results or Latest Tool Results -4. OBSERVE later converts unreduced raw results into Kept Tool Results, Tool Result Index summaries, or forgotten context +## Discovery Context + +Discovery Context is a dynamic prompt projection for source-discovery results. + +Inputs: + +- active raw `Search` results +- active raw `InspectCode` results +- successful `Recall` results after reactivation + +Policy: + +- Discovery Context is source-backed by `tr.N`, but it is treated as leads, not + current source truth. +- It may include match snippets, symbol outlines, and line anchors. +- Before editing exact code, the agent should use File Context line anchors or + run `Read` for the missing/current range. +- Discovery blocks are compacted in normal Tool Result Index entries with + `content=discovery_context`, so the raw output is not repeated in Recent Tool + Results. + +## Read, Search, Edit, and Recall + +`Read` accepts structured JSON: + +- `Read({"path":"code.py","range":[0,80]})` +- `Read({"path":"code.py","ranges":[[0,80],[160,220]]})` +- `Read({"files":[{"path":"a.py"},{"path":"b.py","range":[10,40]}]})` +- `Read({"path":"a.py","range":[0,20]}, {"path":"b.py","range":[20,40]})` + +`Search` accepts one or more structured query objects: + +- `Search({"pattern":"class .*Tool","path":"nanocode.py"})` +- `Search({"pattern":"version","glob":"*.toml"}, {"pattern":"version","glob":"*.cfg"})` + +`Edit` uses anchored line hashes from Read, Search, or InspectCode. Successful +Edit results record changed ranges and File Context update data, so modified +ranges can appear in File Context without a follow-up Read. + +`Recall` retrieves stored results by key and optional line ranges. Recalled +Read/Edit results merge back into File Context. Recalled Search/InspectCode +results merge back into Discovery Context. Newer active Read/Edit blocks still +win over older recalled file lines. -This keeps tool results visible until the model has had a chance to decide whether they matter. +## Compact Policy -## Observe Policy +Context compaction is the single cleanup path. -OBSERVE is triggered when unresolved pending results accumulate by count or raw -context pressure. Tool failures stay visible to ACT first; very large failures -still trigger OBSERVE through raw-context pressure. +`/compact` means rebuilding the working prompt context, not deleting logs. It +reads old prompt conversation, current blackboard state, user rules, recent +edits, and selected tool evidence. It returns direct JSON, not a function tool +call, so reasoning/thinking modes stay available and provider `tool_choice` +quirks do not apply. -In OBSERVE, every unreduced result key must be covered by either: +The compact JSON contract is: -- `keep`: retain this raw result in `kept_results` -- `forget`: remove this result from future active context +- `snapshot`: required readable Working Context Snapshot +- `known`: required durable facts, preserving source keys where available +- `goal`, `plan`, `leads`, `checks`, and `user_rules`: optional blackboard/rule + updates -`forget` releases context pressure while preserving logs and Recall ability. +Before each model request: -If a forgotten result contained an important conclusion, the model should preserve that conclusion first in plan, known, hypothesis, or verification state. +1. build the system prompt, user prompt, and tool schemas +2. estimate prompt tokens and record context percent +3. if activity is `agent` and `runtime.compact_at` is reached, run + `compact_context()` +4. rebuild once after compaction before sending the model request + +`compact_context()`: + +- selects unreduced raw tool blocks after the memory checkpoint +- passes those blocks, bounded by the raw budget, to the compact model +- replaces old prompt conversation with Working Context Snapshot plus recent + turns when enough history or tool evidence exists +- updates Goal, Plan, Facts, Leads, Checks, and User Rules from compact JSON +- converts observed raw tool blocks into compact timeline summaries +- advances the memory checkpoint +- reapplies index pruning + +Tool failures stay visible to the agent at least once through Latest Tool Results and +blocking feedback. Invalid tool arguments are also remembered as feedback errors +so the model can correct the next call. ## Context Budgets Context is bounded at several layers: -- tool output is bounded before it enters context -- Tool Result Index has an item budget -- Kept Tool Results have a character budget -- Unreduced Tool Results and Latest Tool Results share a raw character pressure threshold that triggers OBSERVE -- conversation history can be compacted -- old stored tool results are pruned unless protected by active state +- each tool output is bounded before it enters active storage +- Tool Result Index is capped by `index_items` +- Discovery Context uses part of the raw character budget +- File Context uses `raw_chars + kept_chars` +- compact triggering is based on estimated or actual prompt tokens +- prompt conversation can be compacted into a Working Context Snapshot while + full conversation audit state remains append-only +- old stored tool results are pruned unless referenced by active state + +Budget presets: + +```text +low: raw_chars=36000 kept_chars=16000 index_items=20 prompt_tokens=64000 +medium: raw_chars=72000 kept_chars=32000 index_items=30 prompt_tokens=128000 +high: raw_chars=120000 kept_chars=64000 index_items=60 prompt_tokens=256000 +``` + +`runtime.compact_at` is a context percent from `1` to `100`, or `0` to disable +automatic compaction. The default is `80%`. + +The prompt-size estimate is `ceil(chars / 4)` plus tool schema size. When the +provider returns usage, actual prompt/input tokens replace the estimate for +status reporting. + +## Status and Commands + +The status bar shows: -The design favors keeping useful raw tool results visible, while aggressively compacting or forgetting noise. +- model and reasoning label +- optional mode/status notice +- `ctx:NN%` +- current turn tool-call count +- token totals and optional streaming token rate +- current turn elapsed time as `Ns` or `NmNs` + +It does not show a separate current model-call timer. + +`/context` reports the active context budget, including `prompt_tokens`. +`/status` reports runtime settings, model usage, token usage, code-index status, +goal, and checks. ## Completion and Verification The agent should complete only when: - the goal is achieved -- plan items are done or blocked with concrete context -- verification strength matches the task risk -- required verification has passed or is blocked by the user/environment/tool +- every plan item is done or blocked with concrete context +- required checks are passed or blocked with a stated reason +- failed checks have been recorded and addressed +- the final answer can state what changed, how it was verified, and remaining + risk -Verification is ACT work using tool calls plus a `verify` state update. +Verification is agent work using tools plus a `verify` state update. Verification strength is intentionally lightweight: @@ -269,6 +346,5 @@ Verification strength is intentionally lightweight: ## Design Principle -The core idea is: - -Keep full data outside context, keep useful evidence inside context, and let OBSERVE periodically remove noise. +Keep full logs outside prompt, project current evidence by source inside prompt, +and use compact as the single cleanup path when context pressure requires it. diff --git a/nanocode.py b/nanocode.py index 0cb36a7..92b1f38 100644 --- a/nanocode.py +++ b/nanocode.py @@ -594,16 +594,15 @@ def add(self, *, prompt_tokens: int, completion_tokens: int, total_tokens: int, class ContextBudget: raw_chars: int kept_chars: int - kept_block_chars: int index_items: int prompt_tokens: int planless_discovery_tool_calls: int CONTEXT_BUDGETS: dict[str, ContextBudget] = { - "low": ContextBudget(36_000, 16_000, 4_000, 20, 64_000, 6), - "medium": ContextBudget(72_000, 32_000, 6_000, 30, 128_000, 8), - "high": ContextBudget(120_000, 64_000, 8_000, 60, 256_000, 12), + "low": ContextBudget(36_000, 16_000, 20, 64_000, 6), + "medium": ContextBudget(72_000, 32_000, 30, 128_000, 8), + "high": ContextBudget(120_000, 64_000, 60, 256_000, 12), } @@ -828,15 +827,6 @@ def load(cls, path: str | None = None) -> Json: return data if isinstance(data, dict) else {} -############################ -# Agent Runtime (dataclasses) -############################ - - -class AgentMode(StrEnum): - ACT = "act" - - @dataclass class AgentRunResult: done: bool = False @@ -869,6 +859,7 @@ class RuntimeState: status_notice_until: float = 0.0 pending_user_feedback: str = "" conversation: list[ConversationItem] = field(default_factory=list) + conversation_log: list[ConversationItem] = field(default_factory=list) user_rules: UserRules = field(default_factory=UserRules) tool_result_store: dict[str, ToolResultItem] = field(default_factory=dict) tool_result_counter: int = 0 @@ -926,6 +917,7 @@ def is_path_in_cwd(self, path: str) -> bool: def append_conversation(self, item: ConversationItem) -> None: self.state.conversation.append(item) + self.state.conversation_log.append(item) def project_key(self) -> str: cwd = os.path.realpath(self.cwd) @@ -1139,7 +1131,6 @@ def handle_event( @staticmethod def _agent_payload(agent: Any) -> Json: return { - "mode": agent.mode, "goal": agent.blackboard.goal, "plan_items": len(agent.blackboard.plan), "feedback_tail": agent.agent_feedback_errors[-3:], @@ -3724,7 +3715,7 @@ class ToolResultTool(Tool): EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY DESCRIPTION: ClassVar[tuple[str, ...]] = ( "Retrieve stored tool results by tr.N key.", - "Use when output was truncated, forgotten, or no longer visible.", + "Use when output was truncated, compacted, or no longer visible.", "Optional 0-based ranges read exact slices from the stored full log.", "Returns result metadata plus content.", ) @@ -3802,7 +3793,6 @@ def _content(self, item: ToolResultItem) -> str: ToolResultTool.NAME: ToolResultTool, } CONTEXT_TOOL_NAMES: frozenset[str] = frozenset({ToolResultTool.NAME}) -REMOVED_CONTEXT_TOOL_NAMES: frozenset[str] = frozenset({"Forget", "Keep"}) def _canonical_tool_name(name: str | None) -> str: @@ -3908,18 +3898,6 @@ def _state_tool_schema(name: str) -> Json: return _function_tool_schema(name, description, _tool_object_schema(properties, required)) -COMPACT_TOOL_SCHEMA = _function_tool_schema( - "compact", - "Return a compact continuation summary and retained facts.", - _tool_object_schema( - { - "summary": TOOL_STRING_SCHEMA, - "known": TOOL_ITEMS_SCHEMA, - }, - ["summary", "known"], - ), -) - ############################ # Agent Prompt ############################ @@ -4056,40 +4034,56 @@ def _state_tool_schema(name: str) -> Json: ############################ -COMPACTOR_PROMPT = """You are nanocode's conversation-history compactor. - -Compress conversation history and Facts so the coding agent can continue later. -If tool results are included, preserve only conclusions, file paths, ranges, errors, and decisions needed to continue. -Do not solve the task or add unsupported facts. -Use the compact function tool only. - -Preserve continuity-critical facts: -- user requests and changes -- decisions made -- current goal and commitments -- plan/status -- files, paths, symbols, and APIs touched -- commands run and outcomes -- facts and context keys needed later -- unresolved blockers and open questions -- checks context - -Omit noise: -- raw logs -- repeated output -- full stack traces -- chatter -- context values unless needed for continuity - -Write the shortest complete continuation summary. -Compress Facts to concise durable facts. +COMPACTOR_PROMPT = """You are nanocode's working-context compactor for an AI coding agent. + +Rebuild the smallest state snapshot that lets the agent continue the task. +You are not chatting, solving the task, deleting logs, or deciding which tr.N records exist. +Full conversation and tool logs remain outside prompt for audit and Recall. + +Return exactly one JSON object. Do not wrap it in markdown. + +Required fields: +- snapshot: string, concise continuation state for prompt context. +- known: array of durable facts. Each item may be a string or {"text": "...", "source": ["tr.N"]}. + +Optional fields: +- goal: current goal if still active. +- plan: array of {"id"?, "text", "status"?, "context"?}. +- leads: array of {"id"?, "text", "status"?, "source"?, "context"?}. +- checks: {"status"?, "method"?, "context"?, "blocker"?}. +- user_rules: array of durable user rules explicitly stated by the user. + +Snapshot content should cover only continuity-critical state: +- latest user intent and task changes +- current goal, commitments, plan state, and next step +- confirmed facts and active leads +- files, paths, symbols, APIs, commands, outcomes, and recent edits needed later +- important source keys such as tr.N +- unresolved blockers, open questions, and checks + +Preserve source keys when evidence comes from tool results. +Prefer facts/plan/leads fields for structured state; use snapshot for the readable minimal working context. +Omit raw logs, repeated output, full stack traces, obsolete branches, and process chatter. +Do not invent facts not supported by the input. """ COMPACT_USER_PROMPT_TEMPLATE = """ ------------ Facts_To_Compact Begin ------------ +----------- Current_Blackboard Begin ---------- +{blackboard} +-------- Current_Blackboard End --------------- + +----------- User_Rules Begin ------------------ +{user_rules} +-------- User_Rules End ----------------------- + +----------- Existing_Facts Begin -------------- {known} ---------- Facts_To_Compact End ---------------- +-------- Existing_Facts End ------------------- + +----------- Recent_Edits Begin ---------------- +{recent_edits} +-------- Recent_Edits End --------------------- ----------- Conversation_To_Compact Begin ------ {conversation} @@ -4125,7 +4119,6 @@ def request( activity: str = "agent", on_stream_action: Callable[[Json], bool] | None = None, tool_schemas: list[Json] | None = None, - required_tool: str | None = None, ) -> Json: config = self.session.config.provider if not config.url: @@ -4152,10 +4145,9 @@ def request( user_prompt=user_prompt, stream=stream, tool_schemas=tool_schemas, - required_tool=required_tool, ) if api == "responses" - else self._chat_completion_params(config, model=model, messages=messages, stream=stream, tool_schemas=tool_schemas, required_tool=required_tool) + else self._chat_completion_params(config, model=model, messages=messages, stream=stream, tool_schemas=tool_schemas) ) DebugTrace.prompt(self.session, activity=activity, messages=messages) DebugTrace.model_request(self.session, activity=activity, api=api, model=model, stream=stream, params=params, tool_schemas=tool_schemas) @@ -4313,7 +4305,6 @@ def _chat_completion_params( messages: list[Json], stream: bool, tool_schemas: list[Json] | None = None, - required_tool: str | None = None, ) -> Json: params: Json = {"model": model, "messages": messages, "stream": stream} extra_body: Json = {} @@ -4324,12 +4315,12 @@ def _chat_completion_params( params["temperature"] = config.temperature if stream: params["stream_options"] = {"include_usage": True} + chat_reasoning = config.resolved_chat_reasoning() + reasoning_enabled = config.reasoning != "off" if tool_schemas: params["tools"] = tool_schemas - params["tool_choice"] = {"type": "function", "function": {"name": required_tool}} if required_tool else "auto" + params["tool_choice"] = "auto" params["parallel_tool_calls"] = True - chat_reasoning = config.resolved_chat_reasoning() - reasoning_enabled = config.reasoning != "off" if reasoning_enabled and chat_reasoning == "reasoning": extra_body["reasoning"] = {"effort": self._reasoning_effort(config)} if reasoning_enabled and chat_reasoning == "reasoning_effort": @@ -4625,7 +4616,6 @@ def _responses_params( user_prompt: str, stream: bool, tool_schemas: list[Json] | None = None, - required_tool: str | None = None, ) -> Json: params: Json = {"model": model, "instructions": system_prompt, "input": user_prompt, "stream": stream, "store": False} prompt_cache_key = self._prompt_cache_key(config, model=model, tool_schemas=tool_schemas) @@ -4633,7 +4623,7 @@ def _responses_params( params["prompt_cache_key"] = prompt_cache_key if tool_schemas: params["tools"] = self._responses_tool_schemas(tool_schemas) - params["tool_choice"] = {"type": "function", "name": required_tool} if required_tool else "auto" + params["tool_choice"] = "auto" params["parallel_tool_calls"] = True if config.temperature is not None: params["temperature"] = config.temperature @@ -5550,39 +5540,118 @@ def __init__(self, session: Session, model_client: ModelClient, blackboard: Blac self.model_client = model_client self.blackboard = blackboard - def compact(self, *, tool_results: str = "") -> int: + SNAPSHOT_HEADER: ClassVar[str] = "Working Context Snapshot:" + + def compact(self, *, tool_results: str = "", recent_edits: list[str] | None = None) -> int: count = len(self.session.state.conversation) tool_results = tool_results.strip() if count <= self.KEEP_RECENT and not tool_results: return 0 old_items = self.session.state.conversation[: -self.KEEP_RECENT] if count > self.KEEP_RECENT else [] keep_items = self.session.state.conversation[-self.KEEP_RECENT :] if count > self.KEEP_RECENT else list(self.session.state.conversation) - summary, known = self._summarize(old_items, tool_results=tool_results) - self.session.state.conversation = [AssistantMessage(content="Conversation compact summary:\n" + summary)] + keep_items - self.blackboard.known = known + snapshot = self._summarize(old_items, tool_results=tool_results, recent_edits=recent_edits or []) + self.session.state.conversation = [AssistantMessage(content=self.SNAPSHOT_HEADER + "\n" + snapshot)] + keep_items return count + (1 if tool_results else 0) - def _summarize(self, items: list[ConversationItem], *, tool_results: str = "") -> tuple[str, list[KnownItem]]: + def _summarize(self, items: list[ConversationItem], *, tool_results: str = "", recent_edits: list[str]) -> str: user_prompt = COMPACT_USER_PROMPT_TEMPLATE.format( + blackboard=self._format_blackboard(), + user_rules=self.session.state.user_rules.format(), known="\n".join(KnownItem.format_item(item) for item in self.blackboard.known) or "(empty)", + recent_edits="\n".join(recent_edits) or "(empty)", conversation="\n\n".join(item.format() for item in items) or "(empty)", tool_results=tool_results or "(empty)", ).strip() - response = self.model_client.request( - COMPACTOR_PROMPT.strip(), user_prompt, activity="compact", tool_schemas=[COMPACT_TOOL_SCHEMA], required_tool="compact" - ) - if "actions" in response: - response = next( - (_json_dict(action) for action in _json_list(response.get("actions")) if _json_str(_json_dict(action).get("type")) == "compact"), - {}, - ) - summary = _json_str(response.get("summary")) - if not summary: - raise LLMError("compact response missing summary") - known = [item for item in (KnownItem.from_json(raw) for raw in _json_list(response.get("known"))) if item] + response = self._response_json(self.model_client.request(COMPACTOR_PROMPT.strip(), user_prompt, activity="compact")) + snapshot = _json_str(response.get("snapshot")) or "" + if not snapshot: + raise LLMError("compact response missing snapshot") + self._apply_snapshot_state(response) + return snapshot.strip() + + def _format_blackboard(self) -> str: + lines = ["Goal:", self.blackboard.goal or "(empty)", "", "Plan:"] + lines.append("\n".join(item.format() for item in self.blackboard.plan) if self.blackboard.plan else "(empty)") + lines.extend(["", "Leads:", "\n".join(item.format() for item in self.blackboard.leads) if self.blackboard.leads else "(empty)"]) + lines.extend(["", "Checks:", self.blackboard.checks.format() if self.blackboard.checks.has_context() else "(empty)"]) + return "\n".join(lines) + + def _apply_snapshot_state(self, response: Json) -> None: + goal = (_json_str(response.get("goal")) or "").strip() + if goal: + self.blackboard.goal = goal + plan = [item for item in (self._plan_item_from_json(raw) for raw in _json_list(response.get("plan"))) if item] + if plan: + self.blackboard.plan = plan + leads = [item for item in (Lead.from_json(raw) for raw in _json_list(response.get("leads"))) if item] + if leads: + self.blackboard.leads = leads + self._apply_snapshot_checks(_json_dict(response.get("checks"))) + + known = [item for item in (KnownItem.from_json(raw) for raw in (_json_list(response.get("known")) or _json_list(response.get("facts")))) if item] if not known: known = list(self.blackboard.known) - return summary, known[-self.MAX_COMPACTED_KNOWN_ITEMS :] + self.blackboard.known = known[-self.MAX_COMPACTED_KNOWN_ITEMS :] + + rules_changed = False + for raw_rule in _json_list(response.get("user_rules")): + rule = (_json_str(raw_rule) or "").strip() + rules_changed = self.session.state.user_rules.add(rule) or rules_changed + if rules_changed: + self.session.save_user_rules() + + @staticmethod + def _plan_item_from_json(value: JsonValue) -> PlanItem | None: + if isinstance(value, str): + text = value.strip() + return PlanItem(text=text) if text else None + item = _json_dict(value) + text = _json_str(item.get("text")) or "" + if not text: + return None + status = _json_str(item.get("status")) or PlanStatus.TODO + if status not in ALL_PLAN_STATUSES: + status = PlanStatus.TODO + return PlanItem(text=text, status=PlanStatus(status), id=_json_str(item.get("id")) or "", context=_json_str(item.get("context")) or "") + + def _apply_snapshot_checks(self, item: Json) -> None: + if not item: + return + status = _json_str(item.get("status")) or "" + if status in frozenset(CheckStatus): + self.blackboard.checks.status = CheckStatus(status) + if "method" in item: + self.blackboard.checks.method = _json_str(item.get("method")) or "" + if "context" in item: + self.blackboard.checks.context = _json_str(item.get("context")) or "" + blocker = _json_str(item.get("blocker")) or "" + if blocker in ALL_CHECK_BLOCKERS: + self.blackboard.checks.blocker = CheckBlocker(blocker) + + @staticmethod + def _response_json(response: Json) -> Json: + if response and response.get("_assistant_text") is None: + return response + text = (_json_str(response.get("_assistant_text")) or "").strip() + for pattern in (r"(?ms)^```(?:json)?\s*(.*?)\s*```$", r"(?ms)```(?:json)?\s*(.*?)\s*```"): + match = re.search(pattern, text) + if match: + text = match.group(1).strip() + break + try: + parsed = json.loads(text) + except json.JSONDecodeError: + start = text.find("{") + if start < 0: + raise LLMError("compact response invalid JSON") + try: + parsed, _end = json.JSONDecoder().raw_decode(text[start:]) + except json.JSONDecodeError as error: + raise LLMError("compact response invalid JSON: " + str(error)) + data = _json_dict(parsed) + if not data: + raise LLMError("compact response must be a JSON object") + return data ############################ @@ -5659,11 +5728,10 @@ def __init__(self, session: Session): self.failed_tool_call_key: tuple[str, tuple[str, ...]] | None = None self.failed_tool_call_count = 0 self.agent_feedback_errors: list[str] = [] - self.latest_context_tool_recalled: list[str] = [] + self.latest_recalled_result_keys: list[str] = [] self.task_alignment_required = False self.incomplete_task_context_at_turn_start = False self.stream_stop_requested = False - self.mode = AgentMode.ACT def context_budget(self) -> ContextBudget: return CONTEXT_BUDGETS[self.session.settings.context_budget] @@ -5798,7 +5866,7 @@ def _set_status_notice(self, text: str, ttl: float = 5.0) -> None: self.session.state.status_notice_until = time.monotonic() + ttl def compact_history(self) -> int: - return self.compactor.compact() + return self.compactor.compact(recent_edits=self.recent_edits) def compact_context(self) -> int: observed_blocks = self.tool_context.unreduced_blocks(self.blackboard.memory_checkpoint_tool_result_counter) @@ -5806,7 +5874,7 @@ def compact_context(self) -> int: "\n\n".join(observed_blocks), self.context_budget().raw_chars, ) - compacted_conversation = self.compactor.compact(tool_results=tool_results) + compacted_conversation = self.compactor.compact(tool_results=tool_results, recent_edits=self.recent_edits) observed_counter = ToolResultContext.max_counter(observed_blocks) if observed_blocks: self.tool_context.compact_observed(observed_blocks) @@ -6075,7 +6143,7 @@ def _step_prompts(self) -> tuple[str, str, str]: return self._system_prompt(), self.build_user_prompt(), "agent" def _tool_schemas(self) -> list[Json]: - action_names = self.ACT_ACTION_TYPES - {"tool", "forget"} + action_names = self.ACT_ACTION_TYPES - {"tool"} tool_classes: Iterable[ToolClass] = tuple(TOOL_REGISTRY.values()) if not CodeIndex(self.session).available(): tool_classes = tuple(tool for tool in tool_classes if tool is not InspectCodeTool) @@ -6165,7 +6233,7 @@ def on_stream_action(action: Json) -> bool: def _can_stream_tools(self) -> bool: return isinstance(self.model_client, ModelClient) and self.session.config.provider.stream is not False - def apply_response(self, response: Json) -> list[str]: + def apply_response(self, response: Json) -> None: actions = self._response_actions(response) response = {**response, "actions": actions} if any(self._is_pending_check_action(action) for action in actions): @@ -6176,7 +6244,6 @@ def apply_response(self, response: Json) -> list[str]: self._mark_memory_checkpoint() self.blackboard.leads = [] self.state_updater.apply(response) - return [] def _goal_changes_task(self, actions: list[Json]) -> bool: if not self.blackboard.goal: @@ -6202,7 +6269,7 @@ def execute_tool_calls( append_to_latest: bool = False, ) -> str: self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) - self.latest_context_tool_recalled = [] + self.latest_recalled_result_keys = [] regular_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name not in CONTEXT_TOOL_NAMES] if regular_executions: self.tool_context.append_latest( @@ -6232,7 +6299,7 @@ def _apply_context_tool_executions( continue if execution.call.name == ToolResultTool.NAME: blocks = ToolResultContext.recalled_result_blocks(ToolResultContext.format_execution(execution)) - self.latest_context_tool_recalled.extend( + self.latest_recalled_result_keys.extend( self.tool_context.reactivate_result_blocks( blocks, max_index_items=self.context_budget().index_items, @@ -6568,11 +6635,6 @@ def _gate_protocol_actions(self, ctx: ResponseContext, on_message: MessageCallba ) def _gate_tool_actions(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool: - unavailable = sorted({_json_str(_json_dict(call).get("name")) for call in ctx.tool_calls if _json_str(_json_dict(call).get("name")) in REMOVED_CONTEXT_TOOL_NAMES}) - if unavailable: - self._remember_agent_error(self._error("context tools are no longer available in ACT: " + ", ".join(unavailable) + ".")) - self._report_gate(on_message, "Retrying: use Recall or continue work directly.", "Protocol_Gate: unavailable context tool(s): " + ", ".join(unavailable) + ".") - return True repeated_tool_retry_error = self._repeated_tool_retry_error(ctx.tool_calls) if repeated_tool_retry_error: self.stream_stop_requested = True @@ -6706,25 +6768,15 @@ def _run_tool_actions( report = ToolCallDisplayFormatter.latest_report(self.tool_runner.latest_executions) if report: on_message(report) - self._emit_tool_context_update( - self.latest_context_tool_recalled, - [], - on_message, - ) + self._emit_recalled_context_update(on_message) if self.session.settings.debug and self.tool_runner.skipped_after_failure_count: on_message(f"Tool Calls Skipped: {self.tool_runner.skipped_after_failure_count} after {self.tool_runner.skipped_after_failure_key} failed") self.apply_context_budget() return True - def _emit_tool_context_update(self, kept: list[str], forgotten: list[str], on_message: MessageCallback | None) -> None: - if on_message is None or not (kept or forgotten): - return - parts = [] - if kept: - parts.append(" ".join("+" + key for key in kept)) - if forgotten: - parts.append(" ".join("-" + key for key in forgotten)) - on_message("Tool Result Context: " + " / ".join(parts)) + def _emit_recalled_context_update(self, on_message: MessageCallback | None) -> None: + if on_message is not None and self.latest_recalled_result_keys: + on_message("Tool Result Context: " + " ".join("+" + key for key in self.latest_recalled_result_keys)) def _finish_or_continue(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult: completion_gate = self._gate_completion(ctx, on_message) @@ -6793,7 +6845,7 @@ def run( checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, ) self._prune_tool_result_store() - self.latest_context_tool_recalled = [] + self.latest_recalled_result_keys = [] self.session.state.turn_tool_calls = 0 self.session.state.turn_model_calls = 0 old_goal = self.blackboard.goal @@ -6858,10 +6910,9 @@ def handle_response( DebugTrace.handle_event(self, "handle-text", ctx, response, result=text_result) return text_result - forgotten_keys = self.apply_response(response) - DebugTrace.handle_event(self, "handle-applied", ctx, response, extra={"forgotten": forgotten_keys}) + self.apply_response(response) + DebugTrace.handle_event(self, "handle-applied", ctx, response) self._emit_state_and_text(ctx, on_message) - self._emit_tool_context_update([], forgotten_keys, on_message) self._refresh_agent_feedback() if ctx.has_user_rule_action and not ctx.tool_calls and not ctx.pending_check_requested: message = ctx.user_rule_message or "Rule saved." @@ -7315,7 +7366,6 @@ def _format_context_budget(self) -> str: "context_budget: " + self.agent.session.settings.context_budget, "raw_chars: " + str(budget.raw_chars), "kept_chars: " + str(budget.kept_chars), - "kept_block_chars: " + str(budget.kept_block_chars), "index_items: " + str(budget.index_items), "prompt_tokens: " + str(budget.prompt_tokens), ] diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index b720214..e8eec54 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -69,7 +69,7 @@ def _search_args(pattern: str, *, path: str | None = None, glob: str | None = No return [spec] -def _observe_tool_result_context(agent): +def _unreduced_tool_result_context(agent): return "\n\n".join(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) @@ -367,7 +367,7 @@ def test_agent_keeps_latest_batch_and_unreduced_tool_results(tmp_path, monkeypat assert "" in latest assert "" in recent assert len(agent.tool_context.recent) == 3 - context = _observe_tool_result_context(agent) + context = _unreduced_tool_result_context(agent) assert "one.txt" in context assert "two.txt" in context assert "three.txt" in context @@ -386,7 +386,6 @@ def test_referenced_unreduced_results_are_excluded_from_pending_context(tmp_path agent.apply_response({"actions": [{"type": "known", "items": [{"source": ["tr.1"], "text": "one.txt was inspected."}]}]}) agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) - assert agent.mode == nanocode.AgentMode.ACT assert agent.blackboard.memory_checkpoint_tool_result_counter == 0 assert len(agent.tool_context.unreduced_blocks(agent.blackboard.memory_checkpoint_tool_result_counter)) == 2 assert [nanocode.ToolResultContext.result_key(block) for block in agent._unreferenced_unreduced_blocks()] == ["tr.2"] @@ -420,7 +419,6 @@ def test_agent_act_context_keeps_pending_raw_after_latest_rotates(tmp_path, monk agent.execute_tool_calls([{"name": "Read", "intention": "read one", "args": _read_args("one.txt", line_range=[0, 1])}]) agent.execute_tool_calls([{"name": "Read", "intention": "read two", "args": _read_args("two.txt", line_range=[0, 1])}]) - assert agent.mode == nanocode.AgentMode.ACT assert "key=tr.1" in _blocks_text(agent.tool_context.recent) index, unreduced, latest = agent._format_act_tool_result_context() assert "one.txt" in unreduced @@ -973,9 +971,9 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): fake_client = FakeModelClient() agent.model_client = fake_client - response = agent.request("system", "user", activity="observe") + response = agent.request("system", "user", activity="agent") - assert response == {"actions": [{"type": "message", "text": "system/user/observe"}]} + assert response == {"actions": [{"type": "message", "text": "system/user/agent"}]} assert fake_client.calls == 2 assert session.state.status_notice == "" @@ -1348,7 +1346,7 @@ def walk(value, path="schema"): state_schemas = [nanocode._state_tool_schema(name) for name in nanocode.STATE_TOOL_PARAMS] repo_schemas = [tool.tool_schema() for tool in nanocode.TOOL_REGISTRY.values()] - for schema in [*state_schemas, *repo_schemas, nanocode.COMPACT_TOOL_SCHEMA]: + for schema in [*state_schemas, *repo_schemas]: walk(schema) @@ -1366,7 +1364,7 @@ def walk(value, path="schema"): state_schemas = [nanocode._state_tool_schema(name) for name in nanocode.STATE_TOOL_PARAMS] repo_schemas = [tool.tool_schema() for tool in nanocode.TOOL_REGISTRY.values()] - for schema in [*state_schemas, *repo_schemas, nanocode.COMPACT_TOOL_SCHEMA]: + for schema in [*state_schemas, *repo_schemas]: walk(schema) @@ -2617,13 +2615,12 @@ def test_agent_drops_old_feedback_after_successful_tool_progress(tmp_path): assert agent.agent_feedback_errors == [] -def test_tool_arg_error_does_not_force_observe(tmp_path): +def test_tool_arg_error_stays_visible_for_repair(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) agent.execute_tool_calls([{"name": "Read", "intention": "bad range", "args": _read_args("sample.txt", line_range=["bad", 1])}]) - assert agent.mode == nanocode.AgentMode.ACT assert agent.agent_feedback_errors @@ -2636,7 +2633,6 @@ def test_non_arg_tool_failure_stays_in_act_for_repair(tmp_path): confirm=lambda call, tool: True, ) - assert agent.mode == nanocode.AgentMode.ACT assert "exit 7" in _blocks_text(agent.tool_context.latest) @@ -3114,7 +3110,7 @@ def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): assert agent.blackboard.goal_reached is False -def test_agent_run_observe_checkpoint_allows_completion_without_known(tmp_path): +def test_agent_run_compact_checkpoint_allows_completion_without_known(tmp_path): (tmp_path / "sample.txt").write_text("alpha\n", encoding="utf-8") class FakeModelClient: diff --git a/tests/test_nanocode_commands.py b/tests/test_nanocode_commands.py index 789b489..489aba0 100644 --- a/tests/test_nanocode_commands.py +++ b/tests/test_nanocode_commands.py @@ -6,13 +6,13 @@ class FakeModelClient: - def __init__(self, summary="LLM compact summary"): - self.summary = summary + def __init__(self, snapshot="LLM working snapshot"): + self.snapshot = snapshot self.requests = [] def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): self.requests.append((system_prompt, user_prompt, activity)) - return {"summary": self.summary} + return {"snapshot": self.snapshot} def patch_openai_models(monkeypatch, models=None, error: Exception | None = None): @@ -519,7 +519,7 @@ def run_with_status(action): assert result.status == CommandStatus.HANDLED assert result.message == "Compacted context: 6 item(s)" assert status_calls == ["run"] - assert session.state.conversation[0].content == "Conversation compact summary:\nLLM compact summary" + assert session.state.conversation[0].content == "Working Context Snapshot:\nLLM working snapshot" def test_compact_command_reports_short_history(tmp_path): diff --git a/tests/test_nanocode_compactor.py b/tests/test_nanocode_compactor.py index 02cbcbe..7fa1107 100644 --- a/tests/test_nanocode_compactor.py +++ b/tests/test_nanocode_compactor.py @@ -3,20 +3,23 @@ class FakeModelClient: - def __init__(self, summary="LLM compact summary", known=None): - self.summary = summary + def __init__(self, snapshot="LLM working snapshot", known=None, response=None): + self.snapshot = snapshot self.known = known + self.response = response self.requests = [] - def request(self, system_prompt, user_prompt, *, activity="agent", **_kwargs): - self.requests.append((system_prompt, user_prompt, activity)) - response = {"summary": self.summary} + def request(self, system_prompt, user_prompt, *, activity="agent", **kwargs): + self.requests.append((system_prompt, user_prompt, activity, kwargs)) + if self.response is not None: + return self.response + response = {"snapshot": self.snapshot} if self.known is not None: response["known"] = self.known return response -def test_agent_compact_history_uses_llm_and_keeps_recent(tmp_path): +def test_agent_compact_history_builds_working_snapshot_and_keeps_recent(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) fake_client = FakeModelClient("LLM kept the old user request and assistant note.") @@ -40,11 +43,14 @@ def test_agent_compact_history_uses_llm_and_keeps_recent(tmp_path): assert isinstance(session.state.conversation[0], AssistantMessage) assert session.state.conversation[1].content == "keep 1" summary = session.state.conversation[0].content - assert summary == "Conversation compact summary:\nLLM kept the old user request and assistant note." + assert summary == "Working Context Snapshot:\nLLM kept the old user request and assistant note." assert agent.blackboard.known == ["old known", "keep known"] assert len(fake_client.requests) == 1 - _system_prompt, _user_prompt, activity = fake_client.requests[0] + _system_prompt, user_prompt, activity, kwargs = fake_client.requests[0] assert activity == "compact" + assert kwargs == {} + assert "Current_Blackboard" in user_prompt + assert "Existing_Facts" in user_prompt assert "" not in summary @@ -96,6 +102,44 @@ def test_agent_compact_history_preserves_known_sources(tmp_path): assert agent.blackboard.known[0].source == ("tr.1",) +def test_agent_compact_history_applies_snapshot_state(tmp_path): + session = Session(cwd=str(tmp_path)) + agent = Agent(session) + fake_client = FakeModelClient( + response={ + "snapshot": "Continue by editing app.py. Evidence: tr.1.", + "goal": "fix route", + "plan": [{"id": "p1", "text": "Patch route", "status": "doing", "context": "tr.1"}], + "leads": [{"id": "h1", "text": "route is stale", "source": ["tr.1"]}], + "checks": {"status": "blocked", "method": "pytest", "context": "missing dependency", "blocker": "environment"}, + "known": [{"text": "router lives in app.py", "source": ["tr.1"]}], + "user_rules": ["Keep tests targeted."], + } + ) + agent.compactor.model_client = fake_client + session.state.conversation = [ + UserMessage(content="old 1"), + UserMessage(content="old 2"), + UserMessage(content="old 3"), + UserMessage(content="keep 1"), + UserMessage(content="keep 2"), + UserMessage(content="keep 3"), + UserMessage(content="keep 4"), + UserMessage(content="keep 5"), + ] + + agent.compact_history() + + assert session.state.conversation[0].content == "Working Context Snapshot:\nContinue by editing app.py. Evidence: tr.1." + assert agent.blackboard.goal == "fix route" + assert agent.blackboard.plan == [nanocode.PlanItem(id="p1", text="Patch route", status=nanocode.PlanStatus.DOING, context="tr.1")] + assert agent.blackboard.leads == [nanocode.Lead(id="h1", text="route is stale", source=("tr.1",))] + assert agent.blackboard.checks.status == nanocode.CheckStatus.BLOCKED + assert agent.blackboard.checks.blocker == nanocode.CheckBlocker.ENVIRONMENT + assert agent.blackboard.known[0].source == ("tr.1",) + assert "Keep tests targeted." in agent.session.state.user_rules.content + + def test_agent_compact_history_skips_when_not_over_keep_recent(tmp_path): session = Session(cwd=str(tmp_path)) agent = Agent(session) @@ -114,3 +158,16 @@ def test_agent_compact_history_skips_when_not_over_keep_recent(tmp_path): assert count == 0 assert [item.content for item in session.state.conversation] == ["one", "two", "three", "four", "five"] assert fake_client.requests == [] + + +def test_agent_compact_history_keeps_full_conversation_log(tmp_path): + session = Session(cwd=str(tmp_path)) + agent = Agent(session) + agent.compactor.model_client = FakeModelClient() + for content in ["old", "keep 1", "keep 2", "keep 3", "keep 4", "keep 5"]: + session.append_conversation(UserMessage(content=content)) + + agent.compact_history() + + assert [item.content for item in session.state.conversation_log] == ["old", "keep 1", "keep 2", "keep 3", "keep 4", "keep 5"] + assert session.state.conversation[0].content == "Working Context Snapshot:\nLLM working snapshot" diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py index a33ebe6..db767f3 100644 --- a/tests/test_nanocode_loop.py +++ b/tests/test_nanocode_loop.py @@ -316,22 +316,9 @@ def __init__(self): captured = [] loop = AgentLoop(FakeAgent(), output_fn=captured.append) - loop._print_message("Tool Result Context: +tr.12 +tr.15 / -tr.8") + loop._print_message("Tool Result Context: +tr.12 +tr.15") - assert captured == [" ctx: +tr.12 +tr.15 / -tr.8"] - - -def test_agent_loop_renders_forgotten_tool_result_context_as_weak_status(tmp_path): - class FakeAgent: - def __init__(self): - self.session = make_session(tmp_path, model="model") - - captured = [] - loop = AgentLoop(FakeAgent(), output_fn=captured.append) - - loop._print_message("Tool Result Context: -tr.12 -tr.15") - - assert captured == [" ctx: -tr.12 -tr.15"] + assert captured == [" ctx: +tr.12 +tr.15"] def test_agent_loop_styles_compact_state_section_labels(tmp_path): From d4a46bdeca5e75bc88bb3287ae4c6bb00e48b045 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 06:42:41 -0700 Subject: [PATCH 24/29] Trim stale context code paths --- nanocode.py | 225 ++++++++++------------------- tests/test_nanocode_bash_tool.py | 5 +- tests/test_nanocode_git_tool.py | 2 +- tests/test_nanocode_search_tool.py | 2 +- 4 files changed, 83 insertions(+), 151 deletions(-) diff --git a/nanocode.py b/nanocode.py index 92b1f38..a73e99e 100644 --- a/nanocode.py +++ b/nanocode.py @@ -29,12 +29,13 @@ import time import tomllib import uuid -from contextlib import nullcontext +from collections.abc import Callable, Iterable, Iterator +from contextlib import nullcontext, suppress from dataclasses import dataclass, field from datetime import datetime from enum import StrEnum -from typing import Any, Callable, ClassVar, Iterator, Iterable, Self, Type, TypeAlias +from typing import Any, ClassVar, Self, TypeAlias from urllib.parse import urlparse from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError, OpenAI @@ -236,10 +237,7 @@ def format_item(item: "KnownItem | str") -> str: @classmethod def from_json(cls, value: JsonValue) -> "KnownItem | None": item = _json_dict(value) - if item: - fact = (_json_str(item.get("text")) or _json_str(item.get("fact")) or "").strip() - else: - fact = (_json_str(value) or "").strip() + fact = (_json_str(item.get("text")) or _json_str(item.get("fact")) or "").strip() if item else (_json_str(value) or "").strip() if not fact: return None if fact.startswith("<") and fact.endswith(">"): @@ -365,7 +363,7 @@ class UserRules: @classmethod def load(cls, path: str) -> "UserRules": try: - with open(path, "r", encoding="utf-8") as file: + with open(path, encoding="utf-8") as file: return cls(file.read().strip()) except FileNotFoundError: return cls() @@ -426,9 +424,6 @@ def referenced_result_keys(self) -> set[str]: keys.update(TOOL_RESULT_KEY_REF_PATTERN.findall(str(text))) return {key for key in keys if key.startswith("tr.")} - def protected_result_sources(self) -> dict[str, str]: - return {key: "active lead" for item in self.leads if item.status == LeadStatus.ACTIVE for key in item.source if key.startswith("tr.")} - @dataclass(frozen=True) class ChatReasoningRule: @@ -1251,7 +1246,7 @@ def requires_confirmation(self, session: Session) -> bool: return self.REQUIRES_CONFIRMATION if self.REQUIRES_CONFIRMATION is not None else self.EFFECT == ToolEffect.EDIT -ToolClass: TypeAlias = Type[Tool] +ToolClass: TypeAlias = type[Tool] @dataclass @@ -1270,7 +1265,7 @@ class ToolCallExecution: call: ParsedToolCall outcome: str output: str - error_type: Type[Exception] | None = None + error_type: type[Exception] | None = None result_key: str = "" result_excerpted: bool = False requires_checks: bool = False @@ -1419,9 +1414,6 @@ def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = N seen.add(key) return blocks - def raw_context_chars(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> int: - return len("\n\n".join(self.unreduced_recent_blocks(checkpoint, exclude_keys=exclude_keys) + self.latest_raw_blocks(exclude_keys=exclude_keys))) - def _needs_reduction(self, block: str, checkpoint: int) -> bool: key = self.result_key(block) return self.is_full_block(block) and (self.result_counter(block) > checkpoint or key in self.reactivated_keys) @@ -1538,8 +1530,7 @@ def format_file_context(cls, blocks: list[str], *, cwd: str = "", max_chars: int if omitted: lines.append("Omitted stale content:") for path in sorted(omitted): - for source in sorted(omitted[path], key=cls._result_key_counter): - lines.append("- " + path + " source=" + source + " stale_lines=" + str(omitted[path][source])) + lines.extend("- " + path + " source=" + source + " stale_lines=" + str(omitted[path][source]) for source in sorted(omitted[path], key=cls._result_key_counter)) lines.append("") rendered = "\n".join(lines).rstrip() @@ -1658,7 +1649,7 @@ def _read_file_context_lines(path: str, *, cwd: str, line_numbers: set[int]) -> max_line = max(wanted) lines: dict[int, str] = {} try: - with open(filepath, "r", encoding="utf-8") as f: + with open(filepath, encoding="utf-8") as f: for index, line in enumerate(f): if index in wanted: lines[index] = line @@ -1708,14 +1699,17 @@ def _file_context_output_items( if not path: continue mtime_ns, size = cls._file_context_section_stat(section) - for clear_match in re.finditer(r"(?m)^[ \t]*(\d+):(\d+)", section): - items.append(FileContextItem(order, 0, "clear", source, path, int(clear_match.group(1)), int(clear_match.group(2)), "", mtime_ns, size)) + items.extend( + FileContextItem(order, 0, "clear", source, path, int(clear_match.group(1)), int(clear_match.group(2)), "", mtime_ns, size) + for clear_match in re.finditer(r"(?m)^[ \t]*(\d+):(\d+)", section) + ) for match in re.finditer(r"(?ms)^[ \t]*\n(.*?)^[ \t]*", section): content = match.group(1) - for line in content.splitlines(): - line_match = re.match(r"(\d+):[0-9a-f]{6}\|", line) - if line_match: - items.append(FileContextItem(order, 1, "line", source, path, int(line_match.group(1)), 0, line, mtime_ns, size)) + items.extend( + FileContextItem(order, 1, "line", source, path, int(line_match.group(1)), 0, line, mtime_ns, size) + for line in content.splitlines() + if (line_match := re.match(r"(\d+):[0-9a-f]{6}\|", line)) + ) return items @staticmethod @@ -1824,19 +1818,6 @@ def _file_context_file_sections(output: str, *, default_path: str) -> Iterator[t path_match = re.search(r"(.*?)", section) yield (path_match.group(1).strip() if path_match else default_path), section - @classmethod - def bound_block(cls, block: str, *, max_chars: int) -> str: - if len(block) <= max_chars: - return block - if not cls.is_full_block(block): - return _shorten(block, max_chars) - header, output = block.split("\n output:\n", 1) - separator = "\n output:\n" - output_budget = max_chars - len(header) - len(separator) - if output_budget <= 0: - return _shorten(cls.compact_block(block), max_chars) - return header + separator + _bound_tool_output(output, max_chars=output_budget).value - @classmethod def result_key(cls, block: str) -> str: match = RESULT_KEY_PATTERN.search(block) @@ -1883,7 +1864,7 @@ def __init__(self, path: str): def acquire(self) -> None: os.makedirs(os.path.dirname(self.path), exist_ok=True) - self.file = open(self.path, "a+", encoding="utf-8") + self.file = open(self.path, "a+", encoding="utf-8") # noqa: SIM115 - lock file stays open while held try: fcntl.flock(self.file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) except OSError: @@ -1901,10 +1882,8 @@ def release(self) -> None: fcntl.flock(self.file.fileno(), fcntl.LOCK_UN) self.file.close() self.file = None - try: + with suppress(OSError): os.remove(self.path) - except OSError: - pass def __enter__(self) -> Self: self.acquire() @@ -1944,10 +1923,8 @@ def clean_sessions(session: Session, *, older_than_seconds: int = 0) -> None: continue if SessionLock.is_locked(os.path.join(session_dir, "session.lock")): continue - try: + with suppress(OSError): shutil.rmtree(session_dir) - except OSError: - pass ############################ @@ -2208,10 +2185,10 @@ def _read_range(self, start: int, end: int, *, filepath: str) -> tuple[str, int, truncated = False bounded_read_lines = end - start if end else 0 if end and bounded_read_lines <= self.MAX_LINES: - with open(filepath, "r", encoding="utf-8") as f: + with open(filepath, encoding="utf-8") as f: selected_lines = list(itertools.islice(f, start, end)) else: - with open(filepath, "r", encoding="utf-8") as f: + with open(filepath, encoding="utf-8") as f: for index, line in enumerate(f): total_lines = index + 1 if index < start: @@ -2292,7 +2269,7 @@ def call(self) -> str: return "" + str(total) + "" total = 0 for filepath in self.filepaths: - with open(filepath, "r", encoding="utf-8", errors="replace") as file: + with open(filepath, encoding="utf-8", errors="replace") as file: total += sum(1 for _ in file) return "" + str(total) + "" @@ -2353,8 +2330,7 @@ def call(self) -> str: entries.append({"name": entry.name, "path": entry.path, "type": entry_type}) entries.sort(key=lambda item: (sort_order.get(str(item["type"]), 4), str(item["name"]))) lines = [""] - for e in entries: - lines.append(f"* ({e['type']}): {os.path.relpath(str(e['path']), self.cwd)}") + lines.extend(f"* ({e['type']}): {os.path.relpath(str(e['path']), self.cwd)}" for e in entries) lines.append("") return "\n".join(lines) @@ -2552,10 +2528,10 @@ def _load_gitignore_patterns(cwd: str) -> list[str]: path = os.path.join(cwd, ".gitignore") patterns = [] try: - with open(path, "r", encoding="utf-8", errors="ignore") as f: + with open(path, encoding="utf-8", errors="ignore") as f: for line in f: pattern = line.strip() - if not pattern or pattern.startswith("#") or pattern.startswith("!"): + if not pattern or pattern.startswith(("#", "!")): continue patterns.append(pattern.lstrip("/")) except OSError: @@ -2572,10 +2548,7 @@ def _is_gitignored(self, path: str) -> bool: if not pattern: continue if directory_only: - if "/" in pattern: - matched = relpath == pattern or relpath.startswith(pattern + "/") - else: - matched = pattern in parts + matched = (relpath == pattern or relpath.startswith(pattern + "/")) if "/" in pattern else pattern in parts if matched: return True continue @@ -2617,7 +2590,7 @@ def _read_match_context(self, path: str, line_number: int) -> list[tuple[int, st try: if os.path.getsize(path) > self.MAX_FILE_BYTES: return [] - with open(path, "r", encoding="utf-8", errors="ignore") as f: + with open(path, encoding="utf-8", errors="ignore") as f: for lineno, line in enumerate(f, start=1): if lineno > end: break @@ -2688,14 +2661,14 @@ def _rg_command(self, rg: str, *, pcre2: bool = False) -> list[str]: def _call_rg(self, rg: str) -> str: pcre2 = False try: - proc = subprocess.run(self._rg_command(rg), text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=30) + proc = subprocess.run(self._rg_command(rg), text=True, capture_output=True, timeout=30) except subprocess.TimeoutExpired: raise ToolCallError("rg timed out") stderr = proc.stderr.lower() if proc.returncode not in (0, 1) and "pcre2" in stderr and ("look-around" in stderr or "look-ahead" in stderr or "look-behind" in stderr): pcre2 = True try: - proc = subprocess.run(self._rg_command(rg, pcre2=True), text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=30) + proc = subprocess.run(self._rg_command(rg, pcre2=True), text=True, capture_output=True, timeout=30) except subprocess.TimeoutExpired: raise ToolCallError("rg timed out") if proc.returncode not in (0, 1): @@ -2737,7 +2710,7 @@ def _call_python(self) -> str: try: if os.path.getsize(path) > self.MAX_FILE_BYTES: continue - with open(path, "r", encoding="utf-8", errors="ignore") as f: + with open(path, encoding="utf-8", errors="ignore") as f: for lineno, line in enumerate(f, start=1): text = line.rstrip("\n") if not self._line_matches(text): @@ -2761,7 +2734,7 @@ def _call_python_multiline(self) -> str: try: if os.path.getsize(path) > self.MAX_FILE_BYTES: continue - with open(path, "r", encoding="utf-8", errors="ignore") as f: + with open(path, encoding="utf-8", errors="ignore") as f: content = f.read() for match in regex.finditer(content): line_number = content.count("\n", 0, match.start()) + 1 @@ -3412,7 +3385,7 @@ def _format_hashline_content(start: int, lines: list[str], *, indent: str) -> li def _preview(self) -> tuple[str, str, list[tuple[int, int, list[str]]]]: try: - with open(self.filepath, "r", encoding="utf-8") as f: + with open(self.filepath, encoding="utf-8") as f: original = f.read() except FileNotFoundError: raise ToolCallError("file does not exist; use CreateFile for new files") @@ -3618,14 +3591,10 @@ def _read_stream_chunk( except OSError: data = b"" if not data: - try: + with suppress(Exception): selector.unregister(key.fileobj) - except Exception: - pass - try: + with suppress(Exception): key.fileobj.close() - except Exception: - pass return False text = data.decode("utf-8", errors="replace") stream = "stdout" if key.data == "stdout" else "stderr" @@ -3634,10 +3603,8 @@ def _read_stream_chunk( else: stderr_parts.append(text) if live_output is not None: - try: + with suppress(Exception): live_output(stream, text) - except Exception: - pass return True @@ -3700,8 +3667,7 @@ def call(self) -> str: [self.git_path, *self.args], cwd=self.cwd, text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, timeout=self.timeout, ) return _format_process_result("GitToolResult", proc.returncode, proc.stdout, proc.stderr) @@ -4795,10 +4761,9 @@ def _arm_stream_timeout(self, *, request_deadline: float, first_output_seen: boo if remaining <= 0: raise ModelRequestTimeout("request model timeout") self._timeout_reason = "request model timeout" - if not first_output_seen and first_token_timeout is not None and first_token_timeout > 0: - if first_token_timeout < remaining: - remaining = first_token_timeout - self._timeout_reason = "request first token timeout" + if not first_output_seen and first_token_timeout is not None and 0 < first_token_timeout < remaining: + remaining = first_token_timeout + self._timeout_reason = "request first token timeout" signal.setitimer(signal.ITIMER_REAL, remaining) def _invalid_model_response(self, content: str, reason: str = "expected a function tool call") -> Json: @@ -4831,10 +4796,7 @@ def _responses_content(self, result: JsonValue) -> str | None: for item in _json_list(data.get("output")): if _json_str(_json_dict(item).get("type")) != "message": continue - for content in _json_list(_json_dict(item).get("content")): - text = _json_dict(content).get("text") - if isinstance(text, str): - parts.append(text) + parts.extend(text for content in _json_list(_json_dict(item).get("content")) if isinstance(text := _json_dict(content).get("text"), str)) return "".join(parts) if parts else None def _format_missing_message_content(self, result: JsonValue) -> str: @@ -4851,7 +4813,7 @@ def _format_missing_message_content(self, result: JsonValue) -> str: message = _json_dict(choice.get("message")) details: Json = { "finish_reason": choice.get("finish_reason"), - "message_keys": sorted(str(key) for key in message.keys()), + "message_keys": sorted(str(key) for key in message), } return "API response missing message content: " + json.dumps(details, ensure_ascii=False) @@ -4905,11 +4867,8 @@ def latest_report(cls, executions: list[ToolCallExecution]) -> str: return "" offset = max(0, len(executions) - cls.DISPLAY_LIMIT) visible = executions[offset:] - lines = [] - if offset: - lines.append(" ... " + str(offset) + " older") - for execution in visible: - lines.append(cls._format_execution(execution)) + lines = [" ... " + str(offset) + " older"] if offset else [] + lines.extend(cls._format_execution(execution) for execution in visible) return "\n".join(lines) @classmethod @@ -4969,7 +4928,7 @@ def execute( call: ParsedToolCall | None = None outcome = "success" output = "" - error_type: Type[Exception] | None = None + error_type: type[Exception] | None = None requires_confirmation = False requires_checks = False try: @@ -5369,7 +5328,8 @@ def _apply_plan_patches(self, plan: list[PlanItem], value: JsonValue) -> bool: changed = True return changed - def _plan_item_from_json(self, value: JsonValue) -> PlanItem | None: + @staticmethod + def _plan_item_from_json(value: JsonValue, *, include_followups: bool = True) -> PlanItem | None: if isinstance(value, str): text = value.strip() return PlanItem(text=text) if text else None @@ -5385,8 +5345,8 @@ def _plan_item_from_json(self, value: JsonValue) -> PlanItem | None: status=PlanStatus(status), id=_json_str(item.get("id")) or "", context=_json_str(item.get("context")) or "", - followup_action=self._plan_followup(item.get("followup_action")), - followup_check=self._plan_followup(item.get("followup_check")), + followup_action=AgentStateUpdater._plan_followup(item.get("followup_action")) if include_followups else PlanFollowup(), + followup_check=AgentStateUpdater._plan_followup(item.get("followup_check")) if include_followups else PlanFollowup(), ) @staticmethod @@ -5547,8 +5507,8 @@ def compact(self, *, tool_results: str = "", recent_edits: list[str] | None = No tool_results = tool_results.strip() if count <= self.KEEP_RECENT and not tool_results: return 0 - old_items = self.session.state.conversation[: -self.KEEP_RECENT] if count > self.KEEP_RECENT else [] - keep_items = self.session.state.conversation[-self.KEEP_RECENT :] if count > self.KEEP_RECENT else list(self.session.state.conversation) + old_items = self.session.state.conversation[: -self.KEEP_RECENT] + keep_items = self.session.state.conversation[-self.KEEP_RECENT :] snapshot = self._summarize(old_items, tool_results=tool_results, recent_edits=recent_edits or []) self.session.state.conversation = [AssistantMessage(content=self.SNAPSHOT_HEADER + "\n" + snapshot)] + keep_items return count + (1 if tool_results else 0) @@ -5570,17 +5530,20 @@ def _summarize(self, items: list[ConversationItem], *, tool_results: str = "", r return snapshot.strip() def _format_blackboard(self) -> str: - lines = ["Goal:", self.blackboard.goal or "(empty)", "", "Plan:"] - lines.append("\n".join(item.format() for item in self.blackboard.plan) if self.blackboard.plan else "(empty)") - lines.extend(["", "Leads:", "\n".join(item.format() for item in self.blackboard.leads) if self.blackboard.leads else "(empty)"]) - lines.extend(["", "Checks:", self.blackboard.checks.format() if self.blackboard.checks.has_context() else "(empty)"]) - return "\n".join(lines) + return "\n\n".join( + ( + "Goal:\n" + (self.blackboard.goal or "(empty)"), + "Plan:\n" + ("\n".join(item.format() for item in self.blackboard.plan) or "(empty)"), + "Leads:\n" + ("\n".join(item.format() for item in self.blackboard.leads) or "(empty)"), + "Checks:\n" + (self.blackboard.checks.format() if self.blackboard.checks.has_context() else "(empty)"), + ) + ) def _apply_snapshot_state(self, response: Json) -> None: goal = (_json_str(response.get("goal")) or "").strip() if goal: self.blackboard.goal = goal - plan = [item for item in (self._plan_item_from_json(raw) for raw in _json_list(response.get("plan"))) if item] + plan = [item for item in (AgentStateUpdater._plan_item_from_json(raw, include_followups=False) for raw in _json_list(response.get("plan"))) if item] if plan: self.blackboard.plan = plan leads = [item for item in (Lead.from_json(raw) for raw in _json_list(response.get("leads"))) if item] @@ -5600,44 +5563,26 @@ def _apply_snapshot_state(self, response: Json) -> None: if rules_changed: self.session.save_user_rules() - @staticmethod - def _plan_item_from_json(value: JsonValue) -> PlanItem | None: - if isinstance(value, str): - text = value.strip() - return PlanItem(text=text) if text else None - item = _json_dict(value) - text = _json_str(item.get("text")) or "" - if not text: - return None - status = _json_str(item.get("status")) or PlanStatus.TODO - if status not in ALL_PLAN_STATUSES: - status = PlanStatus.TODO - return PlanItem(text=text, status=PlanStatus(status), id=_json_str(item.get("id")) or "", context=_json_str(item.get("context")) or "") - def _apply_snapshot_checks(self, item: Json) -> None: if not item: return status = _json_str(item.get("status")) or "" if status in frozenset(CheckStatus): self.blackboard.checks.status = CheckStatus(status) - if "method" in item: - self.blackboard.checks.method = _json_str(item.get("method")) or "" - if "context" in item: - self.blackboard.checks.context = _json_str(item.get("context")) or "" + for field_name in ("method", "context"): + if field_name in item: + setattr(self.blackboard.checks, field_name, _json_str(item.get(field_name)) or "") blocker = _json_str(item.get("blocker")) or "" if blocker in ALL_CHECK_BLOCKERS: self.blackboard.checks.blocker = CheckBlocker(blocker) @staticmethod def _response_json(response: Json) -> Json: - if response and response.get("_assistant_text") is None: + if response and "_assistant_text" not in response: return response text = (_json_str(response.get("_assistant_text")) or "").strip() - for pattern in (r"(?ms)^```(?:json)?\s*(.*?)\s*```$", r"(?ms)```(?:json)?\s*(.*?)\s*```"): - match = re.search(pattern, text) - if match: - text = match.group(1).strip() - break + if match := re.search(r"(?ms)```(?:json)?\s*(.*?)\s*```", text): + text = match.group(1).strip() try: parsed = json.loads(text) except json.JSONDecodeError: @@ -5936,10 +5881,9 @@ def run_loop( format_error = _json_str(response.get("_format_error")) if format_error: consecutive_format_errors += 1 - if consecutive_format_errors >= self.MAX_CONSECUTIVE_FORMAT_ERRORS: - if on_format_error_limit is not None: - self._remember_format_gate(format_error) - return on_format_error_limit(response, format_error) + if consecutive_format_errors >= self.MAX_CONSECUTIVE_FORMAT_ERRORS and on_format_error_limit is not None: + self._remember_format_gate(format_error) + return on_format_error_limit(response, format_error) self._handle_format_gate(response, format_error, consecutive_format_errors, on_message) continue consecutive_format_errors = 0 @@ -6206,11 +6150,7 @@ def on_stream_action(action: Json) -> bool: ) if is_tool: streamed_tool_batch_started = True - if latest_result.done or self.stream_stop_requested: - return True - if is_tool and any(execution.outcome != "success" for execution in self.tool_runner.latest_executions): - return True - return False + return latest_result.done or self.stream_stop_requested or (is_tool and any(execution.outcome != "success" for execution in self.tool_runner.latest_executions)) system_prompt, user_prompt, activity, tool_schemas = self._prepare_request_context() response = self.request( @@ -6418,7 +6358,7 @@ def _validate_action_response(self, response: Json) -> Json | None: action_bad_outputs.append(bad_output) if action_errors: return self._invalid_action_response(response, "; ".join(action_errors), "\n".join(action_bad_outputs) or None) - extra_keys = sorted(str(key) for key in response.keys() if key not in {"actions", "_assistant_text"} and not str(key).startswith("_format_")) + extra_keys = sorted(str(key) for key in response if key not in {"actions", "_assistant_text"} and not str(key).startswith("_format_")) if extra_keys: return self._invalid_action_response(response, "unexpected top-level keys: " + ", ".join(extra_keys)) return None @@ -7698,10 +7638,8 @@ def __exit__(self, *args) -> None: signal.signal(signal.SIGQUIT, self.previous_handler) self.previous_handler = None if self.fd is not None and self.original_attrs is not None: - try: + with suppress(termios.error): termios.tcsetattr(self.fd, termios.TCSADRAIN, self.original_attrs) - except termios.error: - pass self.fd = None self.original_attrs = None @@ -7888,10 +7826,8 @@ def _stop_runtime_ui(self) -> bool: self._runtime_ui_ready.wait(timeout=0.2) app = self._runtime_ui_app if app is not None: - try: + with suppress(Exception): app.exit() - except Exception: - pass thread.join(timeout=0.8) stopped = not thread.is_alive() if stopped: @@ -8268,10 +8204,9 @@ def _select_choice( self._emit("Invalid selection: " + raw_choice) def _select_model(self, models: tuple[str, ...], current_model: str) -> SelectionResult: - labels = {current_model: current_model + " (current)"} if current_model in models else {} - for label in CommandDispatcher.MODEL_LABELS: - if label in models: - labels[label] = label + labels = {label: label for label in CommandDispatcher.MODEL_LABELS if label in models} + if current_model in models: + labels[current_model] = current_model + " (current)" while True: selected = self._select_choice("Model", models, labels, current=current_model, disabled=set(CommandDispatcher.MODEL_LABELS)) if not isinstance(selected, str) or selected not in CommandDispatcher.MODEL_LABELS: @@ -8298,10 +8233,8 @@ def _discard_pending_tty_input(self) -> None: import termios except ImportError: return - try: + with suppress(AttributeError, OSError, termios.error): termios.tcflush(sys.stdin.fileno(), termios.TCIFLUSH) - except (AttributeError, OSError, termios.error): - pass def _make_prompt_session(self): os.makedirs(os.path.dirname(self.history_path), exist_ok=True) @@ -8572,7 +8505,7 @@ def _print_message(self, message: str) -> None: self._emit_segments([("ansicyan", message + "\n")], message) def _is_tool_call_line(self, line: str) -> bool: - return line.startswith("[success] ") or line.startswith("[failure] ") + return line.startswith(("[success] ", "[failure] ")) def _emit_segments(self, segments: list[tuple[str, str]], plain: str, *, end: str = "\n") -> None: if self.output_fn is print: diff --git a/tests/test_nanocode_bash_tool.py b/tests/test_nanocode_bash_tool.py index 15f3dbe..8d34516 100644 --- a/tests/test_nanocode_bash_tool.py +++ b/tests/test_nanocode_bash_tool.py @@ -1,6 +1,7 @@ import os import signal import time +from contextlib import suppress from nanocode import BashTool, RuntimeSettings, Session @@ -89,7 +90,5 @@ def interrupt_on_output(selector, key, stdout_parts, stderr_parts, live_output=N finally: if pid_file.exists(): pid = int(pid_file.read_text(encoding="utf-8").strip()) - try: + with suppress(OSError): os.killpg(pid, signal.SIGKILL) - except OSError: - pass diff --git a/tests/test_nanocode_git_tool.py b/tests/test_nanocode_git_tool.py index 8a3b4ed..7440108 100644 --- a/tests/test_nanocode_git_tool.py +++ b/tests/test_nanocode_git_tool.py @@ -9,7 +9,7 @@ def test_git_tool_runs_readonly_git_command(tmp_path): if not shutil.which("git"): pytest.skip("git not installed") - subprocess.run(["git", "init"], cwd=tmp_path, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + subprocess.run(["git", "init"], cwd=tmp_path, check=True, capture_output=True, text=True) (tmp_path / "sample.txt").write_text("hello\n", encoding="utf-8") session = Session(cwd=str(tmp_path)) diff --git a/tests/test_nanocode_search_tool.py b/tests/test_nanocode_search_tool.py index 1948ca0..73799a9 100644 --- a/tests/test_nanocode_search_tool.py +++ b/tests/test_nanocode_search_tool.py @@ -202,7 +202,7 @@ def test_search_tool_retries_rg_with_pcre2_for_lookaround(tmp_path, monkeypatch) session = Session(cwd=str(tmp_path)) calls = [] - def fake_run(cmd, text, stdout, stderr, timeout): + def fake_run(cmd, text, capture_output, timeout): calls.append(cmd) if "--pcre2" not in cmd: return nanocode.subprocess.CompletedProcess( From 237b4bfe3b5f54da11a78554f618fe1b8c4d0820 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 06:56:33 -0700 Subject: [PATCH 25/29] Remove recalled context status output --- nanocode.py | 24 +++++------------------- tests/test_nanocode_loop.py | 13 ------------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/nanocode.py b/nanocode.py index a73e99e..059bbcb 100644 --- a/nanocode.py +++ b/nanocode.py @@ -5673,7 +5673,6 @@ def __init__(self, session: Session): self.failed_tool_call_key: tuple[str, tuple[str, ...]] | None = None self.failed_tool_call_count = 0 self.agent_feedback_errors: list[str] = [] - self.latest_recalled_result_keys: list[str] = [] self.task_alignment_required = False self.incomplete_task_context_at_turn_start = False self.stream_stop_requested = False @@ -6209,7 +6208,6 @@ def execute_tool_calls( append_to_latest: bool = False, ) -> str: self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) - self.latest_recalled_result_keys = [] regular_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name not in CONTEXT_TOOL_NAMES] if regular_executions: self.tool_context.append_latest( @@ -6239,13 +6237,11 @@ def _apply_context_tool_executions( continue if execution.call.name == ToolResultTool.NAME: blocks = ToolResultContext.recalled_result_blocks(ToolResultContext.format_execution(execution)) - self.latest_recalled_result_keys.extend( - self.tool_context.reactivate_result_blocks( - blocks, - max_index_items=self.context_budget().index_items, - checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, - append=append_to_latest or bool(self.tool_context.latest), - ) + self.tool_context.reactivate_result_blocks( + blocks, + max_index_items=self.context_budget().index_items, + checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, + append=append_to_latest or bool(self.tool_context.latest), ) def _unreferenced_unreduced_blocks(self) -> list[str]: @@ -6708,16 +6704,11 @@ def _run_tool_actions( report = ToolCallDisplayFormatter.latest_report(self.tool_runner.latest_executions) if report: on_message(report) - self._emit_recalled_context_update(on_message) if self.session.settings.debug and self.tool_runner.skipped_after_failure_count: on_message(f"Tool Calls Skipped: {self.tool_runner.skipped_after_failure_count} after {self.tool_runner.skipped_after_failure_key} failed") self.apply_context_budget() return True - def _emit_recalled_context_update(self, on_message: MessageCallback | None) -> None: - if on_message is not None and self.latest_recalled_result_keys: - on_message("Tool Result Context: " + " ".join("+" + key for key in self.latest_recalled_result_keys)) - def _finish_or_continue(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult: completion_gate = self._gate_completion(ctx, on_message) if completion_gate is not None: @@ -6785,7 +6776,6 @@ def run( checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, ) self._prune_tool_result_store() - self.latest_recalled_result_keys = [] self.session.state.turn_tool_calls = 0 self.session.state.turn_model_calls = 0 old_goal = self.blackboard.goal @@ -8473,10 +8463,6 @@ def _print_message(self, message: str) -> None: ): self._emit_segments(self._compact_state_segments(message), message) return - if message.startswith("Tool Result Context:"): - plain = " ctx: " + message.removeprefix("Tool Result Context:").strip() - self._emit_segments([("ansibrightblack", plain + "\n")], plain) - return if message.startswith("Tool Calls Skipped:"): plain = " skipped: " + message.removeprefix("Tool Calls Skipped:").strip() self._emit_segments([("ansibrightblack", plain + "\n")], plain) diff --git a/tests/test_nanocode_loop.py b/tests/test_nanocode_loop.py index db767f3..84255a0 100644 --- a/tests/test_nanocode_loop.py +++ b/tests/test_nanocode_loop.py @@ -308,19 +308,6 @@ def __init__(self): assert captured == [" Read sample.txt 0:1"] -def test_agent_loop_renders_tool_result_context_as_weak_status(tmp_path): - class FakeAgent: - def __init__(self): - self.session = make_session(tmp_path, model="model") - - captured = [] - loop = AgentLoop(FakeAgent(), output_fn=captured.append) - - loop._print_message("Tool Result Context: +tr.12 +tr.15") - - assert captured == [" ctx: +tr.12 +tr.15"] - - def test_agent_loop_styles_compact_state_section_labels(tmp_path): class FakeAgent: def __init__(self): From a0f300ca295f35a5ced8f2894c5b6097c9566771 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 07:20:21 -0700 Subject: [PATCH 26/29] Trim redundant code paths --- nanocode.py | 262 +++++++++++++++++----------------------------------- 1 file changed, 83 insertions(+), 179 deletions(-) diff --git a/nanocode.py b/nanocode.py index 059bbcb..012d2a5 100644 --- a/nanocode.py +++ b/nanocode.py @@ -734,13 +734,9 @@ def str_tuple(cls, config: Json, key: str) -> tuple[str, ...]: return () if not isinstance(value, list): raise ConfigError(f"config value `{key}` must be a string array") - models = [] - for item in value: - if not isinstance(item, str): - raise ConfigError(f"config value `{key}` must be a string array") - if item := item.strip(): - models.append(item) - return tuple(models) + if any(not isinstance(item, str) for item in value): + raise ConfigError(f"config value `{key}` must be a string array") + return tuple(item for raw in value if (item := raw.strip())) class ConfigFile: @@ -802,8 +798,7 @@ def init(cls, path: str | None = None) -> tuple[str, bool]: config_path = os.path.expanduser(path) if path else cls.path() if os.path.exists(config_path): return config_path, False - parent = os.path.dirname(config_path) - if parent: + if parent := os.path.dirname(config_path): os.makedirs(parent, exist_ok=True) with open(config_path, "w", encoding="utf-8") as file: file.write(cls.DEFAULT_TEXT) @@ -892,15 +887,11 @@ def from_config_data(cls, data: Json, *, yolo: bool = False, debug: bool = False def resolve_path(self, path: str) -> str: path = os.path.expanduser(path) - if not os.path.isabs(path): - path = os.path.join(self.cwd, path) - return os.path.abspath(path) + return os.path.abspath(path if os.path.isabs(path) else os.path.join(self.cwd, path)) def data_path(self, *parts: str) -> str: base = os.path.expanduser(self.config.data_dir) - if not os.path.isabs(base): - base = os.path.join(self.cwd, base) - return os.path.abspath(os.path.join(base, *parts)) + return os.path.abspath(os.path.join(base if os.path.isabs(base) else os.path.join(self.cwd, base), *parts)) def is_path_in_cwd(self, path: str) -> bool: cwd = os.path.realpath(self.cwd) @@ -996,11 +987,7 @@ def response_summary(response: Json) -> Json: @staticmethod def tool_names(tool_schemas: list[Json] | None) -> list[str]: - names = [] - for schema in tool_schemas or []: - function = _json_dict(schema.get("function")) or schema - names.append(_json_str(function.get("name")) or "(unknown)") - return names + return [_json_str((_json_dict(schema.get("function")) or schema).get("name")) or "(unknown)" for schema in tool_schemas or []] @classmethod def model_request( @@ -1176,13 +1163,7 @@ class Tool: @classmethod def cli_args(cls, args: list[JsonValue]) -> list[str]: - tokens: list[str] = [] - for arg in args: - if isinstance(arg, dict): - tokens.extend(cls.cli_object_args(arg)) - else: - tokens.append(cls.cli_token(arg)) - return tokens + return [token for arg in args for token in (cls.cli_object_args(arg) if isinstance(arg, dict) else [cls.cli_token(arg)])] @classmethod def cli_object_args(cls, value: Json) -> list[str]: @@ -1285,7 +1266,7 @@ def _tool_output_line_count(output: str) -> int: return output.count("\n") + (0 if output.endswith("\n") else 1) -def _bound_tool_output(output: str, *, log_path: str = "", max_chars: int = MAX_TOOL_OUTPUT_CHARS) -> BoundedToolOutput: +def _bound_tool_output(output: str, *, max_chars: int = MAX_TOOL_OUTPUT_CHARS) -> BoundedToolOutput: original_chars = len(output) original_lines = _tool_output_line_count(output) if original_chars <= max_chars: @@ -1368,14 +1349,8 @@ def compact_observed(self, observed_blocks: list[str]) -> None: if not observed: return self.reactivated_keys.difference_update(observed_keys) - - def compact(block: str) -> str: - if self.is_full_block(block) and self.result_counter(block) in observed: - return self.compact_block(block) - return block - - self.recent = [compact(block) for block in self.recent] - self.latest = [compact(block) for block in self.latest] + self.recent = [self.compact_block(block) if self.is_full_block(block) and self.result_counter(block) in observed else block for block in self.recent] + self.latest = [self.compact_block(block) if self.is_full_block(block) and self.result_counter(block) in observed else block for block in self.latest] def current_timeline_blocks(self) -> list[str]: seen: set[str] = set() @@ -1389,18 +1364,15 @@ def current_timeline_blocks(self) -> list[str]: blocks.append(self.compact_block(block)) return blocks - def latest_raw_blocks(self, *, exclude_keys: set[str] | None = None) -> list[str]: - excluded = exclude_keys or set() - return [block for block in self.latest if self.is_full_block(block) and self.result_key(block) not in excluded] + def latest_raw_blocks(self) -> list[str]: + return [block for block in self.latest if self.is_full_block(block)] - def unreduced_recent_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> list[str]: - excluded = exclude_keys or set() + def unreduced_recent_blocks(self, checkpoint: int) -> list[str]: latest_keys = set(self.blocks_by_key(self.latest)) return [ block for block in self.recent - for key in [self.result_key(block)] - if key not in latest_keys and key not in excluded and self._needs_reduction(block, checkpoint) + if self.result_key(block) not in latest_keys and self._needs_reduction(block, checkpoint) ] def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = None) -> list[str]: @@ -1434,11 +1406,11 @@ def format_execution(execution: ToolCallExecution) -> str: lines.extend([" output:", execution.output]) return "\n".join(lines) - def reactivate_result_blocks(self, blocks: list[str], *, max_index_items: int, checkpoint: int, append: bool = False) -> list[str]: + def reactivate_result_blocks(self, blocks: list[str], *, max_index_items: int, checkpoint: int, append: bool = False) -> None: blocks = [block for block in blocks if self.is_full_block(block) and self.result_key(block)] keys = set(self.blocks_by_key(blocks)) if not keys: - return [] + return self.recent = [block for block in self.recent if self.result_key(block) not in keys] self.latest = [block for block in self.latest if self.result_key(block) not in keys] self.reactivated_keys.update(keys) @@ -1446,7 +1418,6 @@ def reactivate_result_blocks(self, blocks: list[str], *, max_index_items: int, c self.recent.extend(self.latest) self.latest = [*self.latest, *blocks] if append else blocks self.prune_recent(max_index_items=max_index_items, checkpoint=checkpoint) - return [key for key in self.blocks_by_key(blocks) if key in keys] @staticmethod def is_full_block(block: str) -> bool: @@ -1846,12 +1817,8 @@ def max_counter(cls, blocks: list[str]) -> int: StatusRunner: TypeAlias = Callable[[StatusAction], str] -class SelectionBack: - pass - - -SELECTION_BACK = SelectionBack() -SelectionResult: TypeAlias = str | None | SelectionBack +SELECTION_BACK = object() +SelectionResult: TypeAlias = str | None | object ReasoningSelector: TypeAlias = Callable[[], SelectionResult] ModelSelector: TypeAlias = Callable[[tuple[str, ...], str], SelectionResult] ProviderSelector: TypeAlias = Callable[[tuple[str, ...], str], SelectionResult] @@ -2083,12 +2050,10 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: @classmethod def _payload_from_args(cls, args: list[JsonValue]) -> Json | None: objects = [_json_object_arg(arg) for arg in args] - if len(objects) == 1 and objects[0] is not None: + if len(objects) == 1: return objects[0] - if len(objects) > 1: - files = [obj for obj in objects if obj is not None and "files" not in obj] - if len(files) == len(objects): - return {"files": files} + if len(objects) > 1 and all(obj is not None and "files" not in obj for obj in objects): + return {"files": objects} return None @classmethod @@ -2464,9 +2429,7 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: @classmethod def _payloads_from_args(cls, args: list[JsonValue]) -> list[Json] | None: objects = [_json_object_arg(arg) for arg in args] - if not objects or any(obj is None for obj in objects): - return None - return [obj for obj in objects if obj is not None] + return None if not objects or any(obj is None for obj in objects) else objects @classmethod def _parse_request(cls, session: Session, payload: Json) -> Request: @@ -2526,17 +2489,11 @@ def _matches_glob(self, path: str) -> bool: @staticmethod def _load_gitignore_patterns(cwd: str) -> list[str]: path = os.path.join(cwd, ".gitignore") - patterns = [] try: with open(path, encoding="utf-8", errors="ignore") as f: - for line in f: - pattern = line.strip() - if not pattern or pattern.startswith(("#", "!")): - continue - patterns.append(pattern.lstrip("/")) + return [pattern.lstrip("/") for line in f if (pattern := line.strip()) and not pattern.startswith(("#", "!"))] except OSError: - pass - return patterns + return [] def _is_gitignored(self, path: str) -> bool: relpath = self._relpath(path).replace(os.sep, "/") @@ -2801,14 +2758,10 @@ def _call_single(self) -> str: return self._format_result("python", [], False) rg = shutil.which("rg") - if rg: - return self._call_rg(rg) - return self._call_python() + return self._call_rg(rg) if rg else self._call_python() def call(self) -> str: - if self.requests: - return self._call_batch() - return self._call_single() + return self._call_batch() if self.requests else self._call_single() CODE_INDEX_AUTO_UPDATE_PENDING_LIMIT = 20 @@ -3053,12 +3006,9 @@ def make(cls, session: Session, args: list[JsonValue]) -> Self: target = str(args[1]).strip() if not target: raise ToolCallArgError("target cannot be empty") - if len(args) == 2: - options = {} - else: - options = _json_dict(args[2]) - if not options: - raise ToolCallArgError("options must be an object") + options = {} if len(args) == 2 else _json_dict(args[2]) + if len(args) == 3 and not options: + raise ToolCallArgError("options must be an object") limit = cls.DEFAULT_LIMIT if mode == "find": cls._validate_symbolish(target, "query") @@ -3523,7 +3473,13 @@ def call(self) -> str: if proc.returncode is None: self._kill_process_group(proc) proc.wait() - return self._interrupted_result("".join(stdout_parts), "".join(stderr_parts)) + return _format_process_result( + "BashToolResult", + -1, + "".join(stdout_parts), + "".join(stderr_parts), + details=("* interrupted: true", "* reason: user_ctrl_c"), + ) except BaseException: if proc.returncode is None: self._kill_process_group(proc) @@ -3537,28 +3493,11 @@ def call(self) -> str: stdout_text = "".join(stdout_parts) stderr_text = "".join(stderr_parts) if timed_out: - if stderr_text: - stderr_text += "\n" - return _format_process_result("BashToolResult", -1, stdout_text, stderr_text + "timeout") + return _format_process_result("BashToolResult", -1, stdout_text, stderr_text + ("\n" if stderr_text else "") + "timeout") return _format_process_result("BashToolResult", proc.returncode, stdout_text, stderr_text) except OSError as error: raise ToolCallError(str(error)) - @staticmethod - def _interrupted_result(stdout: str, stderr: str) -> str: - lines = [ - "", - "* exit_code: -1", - "* interrupted: true", - "* reason: user_ctrl_c", - ] - if stdout: - lines.extend(["", stdout.rstrip("\n"), ""]) - if stderr: - lines.extend(["", stderr.rstrip("\n"), ""]) - lines.append("") - return "\n".join(lines) - @staticmethod def _kill_process_group(proc: subprocess.Popen) -> None: try: @@ -3597,14 +3536,10 @@ def _read_stream_chunk( key.fileobj.close() return False text = data.decode("utf-8", errors="replace") - stream = "stdout" if key.data == "stdout" else "stderr" - if key.data == "stdout": - stdout_parts.append(text) - else: - stderr_parts.append(text) + (stdout_parts if key.data == "stdout" else stderr_parts).append(text) if live_output is not None: with suppress(Exception): - live_output(stream, text) + live_output(str(key.data), text) return True @@ -4929,7 +4864,6 @@ def execute( outcome = "success" output = "" error_type: type[Exception] | None = None - requires_confirmation = False requires_checks = False try: call = item if isinstance(item, ParsedToolCall) else self.parse_tool_call(item) @@ -4941,12 +4875,9 @@ def execute( tool.live_output = self.live_output requires_checks = tool.EFFECT == ToolEffect.EDIT preview_error = getattr(tool, "preview_error", None) - if callable(preview_error): - preview_error_text = str(preview_error()) - if preview_error_text: - raise ToolCallError("preview unavailable: " + preview_error_text) - requires_confirmation = tool.requires_confirmation(self.session) - if requires_confirmation: + if callable(preview_error) and (preview_error_text := str(preview_error())): + raise ToolCallError("preview unavailable: " + preview_error_text) + if tool.requires_confirmation(self.session): if self.session.settings.yolo: if on_auto_approve is not None: on_auto_approve(call, tool) @@ -4956,12 +4887,9 @@ def execute( confirmation = confirm(call, tool) if confirmation is not True: reason = " ".join(confirmation.split()) if isinstance(confirmation, str) else "" - if reason: - raise Cancellation("user refused: " + reason) - raise Cancellation("user refused") + raise Cancellation("user refused" + (": " + reason if reason else "")) output = tool.call() - exit_match = re.search(r"^\* exit_code: (-?\d+)$", output, re.MULTILINE) - if exit_match and int(exit_match.group(1)) != 0: + if (exit_match := re.search(r"^\* exit_code: (-?\d+)$", output, re.MULTILINE)) and int(exit_match.group(1)) != 0: outcome = "failure" except Cancellation as error: outcome = "failure" @@ -4973,9 +4901,7 @@ def execute( error_type = type(error) if call is None: raw = _json_dict(item) - summary = "invalid tool action" - if _json_str(raw.get("type")) == "tool" and not _json_str(raw.get("name")): - summary += ": missing required field name" + summary = "invalid tool action" + (": missing required field name" if _json_str(raw.get("type")) == "tool" and not _json_str(raw.get("name")) else "") call = ParsedToolCall(name="InvalidToolCall", intention=summary, args=[]) result_key = "" result_excerpted = False @@ -4995,11 +4921,10 @@ def execute( requires_checks=outcome == "success" and requires_checks, ) executions.append(execution) - if outcome == "failure" and error_type is not Cancellation: - self.skipped_after_failure_count = len(items) - index - 1 - self.skipped_after_failure_key = result_key or _format_tool_call_summary(call) - break - if error_type is Cancellation: + if outcome == "failure": + if error_type is not Cancellation: + self.skipped_after_failure_count = len(items) - index - 1 + self.skipped_after_failure_key = result_key or _format_tool_call_summary(call) break self.latest_executions = executions @@ -5038,7 +4963,7 @@ def _store_tool_result(self, call: ParsedToolCall, outcome: str, output: str) -> description += " - " + call.intention log_path = self._write_tool_result_log(key, output) tool_class = TOOL_REGISTRY.get(call.name) - bounded = _bound_tool_output(output, log_path=log_path, max_chars=tool_class.OUTPUT_CHARS if tool_class is not None else MAX_TOOL_OUTPUT_CHARS) + bounded = _bound_tool_output(output, max_chars=tool_class.OUTPUT_CHARS if tool_class is not None else MAX_TOOL_OUTPUT_CHARS) self.session.state.tool_result_store[key] = ToolResultItem( description=description, value=bounded.value, @@ -6583,7 +6508,7 @@ def _gate_tool_actions(self, ctx: ResponseContext, on_message: MessageCallback | return True return False - def _gate_task_state(self, ctx: ResponseContext, on_message: MessageCallback | None) -> bool: + def _gate_task_state(self, ctx: ResponseContext) -> bool: if ( not (self.blackboard.goal or self.blackboard.plan or self.blackboard.leads) and any(execution.call.name == BashTool.NAME and execution.outcome == "success" for execution in self.tool_runner.latest_executions) @@ -6633,7 +6558,7 @@ def _emit_state_and_text(self, ctx: ResponseContext, on_message: MessageCallback if on_message is not None and ctx.assistant_text and ctx.actions and not ctx.completion_message: on_message(ctx.assistant_text) - def _gate_after_apply(self, ctx: ResponseContext, on_message: MessageCallback | None) -> AgentRunResult | None: + def _gate_after_apply(self, ctx: ResponseContext) -> AgentRunResult | None: if ctx.plan_was_empty and not self.blackboard.plan and (ctx.pending_check_requested or ctx.has_edit_tool_call): self._warn_agent("mutating work before Plan was set.", self.RULE_GOAL_PLAN_FIRST) if ( @@ -6831,7 +6756,7 @@ def handle_response( ctx = self._build_response_context(response) feedback_checkpoint = len(self.agent_feedback_errors) DebugTrace.handle_event(self, "handle-start", ctx, response) - if self._gate_protocol_actions(ctx, on_message) or self._gate_tool_actions(ctx, on_message) or self._gate_task_state(ctx, on_message): + if self._gate_protocol_actions(ctx, on_message) or self._gate_tool_actions(ctx, on_message) or self._gate_task_state(ctx): DebugTrace.handle_event(self, "handle-gated-before-apply", ctx, response) return AgentRunResult() @@ -6853,7 +6778,7 @@ def handle_response( DebugTrace.handle_event(self, "handle-user-rule", ctx, response) return AgentRunResult(done=True, value=response) - gate_result = self._gate_after_apply(ctx, on_message) + gate_result = self._gate_after_apply(ctx) if gate_result is not None: DebugTrace.handle_event(self, "handle-gated-after-apply", ctx, response, result=gate_result) return gate_result @@ -7105,7 +7030,7 @@ def _fetch_remote_models(self, provider: ProviderConfig) -> tuple[str, ...]: ids.append(model_id) return tuple(dict.fromkeys(sorted(ids))) - def _set_model(self, model: str, *, back_to_model: bool = False) -> str | SelectionBack: + def _set_model(self, model: str, *, back_to_model: bool = False) -> SelectionResult: message = "Set provider.model = " + model choice = self.select_reasoning() if self.select_reasoning is not None else None if choice is SELECTION_BACK: @@ -8164,14 +8089,11 @@ def _select_choice( return selected continue - lines = [] - index = 1 - for choice in visible_choices: - if choice in disabled: - lines.append(" " + labels.get(choice, choice)) - continue - lines.append(" " + str(index) + ". " + labels.get(choice, choice)) - index += 1 + enabled_index = {choice: str(index) for index, choice in enumerate(enabled_choices, start=1)} + lines = [ + " " + (labels.get(choice, choice) if choice in disabled else enabled_index[choice] + ". " + labels.get(choice, choice)) + for choice in visible_choices + ] self._emit(title + ((" /" + query) if query else "") + ":\n" + "\n".join(lines)) prompt = "Select " + title.lower() + " [1-" + str(len(enabled_choices)) + "] or /keyword " try: @@ -8647,8 +8569,8 @@ def _make_unified_diff(old_content: str, new_content: str, filepath: str) -> str ) -def _format_process_result(tag: str, exit_code: int, stdout: str, stderr: str) -> str: - lines = [f"<{tag}>", f"* exit_code: {exit_code}"] +def _format_process_result(tag: str, exit_code: int, stdout: str, stderr: str, *, details: tuple[str, ...] = ()) -> str: + lines = [f"<{tag}>", f"* exit_code: {exit_code}", *details] if stdout: lines.extend(["", stdout.rstrip("\n"), ""]) if stderr: @@ -8666,11 +8588,7 @@ def _json_list(value: JsonValue) -> list[JsonValue]: def _json_str(value: JsonValue) -> str | None: - if isinstance(value, str): - return value - if value is None: - return None - return str(value) + return value if isinstance(value, str) else None if value is None else str(value) def _json_object_arg(value: JsonValue) -> Json | None: @@ -8708,48 +8626,34 @@ def __init__(self, providers: Iterable[str] | Callable[[], Iterable[str]] = (), def _values(self, values: Iterable[str] | Callable[[], Iterable[str]]) -> Iterable[str]: return values() if callable(values) else values + @staticmethod + def _complete_values(values: Iterable[str], text: str) -> Iterator[Completion]: + for value in values: + if value.startswith(text): + yield Completion(value, start_position=-len(text)) + def get_completions(self, document, complete_event): text = document.text_before_cursor if text.startswith("/set "): text = text[len("/set ") :] if " " not in text: - for key in CONFIG_SET_KEYS: - if key.startswith(text): - yield Completion(key, start_position=-len(text)) + yield from self._complete_values(CONFIG_SET_KEYS, text) return key, _, value_prefix = text.partition(" ") - for value in CONFIG_VALUE_COMPLETIONS.get(key, ()): - if value.startswith(value_prefix): - yield Completion(value, start_position=-len(value_prefix)) - return - if text.startswith("/provider "): - text = text[len("/provider ") :] - for provider in self._values(self.providers): - if provider.startswith(text): - yield Completion(provider, start_position=-len(text)) - return - if text.startswith("/model "): - text = text[len("/model ") :] - for model in self._values(self.models): - if model.startswith(text): - yield Completion(model, start_position=-len(text)) - return - if text.startswith("/api "): - text = text[len("/api ") :] - for value in ("auto", "chat", "responses"): - if value.startswith(text): - yield Completion(value, start_position=-len(text)) - return - if text.startswith("/reason-payload "): - text = text[len("/reason-payload ") :] - for value in CHAT_REASONING_CHOICES: - if value.startswith(text): - yield Completion(value, start_position=-len(text)) + yield from self._complete_values(CONFIG_VALUE_COMPLETIONS.get(key, ()), value_prefix) return + for prefix, values in ( + ("/provider ", self._values(self.providers)), + ("/model ", self._values(self.models)), + ("/api ", ("auto", "chat", "responses")), + ("/reason-payload ", CHAT_REASONING_CHOICES), + ): + if text.startswith(prefix): + yield from self._complete_values(values, text[len(prefix) :]) + return if text.startswith("/") and " " not in text: - for spec in COMMANDS: - if spec.name.startswith(text): - yield Completion(spec.name, start_position=-len(text)) + yield from (Completion(spec.name, start_position=-len(text)) for spec in COMMANDS if spec.name.startswith(text)) + return class CommandLexer(Lexer): From b339a902db0f5f200f27b621bebaef5434edffc3 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 19:41:14 -0700 Subject: [PATCH 27/29] Optimize Recall handling and response brevity --- nanocode.py | 181 +++++++++++++++++++++++++--- tests/test_nanocode_agent.py | 38 ++++++ tests/test_nanocode_context_tool.py | 24 ++++ 3 files changed, 228 insertions(+), 15 deletions(-) diff --git a/nanocode.py b/nanocode.py index 012d2a5..816e462 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1386,6 +1386,9 @@ def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = N seen.add(key) return blocks + def active_raw_keys(self, checkpoint: int) -> set[str]: + return set(self.blocks_by_key(self.unreduced_recent_blocks(checkpoint) + self.latest_raw_blocks())) + def _needs_reduction(self, block: str, checkpoint: int) -> bool: key = self.result_key(block) return self.is_full_block(block) and (self.result_counter(block) > checkpoint or key in self.reactivated_keys) @@ -1956,6 +1959,7 @@ class ReadTool(Tool): EFFECT: ClassVar[ToolEffect] = ToolEffect.READONLY DESCRIPTION: ClassVar[tuple[str, ...]] = ( "Read one or more UTF-8 files with line:hash anchors.", + "Read accepts file paths, not tr.N result keys; use Recall for stored tool results.", "Pass one structured object. Use path for one file, files for multiple files, or multiple file objects as args.", "Each file can omit range for the first 600 lines, pass range=[start,end], or ranges=[[start,end],...].", ) @@ -3610,6 +3614,12 @@ def call(self) -> str: return _format_process_result("GitToolResult", -1, error.stdout or "", (error.stderr or "") + "timeout") +@dataclass +class RecallRequest: + key: str + ranges: tuple[tuple[int, int], ...] = () + + @dataclass class ToolResultTool(Tool): NAME: ClassVar[str] = "Recall" @@ -3618,47 +3628,154 @@ class ToolResultTool(Tool): "Retrieve stored tool results by tr.N key.", "Use when output was truncated, compacted, or no longer visible.", "Optional 0-based ranges read exact slices from the stored full log.", + "Do not Recall keys already visible in Discovery Context, File Context, Unreduced Tool Results, or Latest Tool Results.", + "Recall takes result keys only; Read takes file paths and never tr.N keys.", "Returns result metadata plus content.", ) - SIGNATURE: ClassVar[str] = "Recall(key[, key...][, range...]) -> RecallToolResult" + SIGNATURE: ClassVar[str] = "Recall(key[, key...][, range...]) or Recall({key, range?|ranges?}) -> RecallToolResult" EXAMPLE: ClassVar[tuple[str, ...]] = ( 'Example args: ["tr.1"]', 'Example args: ["tr.1", "tr.2"]', 'Example args: ["tr.1", "0,120"]', + 'Example args: [{"key":"tr.1","range":[0,120]}]', ) REQUIRES_CONFIRMATION: ClassVar[bool | None] = False - keys: list[str] + requests: list[RecallRequest] results: dict[str, ToolResultItem] cwd: str = "" - ranges: list[tuple[int, int]] = field(default_factory=list) @classmethod - def make(cls, session: Session, args: list[str]) -> Self: - keys = [arg for arg in args if not re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg)] - ranges = [_parse_line_range_token(arg) for arg in args if re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg)] - return cls(keys=keys, results=session.state.tool_result_store, cwd=session.cwd, ranges=ranges) + def cli_args(cls, args: list[JsonValue]) -> list[str]: + try: + requests = cls.requests_from_args(args) + except ToolCallArgError: + return super().cli_args(args) + tokens = [token for request in requests for token in cls._request_cli_tokens(request)] + return tokens or super().cli_args(args) + + @classmethod + def tool_schema(cls) -> Json: + range_schema: Json = { + "anyOf": [ + {"type": "array", "items": {"type": "integer"}, "minItems": 2, "maxItems": 2}, + {"type": "string", "description": "Comma range token such as 0,120."}, + ] + } + recall_arg_schema = { + "anyOf": [ + {"type": "string", "description": "A tr.N result key, or a legacy range token applying to all keys."}, + _tool_object_schema( + { + "key": {"type": "string", "description": "A tr.N result key."}, + "result_key": {"type": "string", "description": "Alias for key."}, + "keys": {"type": "array", "items": {"type": "string"}, "description": "Multiple tr.N result keys."}, + "range": range_schema, + "ranges": {"type": "array", "items": range_schema}, + }, + [], + ), + ] + } + return _function_tool_schema( + cls.NAME, + cls.schema_description(), + _tool_object_schema( + { + "intention": {"type": "string", "description": "Question being answered or concrete outcome needed."}, + "args": {"type": "array", "items": recall_arg_schema, "minItems": 1}, + }, + ["intention", "args"], + ), + ) + + @classmethod + def make(cls, session: Session, args: list[JsonValue]) -> Self: + return cls(requests=cls.requests_from_args(args), results=session.state.tool_result_store, cwd=session.cwd) + + @classmethod + def requests_from_args(cls, args: list[JsonValue]) -> list[RecallRequest]: + requests: list[RecallRequest] = [] + common_ranges: list[tuple[int, int]] = [] + for arg in args: + if isinstance(arg, str) and re.fullmatch(r"\s*\d+\s*[-:,]\s*\d+\s*", arg): + common_ranges.append(_parse_line_range_token(arg)) + elif (payload := _json_object_arg(arg)) is not None: + requests.extend(cls._requests_from_payload(payload)) + else: + key = (_json_str(arg) or "").strip() + if key: + requests.append(RecallRequest(key)) + if common_ranges: + common = tuple(common_ranges) + requests = [request if request.ranges else RecallRequest(request.key, common) for request in requests] + return cls._dedupe_requests(requests) + + @classmethod + def request_keys_from_args(cls, args: list[JsonValue]) -> list[str]: + return [request.key for request in cls.requests_from_args(args)] + + @classmethod + def _requests_from_payload(cls, payload: Json) -> list[RecallRequest]: + unexpected = sorted(set(payload) - {"key", "result_key", "keys", "range", "ranges"}) + if unexpected: + raise ToolCallArgError("unexpected Recall option: " + ", ".join(unexpected)) + keys = [_json_str(payload.get(name)) or "" for name in ("key", "result_key")] + keys.extend(_json_str(item) or "" for item in _json_list(payload.get("keys"))) + keys = [key.strip() for key in keys if key and key.strip()] + if not keys: + raise ToolCallArgError("Recall requires key, result_key, or keys") + ranges: list[tuple[int, int]] = [] + if "range" in payload: + ranges.append(cls._parse_range(payload["range"], label="range")) + if "ranges" in payload: + raw_ranges = _json_list(payload.get("ranges")) + if not raw_ranges: + raise ToolCallArgError("ranges must be a non-empty array") + ranges.extend(cls._parse_range(item, label="ranges item") for item in raw_ranges) + return [RecallRequest(key, tuple(ranges)) for key in keys] + + @staticmethod + def _parse_range(value: JsonValue, *, label: str) -> tuple[int, int]: + if isinstance(value, str): + return _parse_line_range_token(value) + return _parse_structured_line_range(value, label=label) + + @staticmethod + def _dedupe_requests(requests: list[RecallRequest]) -> list[RecallRequest]: + seen: set[tuple[str, tuple[tuple[int, int], ...]]] = set() + unique = [] + for request in requests: + key = (request.key, request.ranges) + if key not in seen: + seen.add(key) + unique.append(request) + return unique + + @staticmethod + def _request_cli_tokens(request: RecallRequest) -> list[str]: + return [request.key, *(str(start) + ":" + str(end) for start, end in request.ranges)] def preview(self) -> str: - ranges = [str(start) + ":" + str(end) for start, end in self.ranges] - return "Recall " + ", ".join([*self.keys, *ranges]) + return "Recall " + ", ".join(token for request in self.requests for token in self._request_cli_tokens(request)) def call(self) -> str: - if not self.keys: + if not self.requests: raise ToolCallArgError("Recall requires at least one key") lines = ["RecallToolResult:"] - for key in self.keys: + for request in self.requests: + key = request.key if key not in self.results: lines.append("- result_key: " + key) lines.append(" status: missing") continue item = self.results[key] - lines.append(item.format(result_key=key, include_content=True, content=self._content(item))) + lines.append(item.format(result_key=key, include_content=True, content=self._content(item, request.ranges))) result = "\n".join(lines) return _bound_tool_output(result).value - def _content(self, item: ToolResultItem) -> str: - if not self.ranges: + def _content(self, item: ToolResultItem, ranges: tuple[tuple[int, int], ...]) -> str: + if not ranges: return item.value path = item.log_path if path and not os.path.isabs(path): @@ -3669,7 +3786,7 @@ def _content(self, item: ToolResultItem) -> str: except OSError: return item.value chunks = [] - for start, end in self.ranges: + for start, end in ranges: if end <= start: continue chunks.append("\n".join(lines[start:end])) @@ -3852,6 +3969,7 @@ def _state_tool_schema(name: str) -> Json: - Use ordered calls for clear edit-then-check flows. - If a tool fails, diagnose the failure before retrying. - Do not repeatedly run the same failing command without a new hypothesis or change. +- Do not Recall a tr.N key already visible in Tool Context; use that content or Read concrete file paths for new evidence. - Prefer targeted checks first; run broader checks only when useful or requested. - For long or expensive checks, run the narrowest command that can verify the change. @@ -3868,6 +3986,9 @@ def _state_tool_schema(name: str) -> Json: Response: - Reply in the language of the latest user input unless asked otherwise. - Keep output plain, concise, and literal-preserving. +- Default assistant replies are 1-5 short lines; use sections only when they reduce ambiguity. +- For completed tasks, include only Goal, status, key summary, and failed/blocked checks if any. +- Do not list every touched file, command, or implementation detail unless the user asks. - Plain text by default. """ @@ -3924,6 +4045,7 @@ def _state_tool_schema(name: str) -> Json: If Pending User Feedback is not empty, answer it briefly first. Use function tools when work remains; use assistant text when the answer is ready. Use visible File Context line anchors before Read; Read only missing ranges or after file changes. +Keep final/chat replies short; avoid process narration and exhaustive summaries unless requested. REPLY IN THE LANGUAGE OF LATEST USER REQUEST. YOUR OUTPUT: @@ -6132,6 +6254,10 @@ def execute_tool_calls( on_auto_approve: ToolDisplayCallback | None = None, append_to_latest: bool = False, ) -> str: + tool_calls = self._filter_redundant_recall_calls(tool_calls) + if not tool_calls: + self.tool_runner.latest_executions = [] + return "\n\n".join(self.tool_context.latest) self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) regular_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name not in CONTEXT_TOOL_NAMES] if regular_executions: @@ -6151,6 +6277,31 @@ def execute_tool_calls( self._after_tool_execution(execution) return "\n\n".join(self.tool_context.latest) + def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> list[JsonValue]: + active_keys = self.tool_context.active_raw_keys(self.blackboard.memory_checkpoint_tool_result_counter) + if not active_keys: + return tool_calls + filtered: list[JsonValue] = [] + skipped: list[str] = [] + for item in tool_calls: + try: + call = self.tool_runner.parse_tool_call(item) + requests = ToolResultTool.requests_from_args(call.args) if call.name == ToolResultTool.NAME else [] + except ToolCallArgError: + filtered.append(item) + continue + if call.name != ToolResultTool.NAME or not requests or any(request.ranges for request in requests): + filtered.append(item) + continue + needed = [request.key for request in requests if request.key not in active_keys] + skipped.extend(request.key for request in requests if request.key in active_keys) + if needed: + filtered.append(ParsedToolCall(name=call.name, intention=call.intention, args=needed)) + if skipped: + keys = ", ".join(dict.fromkeys(skipped)) + self._remember_agent_error("Recall skipped for already-visible result key(s): " + keys + ". Use visible Tool Context content; Read concrete files for new evidence.") + return filtered + def _apply_context_tool_executions( self, executions: list[ToolCallExecution], diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index e8eec54..741e3f5 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -291,6 +291,44 @@ def test_agent_merges_adjacent_recall_calls(tmp_path): assert agent.tool_runner.latest_executions[0].call.args == ["tr.1", "tr.2"] +def test_agent_skips_redundant_visible_recall(tmp_path): + session = Session(cwd=str(tmp_path)) + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem(description="success Search alpha", value="alpha") + agent = Agent(session) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall once", "args": ["tr.1"]}]) + + latest = agent.execute_tool_calls([{"name": "Recall", "intention": "recall again", "args": ["tr.1"]}]) + + assert latest.count("key=tr.1") == 1 + assert agent.tool_runner.latest_executions == [] + assert any("already-visible" in error for error in agent.agent_feedback_errors) + + +def test_agent_prunes_visible_keys_from_mixed_recall(tmp_path): + session = Session(cwd=str(tmp_path)) + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem(description="success Search alpha", value="alpha") + session.state.tool_result_store["tr.2"] = nanocode.ToolResultItem(description="success Search beta", value="beta") + agent = Agent(session) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall first", "args": ["tr.1"]}]) + + agent.execute_tool_calls([{"name": "Recall", "intention": "recall both", "args": ["tr.1", "tr.2"]}]) + + assert len(agent.tool_runner.latest_executions) == 1 + assert agent.tool_runner.latest_executions[0].call.args == ["tr.2"] + + +def test_agent_allows_visible_recall_with_range(tmp_path): + session = Session(cwd=str(tmp_path)) + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem(description="success Search alpha", value="alpha\nbeta") + agent = Agent(session) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall once", "args": ["tr.1"]}]) + + agent.execute_tool_calls([{"name": "Recall", "intention": "recall range", "args": ["tr.1", "0,1"]}]) + + assert len(agent.tool_runner.latest_executions) == 1 + assert agent.tool_runner.latest_executions[0].call.name == "Recall" + + def test_agent_does_not_dedupe_same_batch_edit_tool_calls(tmp_path): path = tmp_path / "sample.txt" path.write_text("old\n", encoding="utf-8") diff --git a/tests/test_nanocode_context_tool.py b/tests/test_nanocode_context_tool.py index 6a17dfd..4e52e65 100644 --- a/tests/test_nanocode_context_tool.py +++ b/tests/test_nanocode_context_tool.py @@ -63,6 +63,27 @@ def test_tool_result_tool_reads_internal_log_ranges_without_exposing_path(tmp_pa assert "log:" not in result +def test_tool_result_tool_accepts_structured_key_ranges(tmp_path): + session = Session(cwd=str(tmp_path)) + log_path = tmp_path / ".nanocode" / "sessions" / "test-session" / "tool_results" / "sample.log" + log_path.parent.mkdir(parents=True) + log_path.write_text("zero\none\ntwo\nthree\n", encoding="utf-8") + session.state.tool_result_store["tr.1"] = ToolResultItem( + description="Search sample.", + value="[tool result excerpt]", + log_path=os.path.relpath(log_path, tmp_path), + original_lines=4, + original_chars=19, + excerpted=True, + ) + + result = ToolResultTool.make(session, [{"key": "tr.1", "range": [1, 3]}]).call() + + assert "one\ntwo" in result + assert "zero" not in result + assert ToolResultTool.cli_args([{"key": "tr.1", "range": [1, 3]}]) == ["tr.1", "1:3"] + + def test_tool_result_item_format_hides_log_path(): item = ToolResultItem(description="Read sample.", value="line", excerpted=True) @@ -98,3 +119,6 @@ def test_tool_result_invalid_args(tmp_path): with pytest.raises(ToolCallError, match="Recall requires"): ToolResultTool.make(session, []).call() + + with pytest.raises(ToolCallError, match="Recall requires key"): + ToolResultTool.make(session, [{}]).call() From 11a512325c2d738f1b5ed91ec60fe0c027018ab4 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 20:04:44 -0700 Subject: [PATCH 28/29] Stop repeated ranged Recall loops --- nanocode.py | 105 ++++++++++++++++++++++++++++++----- tests/test_nanocode_agent.py | 29 +++++++++- 2 files changed, 118 insertions(+), 16 deletions(-) diff --git a/nanocode.py b/nanocode.py index 816e462..12a6d86 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1325,6 +1325,7 @@ class ToolResultContext: latest: list[str] = field(default_factory=list) recent: list[str] = field(default_factory=list) reactivated_keys: set[str] = field(default_factory=set) + reactivated_recall_fingerprints: set[str] = field(default_factory=set) def append_latest(self, executions: list[ToolCallExecution], *, max_index_items: int, checkpoint: int, append: bool = False) -> None: if not executions: @@ -1349,6 +1350,7 @@ def compact_observed(self, observed_blocks: list[str]) -> None: if not observed: return self.reactivated_keys.difference_update(observed_keys) + self.drop_recall_fingerprints(observed_keys) self.recent = [self.compact_block(block) if self.is_full_block(block) and self.result_counter(block) in observed else block for block in self.recent] self.latest = [self.compact_block(block) if self.is_full_block(block) and self.result_counter(block) in observed else block for block in self.latest] @@ -1389,6 +1391,23 @@ def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = N def active_raw_keys(self, checkpoint: int) -> set[str]: return set(self.blocks_by_key(self.unreduced_recent_blocks(checkpoint) + self.latest_raw_blocks())) + def active_recall_fingerprints(self, checkpoint: int) -> set[str]: + active_keys = self.active_raw_keys(checkpoint) + return {fingerprint for fingerprint in self.reactivated_recall_fingerprints if self.recall_fingerprint_key(fingerprint) in active_keys} + + def replace_recall_fingerprints(self, keys: set[str], fingerprints: set[str]) -> None: + self.drop_recall_fingerprints(keys) + self.reactivated_recall_fingerprints.update(fingerprints) + + def drop_recall_fingerprints(self, keys: set[str]) -> None: + self.reactivated_recall_fingerprints = { + fingerprint for fingerprint in self.reactivated_recall_fingerprints if self.recall_fingerprint_key(fingerprint) not in keys + } + + @staticmethod + def recall_fingerprint_key(fingerprint: str) -> str: + return fingerprint.split("|", 1)[0] + def _needs_reduction(self, block: str, checkpoint: int) -> bool: key = self.result_key(block) return self.is_full_block(block) and (self.result_counter(block) > checkpoint or key in self.reactivated_keys) @@ -3715,6 +3734,23 @@ def requests_from_args(cls, args: list[JsonValue]) -> list[RecallRequest]: def request_keys_from_args(cls, args: list[JsonValue]) -> list[str]: return [request.key for request in cls.requests_from_args(args)] + @classmethod + def args_from_requests(cls, requests: list[RecallRequest]) -> list[JsonValue]: + args: list[JsonValue] = [] + for request in requests: + if not request.ranges: + args.append(request.key) + elif len(request.ranges) == 1: + start, end = request.ranges[0] + args.append({"key": request.key, "range": [start, end]}) + else: + args.append({"key": request.key, "ranges": [[start, end] for start, end in request.ranges]}) + return args + + @staticmethod + def request_fingerprint(request: RecallRequest) -> str: + return request.key + "|" + ",".join(str(start) + ":" + str(end) for start, end in request.ranges) + @classmethod def _requests_from_payload(cls, payload: Json) -> list[RecallRequest]: unexpected = sorted(set(payload) - {"key", "result_key", "keys", "range", "ranges"}) @@ -6254,11 +6290,16 @@ def execute_tool_calls( on_auto_approve: ToolDisplayCallback | None = None, append_to_latest: bool = False, ) -> str: - tool_calls = self._filter_redundant_recall_calls(tool_calls) + tool_calls, skipped_recall_executions = self._filter_redundant_recall_calls(tool_calls) if not tool_calls: - self.tool_runner.latest_executions = [] + self.tool_runner.latest_executions = skipped_recall_executions + self.session.state.turn_tool_calls += len(skipped_recall_executions) + self.session.state.session_tool_calls += len(skipped_recall_executions) + for execution in skipped_recall_executions: + self._after_tool_execution(execution) return "\n\n".join(self.tool_context.latest) self.tool_runner.execute(tool_calls, confirm=confirm, on_auto_approve=on_auto_approve) + self.tool_runner.latest_executions = [*skipped_recall_executions, *self.tool_runner.latest_executions] regular_executions = [execution for execution in self.tool_runner.latest_executions if execution.call.name not in CONTEXT_TOOL_NAMES] if regular_executions: self.tool_context.append_latest( @@ -6277,12 +6318,15 @@ def execute_tool_calls( self._after_tool_execution(execution) return "\n\n".join(self.tool_context.latest) - def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> list[JsonValue]: - active_keys = self.tool_context.active_raw_keys(self.blackboard.memory_checkpoint_tool_result_counter) - if not active_keys: - return tool_calls + def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> tuple[list[JsonValue], list[ToolCallExecution]]: + checkpoint = self.blackboard.memory_checkpoint_tool_result_counter + active_keys = self.tool_context.active_raw_keys(checkpoint) + active_fingerprints = self.tool_context.active_recall_fingerprints(checkpoint) + if not active_keys and not active_fingerprints: + return tool_calls, [] filtered: list[JsonValue] = [] - skipped: list[str] = [] + skipped_executions: list[ToolCallExecution] = [] + skipped_labels: list[str] = [] for item in tool_calls: try: call = self.tool_runner.parse_tool_call(item) @@ -6290,17 +6334,44 @@ def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> list[Js except ToolCallArgError: filtered.append(item) continue - if call.name != ToolResultTool.NAME or not requests or any(request.ranges for request in requests): + if call.name != ToolResultTool.NAME or not requests: filtered.append(item) continue - needed = [request.key for request in requests if request.key not in active_keys] - skipped.extend(request.key for request in requests if request.key in active_keys) + needed, skipped = self._split_recall_requests(requests, active_keys, active_fingerprints) + skipped_labels.extend(self._recall_request_label(request) for request in skipped) if needed: - filtered.append(ParsedToolCall(name=call.name, intention=call.intention, args=needed)) - if skipped: - keys = ", ".join(dict.fromkeys(skipped)) - self._remember_agent_error("Recall skipped for already-visible result key(s): " + keys + ". Use visible Tool Context content; Read concrete files for new evidence.") - return filtered + filtered.append(ParsedToolCall(name=call.name, intention=call.intention, args=ToolResultTool.args_from_requests(needed))) + elif skipped: + skipped_executions.append( + ToolCallExecution( + call=call, + outcome="failure", + output="ToolCallError: redundant Recall skipped because the requested result content is already visible in Tool Context.", + error_type=ToolCallError, + ) + ) + if skipped_labels: + keys = ", ".join(dict.fromkeys(skipped_labels)) + self._remember_agent_error("Recall skipped for already-visible result content: " + keys + ". Use visible Tool Context content; Read concrete files for new evidence.") + return filtered, skipped_executions + + @staticmethod + def _split_recall_requests( + requests: list[RecallRequest], + active_keys: set[str], + active_fingerprints: set[str], + ) -> tuple[list[RecallRequest], list[RecallRequest]]: + needed: list[RecallRequest] = [] + skipped: list[RecallRequest] = [] + for request in requests: + redundant = ToolResultTool.request_fingerprint(request) in active_fingerprints if request.ranges else request.key in active_keys + (skipped if redundant else needed).append(request) + return needed, skipped + + @staticmethod + def _recall_request_label(request: RecallRequest) -> str: + ranges = " ".join(str(start) + ":" + str(end) for start, end in request.ranges) + return request.key + ((" " + ranges) if ranges else "") def _apply_context_tool_executions( self, @@ -6313,12 +6384,16 @@ def _apply_context_tool_executions( continue if execution.call.name == ToolResultTool.NAME: blocks = ToolResultContext.recalled_result_blocks(ToolResultContext.format_execution(execution)) + block_keys = set(ToolResultContext.blocks_by_key(blocks)) self.tool_context.reactivate_result_blocks( blocks, max_index_items=self.context_budget().index_items, checkpoint=self.blackboard.memory_checkpoint_tool_result_counter, append=append_to_latest or bool(self.tool_context.latest), ) + requests = [request for request in ToolResultTool.requests_from_args(execution.call.args) if request.key in block_keys] + fingerprints = {ToolResultTool.request_fingerprint(request) for request in requests if request.ranges} + self.tool_context.replace_recall_fingerprints({request.key for request in requests}, fingerprints) def _unreferenced_unreduced_blocks(self) -> list[str]: return self.tool_context.unreduced_blocks( diff --git a/tests/test_nanocode_agent.py b/tests/test_nanocode_agent.py index 741e3f5..6dc22b6 100644 --- a/tests/test_nanocode_agent.py +++ b/tests/test_nanocode_agent.py @@ -300,7 +300,8 @@ def test_agent_skips_redundant_visible_recall(tmp_path): latest = agent.execute_tool_calls([{"name": "Recall", "intention": "recall again", "args": ["tr.1"]}]) assert latest.count("key=tr.1") == 1 - assert agent.tool_runner.latest_executions == [] + assert len(agent.tool_runner.latest_executions) == 1 + assert agent.tool_runner.latest_executions[0].outcome == "failure" assert any("already-visible" in error for error in agent.agent_feedback_errors) @@ -329,6 +330,32 @@ def test_agent_allows_visible_recall_with_range(tmp_path): assert agent.tool_runner.latest_executions[0].call.name == "Recall" +def test_agent_skips_repeated_recall_with_same_range(tmp_path): + session = Session(cwd=str(tmp_path)) + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem(description="success Search alpha", value="alpha\nbeta") + agent = Agent(session) + recall_range = [{"key": "tr.1", "range": [0, 1]}] + + agent.execute_tool_calls([{"name": "Recall", "intention": "recall range", "args": recall_range}]) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall range again", "args": recall_range}]) + + assert len(agent.tool_runner.latest_executions) == 1 + assert agent.tool_runner.latest_executions[0].outcome == "failure" + assert any("tr.1 0:1" in error for error in agent.agent_feedback_errors) + + +def test_agent_allows_recall_with_different_range(tmp_path): + session = Session(cwd=str(tmp_path)) + session.state.tool_result_store["tr.1"] = nanocode.ToolResultItem(description="success Search alpha", value="alpha\nbeta") + agent = Agent(session) + agent.execute_tool_calls([{"name": "Recall", "intention": "recall first range", "args": [{"key": "tr.1", "range": [0, 1]}]}]) + + agent.execute_tool_calls([{"name": "Recall", "intention": "recall second range", "args": [{"key": "tr.1", "range": [1, 2]}]}]) + + assert len(agent.tool_runner.latest_executions) == 1 + assert agent.tool_runner.latest_executions[0].outcome == "success" + + def test_agent_does_not_dedupe_same_batch_edit_tool_calls(tmp_path): path = tmp_path / "sample.txt" path.write_text("old\n", encoding="utf-8") From 413a4af9c0455567071b1d5420877e8e4f19c1a9 Mon Sep 17 00:00:00 2001 From: hit9 Date: Sun, 24 May 2026 23:23:39 -0700 Subject: [PATCH 29/29] Simplify Recall context implementation --- nanocode.py | 97 ++++++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 65 deletions(-) diff --git a/nanocode.py b/nanocode.py index 12a6d86..fd83b11 100644 --- a/nanocode.py +++ b/nanocode.py @@ -1391,23 +1391,11 @@ def unreduced_blocks(self, checkpoint: int, *, exclude_keys: set[str] | None = N def active_raw_keys(self, checkpoint: int) -> set[str]: return set(self.blocks_by_key(self.unreduced_recent_blocks(checkpoint) + self.latest_raw_blocks())) - def active_recall_fingerprints(self, checkpoint: int) -> set[str]: - active_keys = self.active_raw_keys(checkpoint) - return {fingerprint for fingerprint in self.reactivated_recall_fingerprints if self.recall_fingerprint_key(fingerprint) in active_keys} - - def replace_recall_fingerprints(self, keys: set[str], fingerprints: set[str]) -> None: - self.drop_recall_fingerprints(keys) - self.reactivated_recall_fingerprints.update(fingerprints) - def drop_recall_fingerprints(self, keys: set[str]) -> None: self.reactivated_recall_fingerprints = { - fingerprint for fingerprint in self.reactivated_recall_fingerprints if self.recall_fingerprint_key(fingerprint) not in keys + fingerprint for fingerprint in self.reactivated_recall_fingerprints if fingerprint.partition("|")[0] not in keys } - @staticmethod - def recall_fingerprint_key(fingerprint: str) -> str: - return fingerprint.split("|", 1)[0] - def _needs_reduction(self, block: str, checkpoint: int) -> bool: key = self.result_key(block) return self.is_full_block(block) and (self.result_counter(block) > checkpoint or key in self.reactivated_keys) @@ -3638,6 +3626,23 @@ class RecallRequest: key: str ranges: tuple[tuple[int, int], ...] = () + def cli_tokens(self) -> list[str]: + return [self.key, *(str(start) + ":" + str(end) for start, end in self.ranges)] + + def label(self) -> str: + return " ".join(self.cli_tokens()) + + def fingerprint(self) -> str: + return self.key + "|" + ",".join(str(start) + ":" + str(end) for start, end in self.ranges) + + def to_arg(self) -> JsonValue: + if not self.ranges: + return self.key + if len(self.ranges) == 1: + start, end = self.ranges[0] + return {"key": self.key, "range": [start, end]} + return {"key": self.key, "ranges": [[start, end] for start, end in self.ranges]} + @dataclass class ToolResultTool(Tool): @@ -3670,7 +3675,7 @@ def cli_args(cls, args: list[JsonValue]) -> list[str]: requests = cls.requests_from_args(args) except ToolCallArgError: return super().cli_args(args) - tokens = [token for request in requests for token in cls._request_cli_tokens(request)] + tokens = [token for request in requests for token in request.cli_tokens()] return tokens or super().cli_args(args) @classmethod @@ -3730,27 +3735,6 @@ def requests_from_args(cls, args: list[JsonValue]) -> list[RecallRequest]: requests = [request if request.ranges else RecallRequest(request.key, common) for request in requests] return cls._dedupe_requests(requests) - @classmethod - def request_keys_from_args(cls, args: list[JsonValue]) -> list[str]: - return [request.key for request in cls.requests_from_args(args)] - - @classmethod - def args_from_requests(cls, requests: list[RecallRequest]) -> list[JsonValue]: - args: list[JsonValue] = [] - for request in requests: - if not request.ranges: - args.append(request.key) - elif len(request.ranges) == 1: - start, end = request.ranges[0] - args.append({"key": request.key, "range": [start, end]}) - else: - args.append({"key": request.key, "ranges": [[start, end] for start, end in request.ranges]}) - return args - - @staticmethod - def request_fingerprint(request: RecallRequest) -> str: - return request.key + "|" + ",".join(str(start) + ":" + str(end) for start, end in request.ranges) - @classmethod def _requests_from_payload(cls, payload: Json) -> list[RecallRequest]: unexpected = sorted(set(payload) - {"key", "result_key", "keys", "range", "ranges"}) @@ -3788,12 +3772,8 @@ def _dedupe_requests(requests: list[RecallRequest]) -> list[RecallRequest]: unique.append(request) return unique - @staticmethod - def _request_cli_tokens(request: RecallRequest) -> list[str]: - return [request.key, *(str(start) + ":" + str(end) for start, end in request.ranges)] - def preview(self) -> str: - return "Recall " + ", ".join(token for request in self.requests for token in self._request_cli_tokens(request)) + return "Recall " + ", ".join(token for request in self.requests for token in request.cli_tokens()) def call(self) -> str: if not self.requests: @@ -6321,9 +6301,9 @@ def execute_tool_calls( def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> tuple[list[JsonValue], list[ToolCallExecution]]: checkpoint = self.blackboard.memory_checkpoint_tool_result_counter active_keys = self.tool_context.active_raw_keys(checkpoint) - active_fingerprints = self.tool_context.active_recall_fingerprints(checkpoint) - if not active_keys and not active_fingerprints: + if not active_keys: return tool_calls, [] + active_fingerprints = {fingerprint for fingerprint in self.tool_context.reactivated_recall_fingerprints if fingerprint.partition("|")[0] in active_keys} filtered: list[JsonValue] = [] skipped_executions: list[ToolCallExecution] = [] skipped_labels: list[str] = [] @@ -6337,10 +6317,14 @@ def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> tuple[l if call.name != ToolResultTool.NAME or not requests: filtered.append(item) continue - needed, skipped = self._split_recall_requests(requests, active_keys, active_fingerprints) - skipped_labels.extend(self._recall_request_label(request) for request in skipped) + needed: list[RecallRequest] = [] + skipped: list[RecallRequest] = [] + for request in requests: + redundant = request.fingerprint() in active_fingerprints if request.ranges else request.key in active_keys + (skipped if redundant else needed).append(request) + skipped_labels.extend(request.label() for request in skipped) if needed: - filtered.append(ParsedToolCall(name=call.name, intention=call.intention, args=ToolResultTool.args_from_requests(needed))) + filtered.append(ParsedToolCall(name=call.name, intention=call.intention, args=[request.to_arg() for request in needed])) elif skipped: skipped_executions.append( ToolCallExecution( @@ -6355,24 +6339,6 @@ def _filter_redundant_recall_calls(self, tool_calls: list[JsonValue]) -> tuple[l self._remember_agent_error("Recall skipped for already-visible result content: " + keys + ". Use visible Tool Context content; Read concrete files for new evidence.") return filtered, skipped_executions - @staticmethod - def _split_recall_requests( - requests: list[RecallRequest], - active_keys: set[str], - active_fingerprints: set[str], - ) -> tuple[list[RecallRequest], list[RecallRequest]]: - needed: list[RecallRequest] = [] - skipped: list[RecallRequest] = [] - for request in requests: - redundant = ToolResultTool.request_fingerprint(request) in active_fingerprints if request.ranges else request.key in active_keys - (skipped if redundant else needed).append(request) - return needed, skipped - - @staticmethod - def _recall_request_label(request: RecallRequest) -> str: - ranges = " ".join(str(start) + ":" + str(end) for start, end in request.ranges) - return request.key + ((" " + ranges) if ranges else "") - def _apply_context_tool_executions( self, executions: list[ToolCallExecution], @@ -6392,8 +6358,9 @@ def _apply_context_tool_executions( append=append_to_latest or bool(self.tool_context.latest), ) requests = [request for request in ToolResultTool.requests_from_args(execution.call.args) if request.key in block_keys] - fingerprints = {ToolResultTool.request_fingerprint(request) for request in requests if request.ranges} - self.tool_context.replace_recall_fingerprints({request.key for request in requests}, fingerprints) + fingerprints = {request.fingerprint() for request in requests if request.ranges} + self.tool_context.drop_recall_fingerprints({request.key for request in requests}) + self.tool_context.reactivated_recall_fingerprints.update(fingerprints) def _unreferenced_unreduced_blocks(self) -> list[str]: return self.tool_context.unreduced_blocks(