From dba0103da51e3fe88ff84ee86421d5e3adc50b3d Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 13:41:23 -0700 Subject: [PATCH 001/134] introduced the core pkg for Anton --- anton/core/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 anton/core/__init__.py diff --git a/anton/core/__init__.py b/anton/core/__init__.py new file mode 100644 index 00000000..e69de29b From eca2051c035e70e819a70ac7cdcafb52e66731c8 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 14:48:40 -0700 Subject: [PATCH 002/134] added pkgs for core components --- anton/core/backends/__init__.py | 0 anton/core/llm/__init__.py | 0 anton/core/memory/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 anton/core/backends/__init__.py create mode 100644 anton/core/llm/__init__.py create mode 100644 anton/core/memory/__init__.py diff --git a/anton/core/backends/__init__.py b/anton/core/backends/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/anton/core/llm/__init__.py b/anton/core/llm/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/anton/core/memory/__init__.py b/anton/core/memory/__init__.py new file mode 100644 index 00000000..e69de29b From 8bca73076047989dc2e0614577420a6bef686c7a Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 15:49:09 -0700 Subject: [PATCH 003/134] separated the core chat session --- anton/core/session.py | 1183 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1183 insertions(+) create mode 100644 anton/core/session.py diff --git a/anton/core/session.py b/anton/core/session.py new file mode 100644 index 00000000..b4253542 --- /dev/null +++ b/anton/core/session.py @@ -0,0 +1,1183 @@ +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator +from typing import TYPE_CHECKING + +from anton.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt +from 
anton.llm.provider import ( + ContextOverflowError, + StreamComplete, + StreamContextCompacted, + StreamEvent, + StreamTaskProgress, + StreamTextDelta, + StreamToolResult, +) +from anton.scratchpad import ScratchpadManager +from anton.tools import ( + CONNECT_DATASOURCE_TOOL, + MEMORIZE_TOOL, + PUBLISH_TOOL, + RECALL_TOOL, + SCRATCHPAD_TOOL, + dispatch_tool, + format_cell_result, + prepare_scratchpad_exec, +) + +from anton.utils.datasources import ( + build_datasource_context, + scrub_credentials, +) + +if TYPE_CHECKING: + from rich.console import Console + from anton.context.self_awareness import SelfAwarenessContext + from anton.llm.client import LLMClient + from anton.memory.cortex import Cortex + from anton.memory.episodes import EpisodicMemory + from anton.memory.history_store import HistoryStore + from anton.workspace import Workspace + + +_MAX_TOOL_ROUNDS = 25 # Hard limit on consecutive tool-call rounds per turn +_MAX_CONTINUATIONS = 3 # Max times the verification loop can restart the tool loop +_CONTEXT_PRESSURE_THRESHOLD = 0.7 # Trigger compaction when context is 70% full +_MAX_CONSECUTIVE_ERRORS = 5 # Stop if the same tool fails this many times in a row +_RESILIENCE_NUDGE_AT = 2 # Inject resilience nudge after this many consecutive errors +_RESILIENCE_NUDGE = ( + "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or " + "asking the user for help, try a creative workaround — different headers/user-agent, " + "a public API, archive.org, an alternate library, or a completely different data source. " + "Only involve the user if the problem truly requires something only they can provide." +) + +# TODO: Is this enough for now? 
+TOKEN_STATUS_CACHE_TTL = 60.0 + + +def _apply_error_tracking( + result_text: str, + tool_name: str, + error_streak: dict[str, int], + resilience_nudged: set[str], +) -> str: + """Track consecutive errors per tool and append nudge/circuit-breaker messages.""" + is_error = any( + marker in result_text + for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:") + ) + if is_error: + error_streak[tool_name] = error_streak.get(tool_name, 0) + 1 + else: + error_streak[tool_name] = 0 + resilience_nudged.discard(tool_name) + + streak = error_streak.get(tool_name, 0) + if streak >= _RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged: + result_text += _RESILIENCE_NUDGE + resilience_nudged.add(tool_name) + + if streak >= _MAX_CONSECUTIVE_ERRORS: + result_text += ( + f"\n\nSYSTEM: The '{tool_name}' tool has failed {_MAX_CONSECUTIVE_ERRORS} times " + "in a row. Stop retrying this approach. Either try a completely different " + "strategy or tell the user what's going wrong so they can help." 
+ ) + + return result_text + + +class ChatSession: + """Manages a multi-turn conversation with tool-call delegation.""" + + def __init__( + self, + llm_client: LLMClient, + *, + self_awareness: SelfAwarenessContext | None = None, + cortex: Cortex | None = None, + episodic: EpisodicMemory | None = None, + runtime_context: str = "", + workspace: Workspace | None = None, + console: Console | None = None, + coding_provider: str = "anthropic", + coding_api_key: str = "", + coding_base_url: str = "", + initial_history: list[dict] | None = None, + history_store: HistoryStore | None = None, + session_id: str | None = None, + proactive_dashboards: bool = False, + ) -> None: + self._llm = llm_client + self._self_awareness = self_awareness + self._cortex = cortex + self._episodic = episodic + self._runtime_context = runtime_context + self._proactive_dashboards = proactive_dashboards + self._workspace = workspace + self._console = console + self._history: list[dict] = list(initial_history) if initial_history else [] + self._pending_memory_confirmations: list = [] + self._turn_count = ( + sum(1 for m in self._history if m.get("role") == "user") + if initial_history + else 0 + ) + self._history_store = history_store + self._session_id = session_id + self._cancel_event = asyncio.Event() + self._escape_watcher: "EscapeWatcher | None" = None + self._active_datasource: str | None = None + self._scratchpads = ScratchpadManager( + coding_provider=coding_provider, + coding_model=getattr(llm_client, "coding_model", ""), + coding_api_key=coding_api_key, + coding_base_url=coding_base_url, + workspace_path=workspace.base if workspace else None, + ) + + @property + def history(self) -> list[dict]: + return self._history + + def repair_history(self) -> None: + """Fix dangling tool_use blocks left by mid-stream cancellation. + + The Anthropic API requires every tool_use to be followed by a + tool_result. 
If we cancelled mid-turn, the last assistant message + may contain tool_use blocks with no corresponding tool_result in + the next message. Append synthetic tool_results so the + conversation can continue. + """ + if not self._history: + return + last = self._history[-1] + if last.get("role") != "assistant": + return + content = last.get("content") + if not isinstance(content, list): + return + tool_ids = [ + block["id"] + for block in content + if isinstance(block, dict) and block.get("type") == "tool_use" + ] + if not tool_ids: + return + self._history.append( + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": tid, + "content": "Cancelled by user.", + } + for tid in tool_ids + ], + } + ) + + def _persist_history(self) -> None: + """Save current history to disk if a history store is configured.""" + if self._history_store and self._session_id: + self._history_store.save(self._session_id, self._history) + + async def _build_system_prompt(self, user_message: str = "") -> str: + import datetime as _dt + _now = _dt.datetime.now() + _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p") + + prompt = CHAT_SYSTEM_PROMPT.format( + runtime_context=self._runtime_context, + visualizations_section=build_visualizations_prompt( + self._proactive_dashboards + ), + current_datetime=_current_datetime, + ) + # Inject memory context (replaces old self_awareness) + if self._cortex is not None: + memory_section = await self._cortex.build_memory_context(user_message) + if memory_section: + prompt += memory_section + elif self._self_awareness is not None: + # Fallback for legacy usage (tests, etc.) 
+ sa_section = self._self_awareness.build_prompt_section() + if sa_section: + prompt += sa_section + # Inject anton.md project context (user-written takes priority) + if self._workspace is not None: + md_context = self._workspace.build_anton_md_context() + if md_context: + prompt += md_context + # Inject connected datasource context without credentials + ds_ctx = build_datasource_context(active_only=self._active_datasource) + if ds_ctx: + prompt += ds_ctx + return prompt + + # Packages the LLM is most likely to care about when writing scratchpad code. + _NOTABLE_PACKAGES: set[str] = { + "numpy", + "pandas", + "matplotlib", + "seaborn", + "scipy", + "scikit-learn", + "requests", + "httpx", + "aiohttp", + "beautifulsoup4", + "lxml", + "pillow", + "sympy", + "networkx", + "sqlalchemy", + "pydantic", + "rich", + "tqdm", + "click", + "fastapi", + "flask", + "django", + "openai", + "anthropic", + "tiktoken", + "transformers", + "torch", + "polars", + "pyarrow", + "openpyxl", + "xlsxwriter", + "plotly", + "bokeh", + "altair", + "pytest", + "hypothesis", + "yaml", + "pyyaml", + "toml", + "tomli", + "tomllib", + "jinja2", + "markdown", + "pygments", + "cryptography", + "paramiko", + "boto3", + } + + def _build_tools(self) -> list[dict]: + scratchpad_tool = dict(SCRATCHPAD_TOOL) + pkg_list = self._scratchpads._available_packages + if pkg_list: + notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES) + if notable: + pkg_line = ", ".join(notable) + extra = f"\n\nInstalled packages ({len(pkg_list)} total, notable: {pkg_line})." + else: + extra = f"\n\nInstalled packages: {len(pkg_list)} total (standard library plus dependencies)." 
+ scratchpad_tool["description"] = SCRATCHPAD_TOOL["description"] + extra + + # Inject scratchpad wisdom from memory (procedural priming) + if self._cortex is not None: + wisdom = self._cortex.get_scratchpad_context() + if wisdom: + scratchpad_tool[ + "description" + ] += f"\n\nLessons from past sessions:\n{wisdom}" + + tools = [scratchpad_tool] + if self._cortex is not None: + tools.append(MEMORIZE_TOOL) + elif self._self_awareness is not None: + # Legacy fallback + from anton.tools import MEMORIZE_TOOL as _MT + + tools.append(_MT) + if self._episodic is not None and self._episodic.enabled: + tools.append(RECALL_TOOL) + tools.append(CONNECT_DATASOURCE_TOOL) + tools.append(PUBLISH_TOOL) + return tools + + async def close(self) -> None: + """Clean up scratchpads and other resources.""" + await self._scratchpads.close_all() + + async def _summarize_history(self) -> None: + """Compress old conversation turns into a summary using the coding model. + + Splits history into old (first 60%) and recent (last 40%), keeping at + least 4 recent turns. The old portion is summarized by the fast coding + model and replaced with a single user message. + """ + if len(self._history) < 6: + return # Too short to summarize + + min_recent = 4 + split = max(int(len(self._history) * 0.6), 1) + # Ensure we keep at least min_recent turns + split = min(split, len(self._history) - min_recent) + if split < 2: + return + + # Walk split backward to avoid breaking tool_use / tool_result pairs. + # A user message containing tool_result blocks must stay with the + # preceding assistant message that contains the matching tool_use. 
+ while split > 1: + msg = self._history[split] + if msg.get("role") != "user": + break + content = msg.get("content") + if not isinstance(content, list): + break + has_tool_result = any( + isinstance(b, dict) and b.get("type") == "tool_result" for b in content + ) + if not has_tool_result: + break + # This user message has tool_results — keep it (and its paired + # assistant message) in the recent portion. + split -= 1 + # Also pull back over the preceding assistant message so the + # pair stays together. + if split > 1 and self._history[split].get("role") == "assistant": + split -= 1 + + if split < 2: + return + + old_turns = self._history[:split] + recent_turns = self._history[split:] + + # Serialize old turns into text for summarization + lines: list[str] = [] + for msg in old_turns: + role = msg.get("role", "unknown") + content = msg.get("content", "") + if isinstance(content, str): + lines.append(f"[{role}]: {content[:2000]}") + elif isinstance(content, list): + for block in content: + if isinstance(block, dict): + if block.get("type") == "text": + lines.append(f"[{role}]: {block['text'][:1000]}") + elif block.get("type") == "tool_use": + lines.append( + f"[{role}/tool_use]: {block.get('name', '')}({str(block.get('input', ''))[:500]})" + ) + elif block.get("type") == "tool_result": + lines.append( + f"[tool_result]: {str(block.get('content', ''))[:500]}" + ) + + old_text = "\n".join(lines) + # Cap at ~8000 chars to avoid overloading the summarizer + if len(old_text) > 8000: + old_text = old_text[:8000] + "\n... (truncated)" + + try: + summary_response = await self._llm.code( + system=( + "Summarize this conversation history concisely. Preserve:\n" + "- Key decisions and conclusions\n" + "- Important data/results discovered\n" + "- Variable names and values that are still relevant\n" + "- Errors encountered and how they were resolved\n" + "Keep it under 2000 tokens. Use bullet points." 
+ ), + messages=[{"role": "user", "content": old_text}], + max_tokens=2048, + ) + summary = summary_response.content or "(summary unavailable)" + except Exception: + # If summarization fails, just do a simple truncation + summary = f"(Earlier conversation with {len(old_turns)} turns — summarization failed)" + + summary_msg = { + "role": "user", + "content": f"[Context summary of earlier conversation]\n{summary}", + } + + # If the recent portion starts with a user message, insert a minimal + # assistant separator to avoid consecutive user messages (API error). + if recent_turns and recent_turns[0].get("role") == "user": + self._history = [ + summary_msg, + {"role": "assistant", "content": "Understood."}, + *recent_turns, + ] + else: + self._history = [summary_msg] + recent_turns + + def _compact_scratchpads(self) -> bool: + """Compact all active scratchpads. Returns True if any were compacted.""" + compacted = False + for pad in self._scratchpads._pads.values(): + if pad._compact_cells(): + compacted = True + return compacted + + async def turn(self, user_input: str | list[dict]) -> str: + self._history.append({"role": "user", "content": user_input}) + + user_msg_str = user_input if isinstance(user_input, str) else "" + system = await self._build_system_prompt(user_msg_str) + tools = self._build_tools() + + try: + response = await self._llm.plan( + system=system, + messages=self._history, + tools=tools, + ) + except ContextOverflowError: + await self._summarize_history() + self._compact_scratchpads() + response = await self._llm.plan( + system=system, + messages=self._history, + tools=tools, + ) + + # Proactive compaction + if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD: + await self._summarize_history() + self._compact_scratchpads() + + # Handle tool calls + tool_round = 0 + error_streak: dict[str, int] = {} + resilience_nudged: set[str] = set() + + while response.tool_calls: + tool_round += 1 + if tool_round > _MAX_TOOL_ROUNDS: + 
self._history.append( + {"role": "assistant", "content": response.content or ""} + ) + self._history.append( + { + "role": "user", + "content": ( + f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. " + "Pause here. Summarize what you have accomplished so far and what remains. " + "If you believe you are on a good track and can finish the task with more steps, " + "tell the user and ask if they'd like you to continue. " + "Do NOT retry automatically — wait for the user's response." + ), + } + ) + response = await self._llm.plan( + system=system, + messages=self._history, + ) + break + + # Build assistant message with content blocks + assistant_content: list[dict] = [] + if response.content: + assistant_content.append({"type": "text", "text": response.content}) + for tc in response.tool_calls: + assistant_content.append( + { + "type": "tool_use", + "id": tc.id, + "name": tc.name, + "input": tc.input, + } + ) + self._history.append({"role": "assistant", "content": assistant_content}) + + # Process each tool call via registry + tool_results: list[dict] = [] + for tc in response.tool_calls: + try: + result_text = await dispatch_tool(self, tc.name, tc.input) + except Exception as exc: + result_text = f"Tool '{tc.name}' failed: {exc}" + + result_text = scrub_credentials(result_text) + result_text = _apply_error_tracking( + result_text, + tc.name, + error_streak, + resilience_nudged, + ) + + tool_results.append( + { + "type": "tool_result", + "tool_use_id": tc.id, + "content": result_text, + } + ) + + self._history.append({"role": "user", "content": tool_results}) + + # Get follow-up from LLM + try: + response = await self._llm.plan( + system=system, + messages=self._history, + tools=tools, + ) + except ContextOverflowError: + await self._summarize_history() + self._compact_scratchpads() + response = await self._llm.plan( + system=system, + messages=self._history, + tools=tools, + ) + + # Proactive compaction during tool loop + if 
response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD: + await self._summarize_history() + self._compact_scratchpads() + + # Text-only response + reply = response.content or "" + self._history.append({"role": "assistant", "content": reply}) + + # Periodic memory vacuum (Systems Consolidation) + if self._cortex is not None and self._cortex.mode != "off": + self._cortex.maybe_vacuum() + + return reply + + async def turn_stream( + self, user_input: str | list[dict] + ) -> AsyncIterator[StreamEvent]: + """Streaming version of turn(). Yields events as they arrive.""" + self._history.append({"role": "user", "content": user_input}) + + # Log user input to episodic memory + if self._episodic is not None: + content = ( + user_input if isinstance(user_input, str) else str(user_input)[:2000] + ) + self._episodic.log_turn(self._turn_count + 1, "user", content) + + user_msg_str = user_input if isinstance(user_input, str) else "" + assistant_text_parts: list[str] = [] + _max_auto_retries = 2 + _retry_count = 0 + + while True: + try: + async for event in self._stream_and_handle_tools(user_msg_str): + if isinstance(event, StreamTextDelta): + assistant_text_parts.append(event.text) + yield event + break # completed successfully + except Exception as _agent_exc: + _retry_count += 1 + if _retry_count <= _max_auto_retries: + # Inject the error into history and let the LLM try to recover + self._history.append( + { + "role": "user", + "content": ( + f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n" + "If you can diagnose and fix the issue, continue working on the task. " + "Adjust your approach to avoid the same error. " + "If this is unrecoverable, summarize what you accomplished and suggest next steps." 
+ ), + } + ) + # Continue the while loop — _stream_and_handle_tools will be called + # again with the error context now in history + continue + else: + # Exhausted retries — stop and summarize for the user + self._history.append( + { + "role": "user", + "content": ( + f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n" + "Stop retrying. Please:\n" + "1. Summarize what you accomplished so far.\n" + "2. Explain what went wrong in plain language.\n" + "3. Suggest next steps — what the user can try (e.g. rephrase, " + "simplify the request, or ask you to continue from where you left off).\n" + "Be concise and helpful." + ), + } + ) + try: + async for event in self._llm.plan_stream( + system=await self._build_system_prompt(user_msg_str), + messages=self._history, + ): + if isinstance(event, StreamTextDelta): + assistant_text_parts.append(event.text) + yield event + except Exception: + fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request." 
+ assistant_text_parts.append(fallback) + yield StreamTextDelta(text=fallback) + break + + # Log assistant response to episodic memory + if self._episodic is not None and assistant_text_parts: + self._episodic.log_turn( + self._turn_count + 1, + "assistant", + "".join(assistant_text_parts)[:2000], + ) + + # Identity extraction (Default Mode Network — every 5 turns) + self._turn_count += 1 + self._persist_history() + if self._cortex is not None and self._cortex.mode != "off": + if self._turn_count % 5 == 0 and isinstance(user_input, str): + asyncio.create_task(self._cortex.maybe_update_identity(user_input)) + # Periodic memory vacuum (Systems Consolidation) + self._cortex.maybe_vacuum() + + async def _stream_and_handle_tools( + self, user_message: str = "" + ) -> AsyncIterator[StreamEvent]: + """Stream one LLM call, handle tool loops, yield all events.""" + system = await self._build_system_prompt(user_message) + tools = self._build_tools() + + # Guard against summarizing an already-summarized history within the same + # turn (e.g. ContextOverflowError on first call + pressure > threshold on + # the tool-loop follow-up would previously produce a summary of a summary). + _compacted_this_turn = False + + response: StreamComplete | None = None + + try: + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + except ContextOverflowError: + await self._summarize_history() + self._compact_scratchpads() + _compacted_this_turn = True + yield StreamContextCompacted( + message="Context was getting long — older history has been summarized." 
+ ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + + if response is None: + return + + llm_response = response.response + + # Detect max_tokens truncation — the LLM was cut off mid-response. + # Inject a continuation prompt so it can finish what it was doing. + if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls: + self._history.append( + {"role": "assistant", "content": llm_response.content or ""} + ) + self._history.append( + { + "role": "user", + "content": ( + "SYSTEM: Your response was truncated because it exceeded the output token limit. " + "Continue exactly where you left off. If you were about to call a tool, " + "call it now. If the code you were writing was too long, split it into smaller parts." + ), + } + ) + response = None + try: + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + except ContextOverflowError: + if not _compacted_this_turn: + await self._summarize_history() + self._compact_scratchpads() + _compacted_this_turn = True + yield StreamContextCompacted( + message="Context was getting long — older history has been summarized." + ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + + if response is None: + return + llm_response = response.response + + # Proactive compaction + if ( + not _compacted_this_turn + and llm_response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD + ): + await self._summarize_history() + self._compact_scratchpads() + _compacted_this_turn = True + yield StreamContextCompacted( + message="Context was getting long — older history has been summarized." 
+ ) + + # Tool-call loop with circuit breaker, wrapped in a completion + # verification outer loop that can restart the tool loop if the + # task isn't actually done yet. + continuation = 0 + _max_rounds_hit = False + + while True: # Completion verification loop + tool_round = 0 + error_streak: dict[str, int] = {} + resilience_nudged: set[str] = set() + + while llm_response.tool_calls: + tool_round += 1 + if tool_round > _MAX_TOOL_ROUNDS: + _max_rounds_hit = True + self._history.append( + {"role": "assistant", "content": llm_response.content or ""} + ) + self._history.append( + { + "role": "user", + "content": ( + f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. " + "Pause here. Summarize what you have accomplished so far and what remains. " + "If you believe you are on a good track and can finish the task with more steps, " + "tell the user and ask if they'd like you to continue. " + "Do NOT retry automatically — wait for the user's response." + ), + } + ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + ): + yield event + break + + # Build assistant message with content blocks + assistant_content: list[dict] = [] + if llm_response.content: + assistant_content.append( + {"type": "text", "text": llm_response.content} + ) + for tc in llm_response.tool_calls: + assistant_content.append( + { + "type": "tool_use", + "id": tc.id, + "name": tc.name, + "input": tc.input, + } + ) + self._history.append( + {"role": "assistant", "content": assistant_content} + ) + + # Process each tool call + tool_results: list[dict] = [] + for tc in llm_response.tool_calls: + if self._episodic is not None: + self._episodic.log_turn( + self._turn_count + 1, + "tool_call", + str(tc.input)[:2000], + tool=tc.name, + ) + + try: + if tc.name == "scratchpad" and tc.input.get("action") == "exec": + # Inline streaming exec — yields progress events + prep = await prepare_scratchpad_exec(self, tc.input) + if isinstance(prep, str): + 
result_text = prep + else: + ( + pad, + code, + description, + estimated_time, + estimated_seconds, + ) = prep + yield StreamTaskProgress( + phase="scratchpad_start", + message=description or "Running code", + eta_seconds=estimated_seconds, + ) + import time as _time + + _sp_t0 = _time.monotonic() + from anton.scratchpad import Cell + + cell = None + async for item in pad.execute_streaming( + code, + description=description, + estimated_time=estimated_time, + estimated_seconds=estimated_seconds, + cancel_event=self._cancel_event, + ): + if isinstance(item, str): + yield StreamTaskProgress( + phase="scratchpad", message=item + ) + elif isinstance(item, Cell): + cell = item + _sp_elapsed = _time.monotonic() - _sp_t0 + yield StreamTaskProgress( + phase="scratchpad_done", + message=description or "Done", + eta_seconds=_sp_elapsed, + ) + result_text = ( + format_cell_result(cell) + if cell + else "No result produced." + ) + if self._episodic is not None and cell is not None: + self._episodic.log_turn( + self._turn_count + 1, + "scratchpad", + (cell.stdout or "")[:2000], + description=description, + ) + elif tc.name == "connect_new_datasource" or ( + tc.name == "publish_or_preview" and tc.input.get("action") == "publish" + ): + # Interactive tool — pause spinner AND escape watcher + yield StreamTaskProgress( + phase="interactive", + message="", + ) + if self._escape_watcher: + self._escape_watcher.pause() + result_text = await dispatch_tool(self, tc.name, tc.input) + if self._escape_watcher: + self._escape_watcher.resume() + yield StreamTaskProgress( + phase="analyzing", + message="Analyzing results...", + ) + else: + result_text = await dispatch_tool(self, tc.name, tc.input) + if ( + tc.name == "scratchpad" + and tc.input.get("action") == "dump" + ): + yield StreamToolResult(content=result_text) + result_text = ( + "The full notebook has been displayed to the user above. " + "Do not repeat it. 
Here is the content for your reference:\n\n" + + result_text + ) + except Exception as exc: + result_text = f"Tool '{tc.name}' failed: {exc}" + + if self._episodic is not None: + self._episodic.log_turn( + self._turn_count + 1, + "tool_result", + result_text[:2000], + tool=tc.name, + ) + result_text = scrub_credentials(result_text) + result_text = _apply_error_tracking( + result_text, tc.name, error_streak, resilience_nudged + ) + tool_results.append( + { + "type": "tool_result", + "tool_use_id": tc.id, + "content": result_text, + } + ) + + self._history.append({"role": "user", "content": tool_results}) + + # Signal that tools are done and LLM is now analyzing + yield StreamTaskProgress( + phase="analyzing", message="Analyzing results..." + ) + + # Stream follow-up + response = None + try: + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + except ContextOverflowError: + if not _compacted_this_turn: + await self._summarize_history() + self._compact_scratchpads() + _compacted_this_turn = True + yield StreamContextCompacted( + message="Context was getting long — older history has been summarized." + ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + + if response is None: + return + llm_response = response.response + + # Detect max_tokens truncation inside tool loop + if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls: + self._history.append( + {"role": "assistant", "content": llm_response.content or ""} + ) + self._history.append( + { + "role": "user", + "content": ( + "SYSTEM: Your response was truncated because it exceeded the output token limit. " + "Continue exactly where you left off. If you were about to call a tool, " + "call it now. 
If the code you were writing was too long, split it into smaller parts." + ), + } + ) + response = None + try: + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + except ContextOverflowError: + if not _compacted_this_turn: + await self._summarize_history() + self._compact_scratchpads() + _compacted_this_turn = True + yield StreamContextCompacted( + message="Context was getting long — older history has been summarized." + ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + + if response is None: + return + llm_response = response.response + + # Proactive compaction during tool loop + if ( + not _compacted_this_turn + and llm_response.usage.context_pressure + > _CONTEXT_PRESSURE_THRESHOLD + ): + await self._summarize_history() + self._compact_scratchpads() + _compacted_this_turn = True + yield StreamContextCompacted( + message="Context was getting long — older history has been summarized." + ) + + # --- Completion verification --- + # Only verify when tools were actually used (not for simple Q&A) + # and we haven't hit the max-rounds hard stop. + if tool_round == 0 or _max_rounds_hit: + break + + # Append the assistant's final text so the verifier can see it + reply = llm_response.content or "" + self._history.append({"role": "assistant", "content": reply}) + + if continuation >= _MAX_CONTINUATIONS: + # Budget exhausted — ask LLM to diagnose and present to user + self._history.append( + { + "role": "user", + "content": ( + "SYSTEM: You have attempted to complete this task multiple times " + "but verification indicates it is still not done. Do NOT try again. " + "Instead:\n" + "1. Summarize exactly what was accomplished so far.\n" + "2. Identify the specific blocker or failure preventing completion.\n" + "3. 
Suggest concrete next steps the user can take to unblock this.\n" + "Be honest and specific — do not be vague about what went wrong." + ), + } + ) + yield StreamTaskProgress( + phase="analyzing", message="Diagnosing incomplete task..." + ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + ): + yield event + # Consolidation still runs after diagnosis + break + + # Ask the LLM to self-assess completion. + # Use a copy of history with a trailing user message so models + # that don't support assistant-prefill won't reject the request. + verify_messages = list(self._history) + [ + { + "role": "user", + "content": ( + "SYSTEM: Evaluate whether the task the user originally requested " + "has been fully completed based on the conversation above." + ), + } + ] + verification = await self._llm.plan( + system=( + "You are a task-completion verifier. Given the conversation, determine " + "whether the user's original request has been fully completed.\n\n" + "Respond with EXACTLY one of these lines, followed by a brief reason:\n" + "STATUS: COMPLETE — \n" + "STATUS: INCOMPLETE — \n" + "STATUS: STUCK — \n\n" + "COMPLETE = the task is done or the response fully answers the question.\n" + "INCOMPLETE = more work can be done to finish the task.\n" + "STUCK = a blocker prevents completion (missing info, permissions, etc).\n\n" + "Be strict: if the user asked for X and only part of X was delivered, " + "that is INCOMPLETE, not COMPLETE. But if the user asked a question " + "and the assistant answered it, that is COMPLETE even without tool use." 
+ ), + messages=verify_messages, + max_tokens=256, + ) + + status_text = (verification.content or "").strip().upper() + if "STATUS: COMPLETE" in status_text: + break + if "STATUS: STUCK" in status_text: + # Stuck — inject diagnosis request and let the LLM explain + reason = (verification.content or "").strip() + self._history.append( + { + "role": "user", + "content": ( + f"SYSTEM: Task verification determined this task is stuck.\n" + f"Verifier assessment: {reason}\n\n" + "Explain to the user what went wrong, what you tried, and " + "suggest specific next steps they can take to unblock this." + ), + } + ) + yield StreamTaskProgress( + phase="analyzing", message="Diagnosing blocked task..." + ) + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + ): + yield event + break + + # INCOMPLETE — continue working + continuation += 1 + reason = (verification.content or "").strip() + self._history.append( + { + "role": "user", + "content": ( + f"SYSTEM: Task verification determined this task is not yet complete " + f"(attempt {continuation}/{_MAX_CONTINUATIONS}).\n" + f"Verifier assessment: {reason}\n\n" + "Continue working on the original request. Pick up where you left off " + "and finish the remaining work. Do not repeat work already done." + ), + } + ) + yield StreamTaskProgress( + phase="analyzing", + message=f"Task incomplete — continuing ({continuation}/{_MAX_CONTINUATIONS})...", + ) + + # Re-enter tool loop: get next LLM response with tools available + response = None + async for event in self._llm.plan_stream( + system=system, + messages=self._history, + tools=tools, + ): + yield event + if isinstance(event, StreamComplete): + response = event + if response is None: + return + llm_response = response.response + # Loop back to the top of the completion verification loop + + # Text-only final response — append to history (if not already appended + # by the verification block above). 
+ if not self._history or self._history[-1].get("role") != "assistant": + reply = llm_response.content or "" + self._history.append({"role": "assistant", "content": reply}) + + # Consolidation: replay scratchpad sessions to extract lessons + if self._cortex is not None and self._cortex.mode != "off": + self._maybe_consolidate_scratchpads() + + def _maybe_consolidate_scratchpads(self) -> None: + """Check if any scratchpad sessions warrant consolidation and fire it off.""" + from anton.memory.consolidator import Consolidator + + consolidator = Consolidator() + for pad in self._scratchpads._pads.values(): + cells = list(pad.cells) + if consolidator.should_replay(cells): + asyncio.create_task(self._consolidate(cells)) + + async def _consolidate(self, cells: list) -> None: + """Run offline consolidation on a completed scratchpad session.""" + from anton.memory.consolidator import Consolidator + + consolidator = Consolidator() + engrams = await consolidator.replay_and_extract(cells, self._llm) + if not engrams or self._cortex is None: + return + + auto_encode = [e for e in engrams if not self._cortex.encoding_gate(e)] + needs_confirm = [e for e in engrams if self._cortex.encoding_gate(e)] + + if auto_encode: + await self._cortex.encode(auto_encode) + + if needs_confirm: + self._pending_memory_confirmations.extend(needs_confirm) From 7d29c061cc88d30e5eaaff2322b13d342e4d6c14 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 15:49:30 -0700 Subject: [PATCH 004/134] updated existing chat implementation based on core --- anton/chat.py | 1166 +------------------------------------------------ 1 file changed, 1 insertion(+), 1165 deletions(-) diff --git a/anton/chat.py b/anton/chat.py index 4722c1f9..5bd77801 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -1,14 +1,10 @@ from __future__ import annotations import asyncio -import json as _json import os import urllib.error -import re as _re import sys -import uuid import time -from collections.abc import 
AsyncIterator, Callable from pathlib import Path from typing import TYPE_CHECKING @@ -16,18 +12,15 @@ from anton.clipboard import ( cleanup_old_uploads, - clipboard_unavailable_reason, grab_clipboard, is_clipboard_supported, parse_dropped_paths as _parse_dropped_paths, save_clipboard_image, ) -from anton.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt +from anton.core.session import ChatSession from anton.llm.provider import ( - ContextOverflowError, StreamComplete, StreamContextCompacted, - StreamEvent, StreamTaskProgress, StreamTextDelta, StreamToolResult, @@ -35,22 +28,10 @@ StreamToolUseEnd, StreamToolUseStart, ) -from anton.scratchpad import ScratchpadManager -from anton.tools import ( - CONNECT_DATASOURCE_TOOL, - MEMORIZE_TOOL, - PUBLISH_TOOL, - RECALL_TOOL, - SCRATCHPAD_TOOL, - dispatch_tool, - format_cell_result, - prepare_scratchpad_exec, -) from anton.checks import TokenLimitInfo, TokenLimitStatus, check_minds_token_limits from anton.commands.setup import ( handle_memory, handle_setup, - handle_setup_memory, handle_setup_models, ) from anton.commands.ui import handle_theme, print_slash_help @@ -69,12 +50,6 @@ handle_test_datasource, ) from anton.utils.prompt import ( - MINDS_KEYS, - LLM_KEYS, - SECRET_PATTERNS, - mask_secret, - is_secret_key, - display_value, prompt_or_cancel, prompt_minds_api_key, ) @@ -83,30 +58,19 @@ normalize_minds_url, describe_minds_connection_error, list_minds, - get_mind, - refresh_knowledge, list_datasources, test_llm, ) from anton.data_vault import DataVault from anton.utils.datasources import ( - build_datasource_context, register_secret_vars, - restore_namespaced_env, - remove_engine_block, - scrub_credentials, - parse_connection_slug, ) from anton.datasource_registry import ( - DatasourceEngine, - DatasourceField, DatasourceRegistry, ) -from anton.llm.openai import build_chat_completion_kwargs from prompt_toolkit import PromptSession from prompt_toolkit.formatted_text import HTML -from 
prompt_toolkit.key_binding import KeyBindings from prompt_toolkit.styles import Style as PTStyle from rich.prompt import Confirm, Prompt @@ -138,1134 +102,6 @@ TOKEN_STATUS_CACHE_TTL = 60.0 - -class ChatSession: - """Manages a multi-turn conversation with tool-call delegation.""" - - def __init__( - self, - llm_client: LLMClient, - *, - self_awareness: SelfAwarenessContext | None = None, - cortex: Cortex | None = None, - episodic: EpisodicMemory | None = None, - runtime_context: str = "", - workspace: Workspace | None = None, - console: Console | None = None, - coding_provider: str = "anthropic", - coding_api_key: str = "", - coding_base_url: str = "", - initial_history: list[dict] | None = None, - history_store: HistoryStore | None = None, - session_id: str | None = None, - proactive_dashboards: bool = False, - ) -> None: - self._llm = llm_client - self._self_awareness = self_awareness - self._cortex = cortex - self._episodic = episodic - self._runtime_context = runtime_context - self._proactive_dashboards = proactive_dashboards - self._workspace = workspace - self._console = console - self._history: list[dict] = list(initial_history) if initial_history else [] - self._pending_memory_confirmations: list = [] - self._turn_count = ( - sum(1 for m in self._history if m.get("role") == "user") - if initial_history - else 0 - ) - self._history_store = history_store - self._session_id = session_id - self._cancel_event = asyncio.Event() - self._escape_watcher: "EscapeWatcher | None" = None - self._active_datasource: str | None = None - self._scratchpads = ScratchpadManager( - coding_provider=coding_provider, - coding_model=getattr(llm_client, "coding_model", ""), - coding_api_key=coding_api_key, - coding_base_url=coding_base_url, - workspace_path=workspace.base if workspace else None, - ) - - @property - def history(self) -> list[dict]: - return self._history - - def repair_history(self) -> None: - """Fix dangling tool_use blocks left by mid-stream cancellation. 
- - The Anthropic API requires every tool_use to be followed by a - tool_result. If we cancelled mid-turn, the last assistant message - may contain tool_use blocks with no corresponding tool_result in - the next message. Append synthetic tool_results so the - conversation can continue. - """ - if not self._history: - return - last = self._history[-1] - if last.get("role") != "assistant": - return - content = last.get("content") - if not isinstance(content, list): - return - tool_ids = [ - block["id"] - for block in content - if isinstance(block, dict) and block.get("type") == "tool_use" - ] - if not tool_ids: - return - self._history.append( - { - "role": "user", - "content": [ - { - "type": "tool_result", - "tool_use_id": tid, - "content": "Cancelled by user.", - } - for tid in tool_ids - ], - } - ) - - def _persist_history(self) -> None: - """Save current history to disk if a history store is configured.""" - if self._history_store and self._session_id: - self._history_store.save(self._session_id, self._history) - - async def _build_system_prompt(self, user_message: str = "") -> str: - import datetime as _dt - _now = _dt.datetime.now() - _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p") - - prompt = CHAT_SYSTEM_PROMPT.format( - runtime_context=self._runtime_context, - visualizations_section=build_visualizations_prompt( - self._proactive_dashboards - ), - current_datetime=_current_datetime, - ) - # Inject memory context (replaces old self_awareness) - if self._cortex is not None: - memory_section = await self._cortex.build_memory_context(user_message) - if memory_section: - prompt += memory_section - elif self._self_awareness is not None: - # Fallback for legacy usage (tests, etc.) 
- sa_section = self._self_awareness.build_prompt_section() - if sa_section: - prompt += sa_section - # Inject anton.md project context (user-written takes priority) - if self._workspace is not None: - md_context = self._workspace.build_anton_md_context() - if md_context: - prompt += md_context - # Inject connected datasource context without credentials - ds_ctx = build_datasource_context(active_only=self._active_datasource) - if ds_ctx: - prompt += ds_ctx - return prompt - - # Packages the LLM is most likely to care about when writing scratchpad code. - _NOTABLE_PACKAGES: set[str] = { - "numpy", - "pandas", - "matplotlib", - "seaborn", - "scipy", - "scikit-learn", - "requests", - "httpx", - "aiohttp", - "beautifulsoup4", - "lxml", - "pillow", - "sympy", - "networkx", - "sqlalchemy", - "pydantic", - "rich", - "tqdm", - "click", - "fastapi", - "flask", - "django", - "openai", - "anthropic", - "tiktoken", - "transformers", - "torch", - "polars", - "pyarrow", - "openpyxl", - "xlsxwriter", - "plotly", - "bokeh", - "altair", - "pytest", - "hypothesis", - "yaml", - "pyyaml", - "toml", - "tomli", - "tomllib", - "jinja2", - "markdown", - "pygments", - "cryptography", - "paramiko", - "boto3", - } - - def _build_tools(self) -> list[dict]: - scratchpad_tool = dict(SCRATCHPAD_TOOL) - pkg_list = self._scratchpads._available_packages - if pkg_list: - notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES) - if notable: - pkg_line = ", ".join(notable) - extra = f"\n\nInstalled packages ({len(pkg_list)} total, notable: {pkg_line})." - else: - extra = f"\n\nInstalled packages: {len(pkg_list)} total (standard library plus dependencies)." 
- scratchpad_tool["description"] = SCRATCHPAD_TOOL["description"] + extra - - # Inject scratchpad wisdom from memory (procedural priming) - if self._cortex is not None: - wisdom = self._cortex.get_scratchpad_context() - if wisdom: - scratchpad_tool[ - "description" - ] += f"\n\nLessons from past sessions:\n{wisdom}" - - tools = [scratchpad_tool] - if self._cortex is not None: - tools.append(MEMORIZE_TOOL) - elif self._self_awareness is not None: - # Legacy fallback - from anton.tools import MEMORIZE_TOOL as _MT - - tools.append(_MT) - if self._episodic is not None and self._episodic.enabled: - tools.append(RECALL_TOOL) - tools.append(CONNECT_DATASOURCE_TOOL) - tools.append(PUBLISH_TOOL) - return tools - - async def close(self) -> None: - """Clean up scratchpads and other resources.""" - await self._scratchpads.close_all() - - async def _summarize_history(self) -> None: - """Compress old conversation turns into a summary using the coding model. - - Splits history into old (first 60%) and recent (last 40%), keeping at - least 4 recent turns. The old portion is summarized by the fast coding - model and replaced with a single user message. - """ - if len(self._history) < 6: - return # Too short to summarize - - min_recent = 4 - split = max(int(len(self._history) * 0.6), 1) - # Ensure we keep at least min_recent turns - split = min(split, len(self._history) - min_recent) - if split < 2: - return - - # Walk split backward to avoid breaking tool_use / tool_result pairs. - # A user message containing tool_result blocks must stay with the - # preceding assistant message that contains the matching tool_use. 
- while split > 1: - msg = self._history[split] - if msg.get("role") != "user": - break - content = msg.get("content") - if not isinstance(content, list): - break - has_tool_result = any( - isinstance(b, dict) and b.get("type") == "tool_result" for b in content - ) - if not has_tool_result: - break - # This user message has tool_results — keep it (and its paired - # assistant message) in the recent portion. - split -= 1 - # Also pull back over the preceding assistant message so the - # pair stays together. - if split > 1 and self._history[split].get("role") == "assistant": - split -= 1 - - if split < 2: - return - - old_turns = self._history[:split] - recent_turns = self._history[split:] - - # Serialize old turns into text for summarization - lines: list[str] = [] - for msg in old_turns: - role = msg.get("role", "unknown") - content = msg.get("content", "") - if isinstance(content, str): - lines.append(f"[{role}]: {content[:2000]}") - elif isinstance(content, list): - for block in content: - if isinstance(block, dict): - if block.get("type") == "text": - lines.append(f"[{role}]: {block['text'][:1000]}") - elif block.get("type") == "tool_use": - lines.append( - f"[{role}/tool_use]: {block.get('name', '')}({str(block.get('input', ''))[:500]})" - ) - elif block.get("type") == "tool_result": - lines.append( - f"[tool_result]: {str(block.get('content', ''))[:500]}" - ) - - old_text = "\n".join(lines) - # Cap at ~8000 chars to avoid overloading the summarizer - if len(old_text) > 8000: - old_text = old_text[:8000] + "\n... (truncated)" - - try: - summary_response = await self._llm.code( - system=( - "Summarize this conversation history concisely. Preserve:\n" - "- Key decisions and conclusions\n" - "- Important data/results discovered\n" - "- Variable names and values that are still relevant\n" - "- Errors encountered and how they were resolved\n" - "Keep it under 2000 tokens. Use bullet points." 
- ), - messages=[{"role": "user", "content": old_text}], - max_tokens=2048, - ) - summary = summary_response.content or "(summary unavailable)" - except Exception: - # If summarization fails, just do a simple truncation - summary = f"(Earlier conversation with {len(old_turns)} turns — summarization failed)" - - summary_msg = { - "role": "user", - "content": f"[Context summary of earlier conversation]\n{summary}", - } - - # If the recent portion starts with a user message, insert a minimal - # assistant separator to avoid consecutive user messages (API error). - if recent_turns and recent_turns[0].get("role") == "user": - self._history = [ - summary_msg, - {"role": "assistant", "content": "Understood."}, - *recent_turns, - ] - else: - self._history = [summary_msg] + recent_turns - - def _compact_scratchpads(self) -> bool: - """Compact all active scratchpads. Returns True if any were compacted.""" - compacted = False - for pad in self._scratchpads._pads.values(): - if pad._compact_cells(): - compacted = True - return compacted - - async def turn(self, user_input: str | list[dict]) -> str: - self._history.append({"role": "user", "content": user_input}) - - user_msg_str = user_input if isinstance(user_input, str) else "" - system = await self._build_system_prompt(user_msg_str) - tools = self._build_tools() - - try: - response = await self._llm.plan( - system=system, - messages=self._history, - tools=tools, - ) - except ContextOverflowError: - await self._summarize_history() - self._compact_scratchpads() - response = await self._llm.plan( - system=system, - messages=self._history, - tools=tools, - ) - - # Proactive compaction - if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD: - await self._summarize_history() - self._compact_scratchpads() - - # Handle tool calls - tool_round = 0 - error_streak: dict[str, int] = {} - resilience_nudged: set[str] = set() - - while response.tool_calls: - tool_round += 1 - if tool_round > _MAX_TOOL_ROUNDS: - 
self._history.append( - {"role": "assistant", "content": response.content or ""} - ) - self._history.append( - { - "role": "user", - "content": ( - f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. " - "Pause here. Summarize what you have accomplished so far and what remains. " - "If you believe you are on a good track and can finish the task with more steps, " - "tell the user and ask if they'd like you to continue. " - "Do NOT retry automatically — wait for the user's response." - ), - } - ) - response = await self._llm.plan( - system=system, - messages=self._history, - ) - break - - # Build assistant message with content blocks - assistant_content: list[dict] = [] - if response.content: - assistant_content.append({"type": "text", "text": response.content}) - for tc in response.tool_calls: - assistant_content.append( - { - "type": "tool_use", - "id": tc.id, - "name": tc.name, - "input": tc.input, - } - ) - self._history.append({"role": "assistant", "content": assistant_content}) - - # Process each tool call via registry - tool_results: list[dict] = [] - for tc in response.tool_calls: - try: - result_text = await dispatch_tool(self, tc.name, tc.input) - except Exception as exc: - result_text = f"Tool '{tc.name}' failed: {exc}" - - result_text = scrub_credentials(result_text) - result_text = _apply_error_tracking( - result_text, - tc.name, - error_streak, - resilience_nudged, - ) - - tool_results.append( - { - "type": "tool_result", - "tool_use_id": tc.id, - "content": result_text, - } - ) - - self._history.append({"role": "user", "content": tool_results}) - - # Get follow-up from LLM - try: - response = await self._llm.plan( - system=system, - messages=self._history, - tools=tools, - ) - except ContextOverflowError: - await self._summarize_history() - self._compact_scratchpads() - response = await self._llm.plan( - system=system, - messages=self._history, - tools=tools, - ) - - # Proactive compaction during tool loop - if 
response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD: - await self._summarize_history() - self._compact_scratchpads() - - # Text-only response - reply = response.content or "" - self._history.append({"role": "assistant", "content": reply}) - - # Periodic memory vacuum (Systems Consolidation) - if self._cortex is not None and self._cortex.mode != "off": - self._cortex.maybe_vacuum() - - return reply - - async def turn_stream( - self, user_input: str | list[dict] - ) -> AsyncIterator[StreamEvent]: - """Streaming version of turn(). Yields events as they arrive.""" - self._history.append({"role": "user", "content": user_input}) - - # Log user input to episodic memory - if self._episodic is not None: - content = ( - user_input if isinstance(user_input, str) else str(user_input)[:2000] - ) - self._episodic.log_turn(self._turn_count + 1, "user", content) - - user_msg_str = user_input if isinstance(user_input, str) else "" - assistant_text_parts: list[str] = [] - _max_auto_retries = 2 - _retry_count = 0 - - while True: - try: - async for event in self._stream_and_handle_tools(user_msg_str): - if isinstance(event, StreamTextDelta): - assistant_text_parts.append(event.text) - yield event - break # completed successfully - except Exception as _agent_exc: - _retry_count += 1 - if _retry_count <= _max_auto_retries: - # Inject the error into history and let the LLM try to recover - self._history.append( - { - "role": "user", - "content": ( - f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n" - "If you can diagnose and fix the issue, continue working on the task. " - "Adjust your approach to avoid the same error. " - "If this is unrecoverable, summarize what you accomplished and suggest next steps." 
- ), - } - ) - # Continue the while loop — _stream_and_handle_tools will be called - # again with the error context now in history - continue - else: - # Exhausted retries — stop and summarize for the user - self._history.append( - { - "role": "user", - "content": ( - f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n" - "Stop retrying. Please:\n" - "1. Summarize what you accomplished so far.\n" - "2. Explain what went wrong in plain language.\n" - "3. Suggest next steps — what the user can try (e.g. rephrase, " - "simplify the request, or ask you to continue from where you left off).\n" - "Be concise and helpful." - ), - } - ) - try: - async for event in self._llm.plan_stream( - system=await self._build_system_prompt(user_msg_str), - messages=self._history, - ): - if isinstance(event, StreamTextDelta): - assistant_text_parts.append(event.text) - yield event - except Exception: - fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request." 
- assistant_text_parts.append(fallback) - yield StreamTextDelta(text=fallback) - break - - # Log assistant response to episodic memory - if self._episodic is not None and assistant_text_parts: - self._episodic.log_turn( - self._turn_count + 1, - "assistant", - "".join(assistant_text_parts)[:2000], - ) - - # Identity extraction (Default Mode Network — every 5 turns) - self._turn_count += 1 - self._persist_history() - if self._cortex is not None and self._cortex.mode != "off": - if self._turn_count % 5 == 0 and isinstance(user_input, str): - asyncio.create_task(self._cortex.maybe_update_identity(user_input)) - # Periodic memory vacuum (Systems Consolidation) - self._cortex.maybe_vacuum() - - async def _stream_and_handle_tools( - self, user_message: str = "" - ) -> AsyncIterator[StreamEvent]: - """Stream one LLM call, handle tool loops, yield all events.""" - system = await self._build_system_prompt(user_message) - tools = self._build_tools() - - # Guard against summarizing an already-summarized history within the same - # turn (e.g. ContextOverflowError on first call + pressure > threshold on - # the tool-loop follow-up would previously produce a summary of a summary). - _compacted_this_turn = False - - response: StreamComplete | None = None - - try: - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - except ContextOverflowError: - await self._summarize_history() - self._compact_scratchpads() - _compacted_this_turn = True - yield StreamContextCompacted( - message="Context was getting long — older history has been summarized." 
- ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - - if response is None: - return - - llm_response = response.response - - # Detect max_tokens truncation — the LLM was cut off mid-response. - # Inject a continuation prompt so it can finish what it was doing. - if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls: - self._history.append( - {"role": "assistant", "content": llm_response.content or ""} - ) - self._history.append( - { - "role": "user", - "content": ( - "SYSTEM: Your response was truncated because it exceeded the output token limit. " - "Continue exactly where you left off. If you were about to call a tool, " - "call it now. If the code you were writing was too long, split it into smaller parts." - ), - } - ) - response = None - try: - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - except ContextOverflowError: - if not _compacted_this_turn: - await self._summarize_history() - self._compact_scratchpads() - _compacted_this_turn = True - yield StreamContextCompacted( - message="Context was getting long — older history has been summarized." - ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - - if response is None: - return - llm_response = response.response - - # Proactive compaction - if ( - not _compacted_this_turn - and llm_response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD - ): - await self._summarize_history() - self._compact_scratchpads() - _compacted_this_turn = True - yield StreamContextCompacted( - message="Context was getting long — older history has been summarized." 
- ) - - # Tool-call loop with circuit breaker, wrapped in a completion - # verification outer loop that can restart the tool loop if the - # task isn't actually done yet. - continuation = 0 - _max_rounds_hit = False - - while True: # Completion verification loop - tool_round = 0 - error_streak: dict[str, int] = {} - resilience_nudged: set[str] = set() - - while llm_response.tool_calls: - tool_round += 1 - if tool_round > _MAX_TOOL_ROUNDS: - _max_rounds_hit = True - self._history.append( - {"role": "assistant", "content": llm_response.content or ""} - ) - self._history.append( - { - "role": "user", - "content": ( - f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. " - "Pause here. Summarize what you have accomplished so far and what remains. " - "If you believe you are on a good track and can finish the task with more steps, " - "tell the user and ask if they'd like you to continue. " - "Do NOT retry automatically — wait for the user's response." - ), - } - ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - ): - yield event - break - - # Build assistant message with content blocks - assistant_content: list[dict] = [] - if llm_response.content: - assistant_content.append( - {"type": "text", "text": llm_response.content} - ) - for tc in llm_response.tool_calls: - assistant_content.append( - { - "type": "tool_use", - "id": tc.id, - "name": tc.name, - "input": tc.input, - } - ) - self._history.append( - {"role": "assistant", "content": assistant_content} - ) - - # Process each tool call - tool_results: list[dict] = [] - for tc in llm_response.tool_calls: - if self._episodic is not None: - self._episodic.log_turn( - self._turn_count + 1, - "tool_call", - str(tc.input)[:2000], - tool=tc.name, - ) - - try: - if tc.name == "scratchpad" and tc.input.get("action") == "exec": - # Inline streaming exec — yields progress events - prep = await prepare_scratchpad_exec(self, tc.input) - if isinstance(prep, str): - 
result_text = prep - else: - ( - pad, - code, - description, - estimated_time, - estimated_seconds, - ) = prep - yield StreamTaskProgress( - phase="scratchpad_start", - message=description or "Running code", - eta_seconds=estimated_seconds, - ) - import time as _time - - _sp_t0 = _time.monotonic() - from anton.scratchpad import Cell - - cell = None - async for item in pad.execute_streaming( - code, - description=description, - estimated_time=estimated_time, - estimated_seconds=estimated_seconds, - cancel_event=self._cancel_event, - ): - if isinstance(item, str): - yield StreamTaskProgress( - phase="scratchpad", message=item - ) - elif isinstance(item, Cell): - cell = item - _sp_elapsed = _time.monotonic() - _sp_t0 - yield StreamTaskProgress( - phase="scratchpad_done", - message=description or "Done", - eta_seconds=_sp_elapsed, - ) - result_text = ( - format_cell_result(cell) - if cell - else "No result produced." - ) - if self._episodic is not None and cell is not None: - self._episodic.log_turn( - self._turn_count + 1, - "scratchpad", - (cell.stdout or "")[:2000], - description=description, - ) - elif tc.name == "connect_new_datasource" or ( - tc.name == "publish_or_preview" and tc.input.get("action") == "publish" - ): - # Interactive tool — pause spinner AND escape watcher - yield StreamTaskProgress( - phase="interactive", - message="", - ) - if self._escape_watcher: - self._escape_watcher.pause() - result_text = await dispatch_tool(self, tc.name, tc.input) - if self._escape_watcher: - self._escape_watcher.resume() - yield StreamTaskProgress( - phase="analyzing", - message="Analyzing results...", - ) - else: - result_text = await dispatch_tool(self, tc.name, tc.input) - if ( - tc.name == "scratchpad" - and tc.input.get("action") == "dump" - ): - yield StreamToolResult(content=result_text) - result_text = ( - "The full notebook has been displayed to the user above. " - "Do not repeat it. 
Here is the content for your reference:\n\n" - + result_text - ) - except Exception as exc: - result_text = f"Tool '{tc.name}' failed: {exc}" - - if self._episodic is not None: - self._episodic.log_turn( - self._turn_count + 1, - "tool_result", - result_text[:2000], - tool=tc.name, - ) - result_text = scrub_credentials(result_text) - result_text = _apply_error_tracking( - result_text, tc.name, error_streak, resilience_nudged - ) - tool_results.append( - { - "type": "tool_result", - "tool_use_id": tc.id, - "content": result_text, - } - ) - - self._history.append({"role": "user", "content": tool_results}) - - # Signal that tools are done and LLM is now analyzing - yield StreamTaskProgress( - phase="analyzing", message="Analyzing results..." - ) - - # Stream follow-up - response = None - try: - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - except ContextOverflowError: - if not _compacted_this_turn: - await self._summarize_history() - self._compact_scratchpads() - _compacted_this_turn = True - yield StreamContextCompacted( - message="Context was getting long — older history has been summarized." - ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - - if response is None: - return - llm_response = response.response - - # Detect max_tokens truncation inside tool loop - if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls: - self._history.append( - {"role": "assistant", "content": llm_response.content or ""} - ) - self._history.append( - { - "role": "user", - "content": ( - "SYSTEM: Your response was truncated because it exceeded the output token limit. " - "Continue exactly where you left off. If you were about to call a tool, " - "call it now. 
If the code you were writing was too long, split it into smaller parts." - ), - } - ) - response = None - try: - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - except ContextOverflowError: - if not _compacted_this_turn: - await self._summarize_history() - self._compact_scratchpads() - _compacted_this_turn = True - yield StreamContextCompacted( - message="Context was getting long — older history has been summarized." - ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - - if response is None: - return - llm_response = response.response - - # Proactive compaction during tool loop - if ( - not _compacted_this_turn - and llm_response.usage.context_pressure - > _CONTEXT_PRESSURE_THRESHOLD - ): - await self._summarize_history() - self._compact_scratchpads() - _compacted_this_turn = True - yield StreamContextCompacted( - message="Context was getting long — older history has been summarized." - ) - - # --- Completion verification --- - # Only verify when tools were actually used (not for simple Q&A) - # and we haven't hit the max-rounds hard stop. - if tool_round == 0 or _max_rounds_hit: - break - - # Append the assistant's final text so the verifier can see it - reply = llm_response.content or "" - self._history.append({"role": "assistant", "content": reply}) - - if continuation >= _MAX_CONTINUATIONS: - # Budget exhausted — ask LLM to diagnose and present to user - self._history.append( - { - "role": "user", - "content": ( - "SYSTEM: You have attempted to complete this task multiple times " - "but verification indicates it is still not done. Do NOT try again. " - "Instead:\n" - "1. Summarize exactly what was accomplished so far.\n" - "2. Identify the specific blocker or failure preventing completion.\n" - "3. 
Suggest concrete next steps the user can take to unblock this.\n" - "Be honest and specific — do not be vague about what went wrong." - ), - } - ) - yield StreamTaskProgress( - phase="analyzing", message="Diagnosing incomplete task..." - ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - ): - yield event - # Consolidation still runs after diagnosis - break - - # Ask the LLM to self-assess completion. - # Use a copy of history with a trailing user message so models - # that don't support assistant-prefill won't reject the request. - verify_messages = list(self._history) + [ - { - "role": "user", - "content": ( - "SYSTEM: Evaluate whether the task the user originally requested " - "has been fully completed based on the conversation above." - ), - } - ] - verification = await self._llm.plan( - system=( - "You are a task-completion verifier. Given the conversation, determine " - "whether the user's original request has been fully completed.\n\n" - "Respond with EXACTLY one of these lines, followed by a brief reason:\n" - "STATUS: COMPLETE — \n" - "STATUS: INCOMPLETE — \n" - "STATUS: STUCK — \n\n" - "COMPLETE = the task is done or the response fully answers the question.\n" - "INCOMPLETE = more work can be done to finish the task.\n" - "STUCK = a blocker prevents completion (missing info, permissions, etc).\n\n" - "Be strict: if the user asked for X and only part of X was delivered, " - "that is INCOMPLETE, not COMPLETE. But if the user asked a question " - "and the assistant answered it, that is COMPLETE even without tool use." 
- ), - messages=verify_messages, - max_tokens=256, - ) - - status_text = (verification.content or "").strip().upper() - if "STATUS: COMPLETE" in status_text: - break - if "STATUS: STUCK" in status_text: - # Stuck — inject diagnosis request and let the LLM explain - reason = (verification.content or "").strip() - self._history.append( - { - "role": "user", - "content": ( - f"SYSTEM: Task verification determined this task is stuck.\n" - f"Verifier assessment: {reason}\n\n" - "Explain to the user what went wrong, what you tried, and " - "suggest specific next steps they can take to unblock this." - ), - } - ) - yield StreamTaskProgress( - phase="analyzing", message="Diagnosing blocked task..." - ) - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - ): - yield event - break - - # INCOMPLETE — continue working - continuation += 1 - reason = (verification.content or "").strip() - self._history.append( - { - "role": "user", - "content": ( - f"SYSTEM: Task verification determined this task is not yet complete " - f"(attempt {continuation}/{_MAX_CONTINUATIONS}).\n" - f"Verifier assessment: {reason}\n\n" - "Continue working on the original request. Pick up where you left off " - "and finish the remaining work. Do not repeat work already done." - ), - } - ) - yield StreamTaskProgress( - phase="analyzing", - message=f"Task incomplete — continuing ({continuation}/{_MAX_CONTINUATIONS})...", - ) - - # Re-enter tool loop: get next LLM response with tools available - response = None - async for event in self._llm.plan_stream( - system=system, - messages=self._history, - tools=tools, - ): - yield event - if isinstance(event, StreamComplete): - response = event - if response is None: - return - llm_response = response.response - # Loop back to the top of the completion verification loop - - # Text-only final response — append to history (if not already appended - # by the verification block above). 
- if not self._history or self._history[-1].get("role") != "assistant": - reply = llm_response.content or "" - self._history.append({"role": "assistant", "content": reply}) - - # Consolidation: replay scratchpad sessions to extract lessons - if self._cortex is not None and self._cortex.mode != "off": - self._maybe_consolidate_scratchpads() - - def _maybe_consolidate_scratchpads(self) -> None: - """Check if any scratchpad sessions warrant consolidation and fire it off.""" - from anton.memory.consolidator import Consolidator - - consolidator = Consolidator() - for pad in self._scratchpads._pads.values(): - cells = list(pad.cells) - if consolidator.should_replay(cells): - asyncio.create_task(self._consolidate(cells)) - - async def _consolidate(self, cells: list) -> None: - """Run offline consolidation on a completed scratchpad session.""" - from anton.memory.consolidator import Consolidator - - consolidator = Consolidator() - engrams = await consolidator.replay_and_extract(cells, self._llm) - if not engrams or self._cortex is None: - return - - auto_encode = [e for e in engrams if not self._cortex.encoding_gate(e)] - needs_confirm = [e for e in engrams if self._cortex.encoding_gate(e)] - - if auto_encode: - await self._cortex.encode(auto_encode) - - if needs_confirm: - self._pending_memory_confirmations.extend(needs_confirm) - - -def _apply_error_tracking( - result_text: str, - tool_name: str, - error_streak: dict[str, int], - resilience_nudged: set[str], -) -> str: - """Track consecutive errors per tool and append nudge/circuit-breaker messages.""" - is_error = any( - marker in result_text - for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:") - ) - if is_error: - error_streak[tool_name] = error_streak.get(tool_name, 0) + 1 - else: - error_streak[tool_name] = 0 - resilience_nudged.discard(tool_name) - - streak = error_streak.get(tool_name, 0) - if streak >= _RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged: - result_text += 
_RESILIENCE_NUDGE - resilience_nudged.add(tool_name) - - if streak >= _MAX_CONSECUTIVE_ERRORS: - result_text += ( - f"\n\nSYSTEM: The '{tool_name}' tool has failed {_MAX_CONSECUTIVE_ERRORS} times " - "in a row. Stop retrying this approach. Either try a completely different " - "strategy or tell the user what's going wrong so they can help." - ) - - return result_text - - - async def _handle_connect( console: Console, settings: AntonSettings, From 53de84f28ee3f1012c7e7702084ea9b6d430311c Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 16:02:45 -0700 Subject: [PATCH 005/134] added the core pkg for tools --- anton/core/tools/tool_defs.py | 0 anton/core/tools/tool_handlers.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 anton/core/tools/tool_defs.py create mode 100644 anton/core/tools/tool_handlers.py diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py new file mode 100644 index 00000000..e69de29b diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py new file mode 100644 index 00000000..e69de29b From 18ff4c394a8d0f3111c1effdb24f4cc975ca3e95 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 16:05:17 -0700 Subject: [PATCH 006/134] moved core tools --- anton/core/tools/tool_defs.py | 145 +++++++++++++ anton/core/tools/tool_handlers.py | 196 +++++++++++++++++ anton/tools.py | 340 ------------------------------ 3 files changed, 341 insertions(+), 340 deletions(-) diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py index e69de29b..a687d202 100644 --- a/anton/core/tools/tool_defs.py +++ b/anton/core/tools/tool_defs.py @@ -0,0 +1,145 @@ +SCRATCHPAD_TOOL = { + "name": "scratchpad", + "description": ( + "Run Python code in a persistent scratchpad. Use this whenever you need to " + "count characters, do math, parse data, transform text, or any task that " + "benefits from precise computation rather than guessing. 
Variables, imports, " + "and data persist across cells — like a notebook you drive programmatically.\n\n" + "Actions:\n" + "- exec: Run code in the scratchpad (creates it if needed)\n" + "- view: See all cells and their outputs\n" + "- reset: Restart the process, clearing all state (installed packages survive)\n" + "- remove: Kill the scratchpad and delete its environment\n" + "- dump: Show a clean notebook-style summary of cells (code + truncated output)\n" + "- install: Install Python packages into the scratchpad's environment. " + "Packages persist across resets.\n\n" + "Use print() to produce output. Host Python packages are available by default. " + "Include a 'packages' array on exec calls for any libraries your code needs — " + "they'll be auto-installed before the cell runs (already-installed ones are skipped).\n" + "get_llm() returns a pre-configured LLM client (sync) — call " + "llm.complete(system=..., messages=[...]) for AI-powered computation.\n" + "llm.generate_object(MyModel, system=..., messages=[...]) extracts structured " + "data into Pydantic models. Supports single models and list[Model].\n" + "agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) " + "runs a tool-call loop where the LLM reasons and calls your tools iteratively. " + "handle_tool(name, inputs) -> str is a plain sync function.\n" + "sample(var) inspects any variable with type-aware formatting — DataFrames get " + "shape/dtypes/head, dicts get keys/values, lists get length/items. " + "Defaults to 'preview' mode (compact); use sample(var, mode='full') for complete dump.\n" + "All .anton/.env secrets are available as environment variables (os.environ).\n\n" + "IMPORTANT: Cells have an inactivity timeout of 30 seconds — if a cell produces " + "no output and no progress() calls for 30s, it is killed and all state is lost. 
" + "For long-running code (API calls, data extraction, heavy computation), call " + "progress(message) periodically to signal work is ongoing and reset the timer. " + "The total timeout scales from your estimated_execution_time_seconds " + "(roughly 2x the estimate). You MUST provide estimated_execution_time_seconds " + "for every exec call. For very long operations, provide a realistic estimate " + "and use progress() to keep the cell alive." + ), + "input_schema": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]}, + "name": {"type": "string", "description": "Scratchpad name"}, + "code": { + "type": "string", + "description": "Python code (exec only). Use print() for output.", + }, + "packages": { + "type": "array", + "items": {"type": "string"}, + "description": "Package names needed by this cell (exec or install). " + "Listed after code so you know exactly what to include. " + "Already-installed packages are skipped automatically.", + }, + "one_line_description": { + "type": "string", + "description": "Brief description of what this cell does (e.g. 'Scrape listing prices'). Required for exec.", + }, + "estimated_execution_time_seconds": { + "type": "integer", + "description": "Estimated execution time in seconds. Drives the total timeout (roughly 2x estimate). Use progress() for long cells.", + }, + }, + "required": ["action", "name"], + }, +} + + +MEMORIZE_TOOL = { + "name": "memorize", + "description": ( + "Encode a rule or lesson into long-term memory for future sessions. 
" + "Use this when you learn something important, discover a useful pattern, " + "or the user asks you to remember something.\n\n" + "Entry kinds:\n" + "- always: Something to always do ('Use httpx instead of requests')\n" + "- never: Something to never do ('Never use time.sleep() in scratchpad')\n" + "- when: Conditional rule ('If paginated API → use async + progress()')\n" + "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n" + "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')" + ), + "input_schema": { + "type": "object", + "properties": { + "entries": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "The memory to encode", + }, + "kind": { + "type": "string", + "enum": ["always", "never", "when", "lesson", "profile"], + }, + "scope": { + "type": "string", + "enum": ["global", "project"], + }, + "topic": { + "type": "string", + "description": "Topic slug for lessons (e.g. 'api-coingecko')", + }, + }, + "required": ["text", "kind", "scope"], + }, + }, + }, + "required": ["entries"], + }, +} + + +RECALL_TOOL = { + "name": "recall", + "description": ( + "Search your episodic memory — an archive of past conversations. " + "ONLY use this when the user explicitly asks about a previous conversation " + "or session (e.g. 'what did we talk about last time?', 'remember when we...', " + "'have we discussed X before?'). Do NOT use this for questions about code, " + "files, or data in the workspace — use the scratchpad to explore those directly.\n\n" + "Returns timestamped episodes matching the query (newest first). " + "A single call is enough — do not call multiple times with different queries." 
+ ), + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search term to find in past conversations.", + }, + "max_results": { + "type": "integer", + "description": "Maximum episodes to return (default 20).", + }, + "days_back": { + "type": "integer", + "description": "Only search episodes from the last N days.", + }, + }, + "required": ["query"], + }, +} \ No newline at end of file diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py index e69de29b..ebe5dd30 100644 --- a/anton/core/tools/tool_handlers.py +++ b/anton/core/tools/tool_handlers.py @@ -0,0 +1,196 @@ +from anton.core.session import ChatSession + + +async def handle_recall(session: ChatSession, tc_input: dict) -> str: + """Process a recall tool call — search episodic memory.""" + if session._episodic is None or not session._episodic.enabled: + return "Episodic memory is not available." + + query = tc_input.get("query", "") + if not query: + return "No query provided." + + kwargs: dict = {} + if "max_results" in tc_input: + kwargs["max_results"] = int(tc_input["max_results"]) + if "days_back" in tc_input: + kwargs["days_back"] = int(tc_input["days_back"]) + + return session._episodic.recall_formatted(query, **kwargs) + + +async def handle_memorize(session: ChatSession, tc_input: dict) -> str: + """Process a memorize tool call and return a result string. + + Encoding is fire-and-forget so it never blocks scratchpad execution. + """ + import asyncio + + if session._cortex is None: + return "Memory system not available." + + if session._cortex.mode == "off": + return "Memory encoding is disabled. Change memory mode via /setup to enable." + + from anton.memory.hippocampus import Engram + + raw_entries = tc_input.get("entries", []) + if not raw_entries: + return "No entries provided." 
+ + engrams: list[Engram] = [] + for entry in raw_entries: + if not isinstance(entry, dict) or "text" not in entry: + continue + + kind = entry.get("kind", "lesson") + if kind not in ("always", "never", "when", "lesson", "profile"): + kind = "lesson" + + scope = entry.get("scope", "project") + if scope not in ("global", "project"): + scope = "project" + + # User-sourced memories (via explicit tool call) get high confidence + engrams.append(Engram( + text=entry["text"], + kind=kind, + scope=scope, + confidence="high", + topic=entry.get("topic", ""), + source="user", + )) + + if not engrams: + return "No valid entries provided." + + # Always encode immediately via fire-and-forget — the LLM explicitly + # chose to memorize these, so we never interrupt the user mid-turn + # with confirmation prompts. Confirmations are reserved for the + # post-turn consolidator (lessons extracted from scratchpad sessions). + async def _encode_bg(cortex, entries): + try: + await cortex.encode(entries) + except Exception: + pass # Best-effort; don't disrupt the conversation + + asyncio.create_task(_encode_bg(session._cortex, engrams)) + + descriptions = [f"Encoded {e.kind}: {e.text}" for e in engrams] + return "Memory updated: " + "; ".join(descriptions) + + +async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict): + """Validate and prepare a scratchpad exec call. + + Returns (pad, code, description, estimated_time, estimated_seconds) or + a str error message if validation fails. + """ + name = tc_input.get("name", "") + code = tc_input.get("code", "") + if not code or not code.strip(): + return "No code provided." 
+ + pad = await session._scratchpads.get_or_create(name) + + # Auto-install packages before running the cell + packages = tc_input.get("packages", []) + if packages: + install_result = await pad.install_packages(packages) + if "Install failed" in install_result or "timed out" in install_result: + return install_result + + description = tc_input.get("one_line_description", "") + estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0) + if isinstance(estimated_seconds, str): + try: + estimated_seconds = int(estimated_seconds) + except ValueError: + estimated_seconds = 0 + + estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else "" + return pad, code, description, estimated_time, estimated_seconds + + +def format_cell_result(cell) -> str: + """Format a Cell into a tool result string. + + Every section is labeled so the LLM can tell what came from where: + [output] — print() / stdout from the cell code + [logs] — library logging (httpx, urllib3, etc.) captured at INFO+ + [stderr] — warnings and stderr writes + [error] — Python traceback if the cell raised an exception + """ + parts: list[str] = [] + if cell.stdout: + stdout = cell.stdout + if len(stdout) > 10_000: + stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)" + parts.append(f"[output]\n{stdout}") + if cell.logs if hasattr(cell, "logs") else False: + logs = cell.logs.strip() + if len(logs) > 3_000: + logs = logs[:3_000] + "\n... (logs truncated)" + parts.append(f"[logs]\n{logs}") + if cell.stderr: + parts.append(f"[stderr]\n{cell.stderr}") + if cell.error: + parts.append(f"[error]\n{cell.error}") + if not parts: + return "Code executed successfully (no output)." + return "\n".join(parts) + + +async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str: + """Dispatch a scratchpad tool call by action.""" + action = tc_input.get("action", "") + name = tc_input.get("name", "") + + if not name: + return "Scratchpad name is required." 
+ + if action == "exec": + result = await prepare_scratchpad_exec(session, tc_input) + if isinstance(result, str): + return result + pad, code, description, estimated_time, estimated_seconds = result + + cell = await pad.execute( + code, + description=description, + estimated_time=estimated_time, + estimated_seconds=estimated_seconds, + ) + return format_cell_result(cell) + + elif action == "view": + pad = session._scratchpads._pads.get(name) + if pad is None: + return f"No scratchpad named '{name}'." + return pad.view() + + elif action == "reset": + pad = session._scratchpads._pads.get(name) + if pad is None: + return f"No scratchpad named '{name}'." + await pad.reset() + return f"Scratchpad '{name}' reset. All state cleared." + + elif action == "remove": + return await session._scratchpads.remove(name) + + elif action == "dump": + pad = session._scratchpads._pads.get(name) + if pad is None: + return f"No scratchpad named '{name}'." + return pad.render_notebook() + + elif action == "install": + packages = tc_input.get("packages", []) + if not packages: + return "No packages specified." + pad = await session._scratchpads.get_or_create(name) + return await pad.install_packages(packages) + + else: + return f"Unknown scratchpad action: {action}" diff --git a/anton/tools.py b/anton/tools.py index 2254b39a..f8797aef 100644 --- a/anton/tools.py +++ b/anton/tools.py @@ -60,84 +60,6 @@ def build_tool_schemas(available: list[str]) -> list[dict]: if t.name in available ] - -MEMORIZE_TOOL = { - "name": "memorize", - "description": ( - "Encode a rule or lesson into long-term memory for future sessions. 
" - "Use this when you learn something important, discover a useful pattern, " - "or the user asks you to remember something.\n\n" - "Entry kinds:\n" - "- always: Something to always do ('Use httpx instead of requests')\n" - "- never: Something to never do ('Never use time.sleep() in scratchpad')\n" - "- when: Conditional rule ('If paginated API → use async + progress()')\n" - "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n" - "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')" - ), - "input_schema": { - "type": "object", - "properties": { - "entries": { - "type": "array", - "items": { - "type": "object", - "properties": { - "text": { - "type": "string", - "description": "The memory to encode", - }, - "kind": { - "type": "string", - "enum": ["always", "never", "when", "lesson", "profile"], - }, - "scope": { - "type": "string", - "enum": ["global", "project"], - }, - "topic": { - "type": "string", - "description": "Topic slug for lessons (e.g. 'api-coingecko')", - }, - }, - "required": ["text", "kind", "scope"], - }, - }, - }, - "required": ["entries"], - }, -} - -RECALL_TOOL = { - "name": "recall", - "description": ( - "Search your episodic memory — an archive of past conversations. " - "ONLY use this when the user explicitly asks about a previous conversation " - "or session (e.g. 'what did we talk about last time?', 'remember when we...', " - "'have we discussed X before?'). Do NOT use this for questions about code, " - "files, or data in the workspace — use the scratchpad to explore those directly.\n\n" - "Returns timestamped episodes matching the query (newest first). " - "A single call is enough — do not call multiple times with different queries." 
- ), - "input_schema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search term to find in past conversations.", - }, - "max_results": { - "type": "integer", - "description": "Maximum episodes to return (default 20).", - }, - "days_back": { - "type": "integer", - "description": "Only search episodes from the last N days.", - }, - }, - "required": ["query"], - }, -} - CONNECT_DATASOURCE_TOOL = { "name": "connect_new_datasource", "description": ( @@ -197,268 +119,6 @@ def build_tool_schemas(available: list[str]) -> list[dict]: } -SCRATCHPAD_TOOL = { - "name": "scratchpad", - "description": ( - "Run Python code in a persistent scratchpad. Use this whenever you need to " - "count characters, do math, parse data, transform text, or any task that " - "benefits from precise computation rather than guessing. Variables, imports, " - "and data persist across cells — like a notebook you drive programmatically.\n\n" - "Actions:\n" - "- exec: Run code in the scratchpad (creates it if needed)\n" - "- view: See all cells and their outputs\n" - "- reset: Restart the process, clearing all state (installed packages survive)\n" - "- remove: Kill the scratchpad and delete its environment\n" - "- dump: Show a clean notebook-style summary of cells (code + truncated output)\n" - "- install: Install Python packages into the scratchpad's environment. " - "Packages persist across resets.\n\n" - "Use print() to produce output. Host Python packages are available by default. " - "Include a 'packages' array on exec calls for any libraries your code needs — " - "they'll be auto-installed before the cell runs (already-installed ones are skipped).\n" - "get_llm() returns a pre-configured LLM client (sync) — call " - "llm.complete(system=..., messages=[...]) for AI-powered computation.\n" - "llm.generate_object(MyModel, system=..., messages=[...]) extracts structured " - "data into Pydantic models. 
Supports single models and list[Model].\n" - "agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) " - "runs a tool-call loop where the LLM reasons and calls your tools iteratively. " - "handle_tool(name, inputs) -> str is a plain sync function.\n" - "sample(var) inspects any variable with type-aware formatting — DataFrames get " - "shape/dtypes/head, dicts get keys/values, lists get length/items. " - "Defaults to 'preview' mode (compact); use sample(var, mode='full') for complete dump.\n" - "All .anton/.env secrets are available as environment variables (os.environ).\n\n" - "IMPORTANT: Cells have an inactivity timeout of 30 seconds — if a cell produces " - "no output and no progress() calls for 30s, it is killed and all state is lost. " - "For long-running code (API calls, data extraction, heavy computation), call " - "progress(message) periodically to signal work is ongoing and reset the timer. " - "The total timeout scales from your estimated_execution_time_seconds " - "(roughly 2x the estimate). You MUST provide estimated_execution_time_seconds " - "for every exec call. For very long operations, provide a realistic estimate " - "and use progress() to keep the cell alive." - ), - "input_schema": { - "type": "object", - "properties": { - "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]}, - "name": {"type": "string", "description": "Scratchpad name"}, - "code": { - "type": "string", - "description": "Python code (exec only). Use print() for output.", - }, - "packages": { - "type": "array", - "items": {"type": "string"}, - "description": "Package names needed by this cell (exec or install). " - "Listed after code so you know exactly what to include. " - "Already-installed packages are skipped automatically.", - }, - "one_line_description": { - "type": "string", - "description": "Brief description of what this cell does (e.g. 'Scrape listing prices'). 
Required for exec.", - }, - "estimated_execution_time_seconds": { - "type": "integer", - "description": "Estimated execution time in seconds. Drives the total timeout (roughly 2x estimate). Use progress() for long cells.", - }, - }, - "required": ["action", "name"], - }, -} - -async def handle_recall(session: ChatSession, tc_input: dict) -> str: - """Process a recall tool call — search episodic memory.""" - if session._episodic is None or not session._episodic.enabled: - return "Episodic memory is not available." - - query = tc_input.get("query", "") - if not query: - return "No query provided." - - kwargs: dict = {} - if "max_results" in tc_input: - kwargs["max_results"] = int(tc_input["max_results"]) - if "days_back" in tc_input: - kwargs["days_back"] = int(tc_input["days_back"]) - - return session._episodic.recall_formatted(query, **kwargs) - - -async def handle_memorize(session: ChatSession, tc_input: dict) -> str: - """Process a memorize tool call and return a result string. - - Encoding is fire-and-forget so it never blocks scratchpad execution. - """ - import asyncio - - if session._cortex is None: - return "Memory system not available." - - if session._cortex.mode == "off": - return "Memory encoding is disabled. Change memory mode via /setup to enable." - - from anton.memory.hippocampus import Engram - - raw_entries = tc_input.get("entries", []) - if not raw_entries: - return "No entries provided." 
- - engrams: list[Engram] = [] - for entry in raw_entries: - if not isinstance(entry, dict) or "text" not in entry: - continue - - kind = entry.get("kind", "lesson") - if kind not in ("always", "never", "when", "lesson", "profile"): - kind = "lesson" - - scope = entry.get("scope", "project") - if scope not in ("global", "project"): - scope = "project" - - # User-sourced memories (via explicit tool call) get high confidence - engrams.append(Engram( - text=entry["text"], - kind=kind, - scope=scope, - confidence="high", - topic=entry.get("topic", ""), - source="user", - )) - - if not engrams: - return "No valid entries provided." - - # Always encode immediately via fire-and-forget — the LLM explicitly - # chose to memorize these, so we never interrupt the user mid-turn - # with confirmation prompts. Confirmations are reserved for the - # post-turn consolidator (lessons extracted from scratchpad sessions). - async def _encode_bg(cortex, entries): - try: - await cortex.encode(entries) - except Exception: - pass # Best-effort; don't disrupt the conversation - - asyncio.create_task(_encode_bg(session._cortex, engrams)) - - descriptions = [f"Encoded {e.kind}: {e.text}" for e in engrams] - return "Memory updated: " + "; ".join(descriptions) - - -async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict): - """Validate and prepare a scratchpad exec call. - - Returns (pad, code, description, estimated_time, estimated_seconds) or - a str error message if validation fails. - """ - name = tc_input.get("name", "") - code = tc_input.get("code", "") - if not code or not code.strip(): - return "No code provided." 
- - pad = await session._scratchpads.get_or_create(name) - - # Auto-install packages before running the cell - packages = tc_input.get("packages", []) - if packages: - install_result = await pad.install_packages(packages) - if "Install failed" in install_result or "timed out" in install_result: - return install_result - - description = tc_input.get("one_line_description", "") - estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0) - if isinstance(estimated_seconds, str): - try: - estimated_seconds = int(estimated_seconds) - except ValueError: - estimated_seconds = 0 - - estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else "" - return pad, code, description, estimated_time, estimated_seconds - - -def format_cell_result(cell) -> str: - """Format a Cell into a tool result string. - - Every section is labeled so the LLM can tell what came from where: - [output] — print() / stdout from the cell code - [logs] — library logging (httpx, urllib3, etc.) captured at INFO+ - [stderr] — warnings and stderr writes - [error] — Python traceback if the cell raised an exception - """ - parts: list[str] = [] - if cell.stdout: - stdout = cell.stdout - if len(stdout) > 10_000: - stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)" - parts.append(f"[output]\n{stdout}") - if cell.logs if hasattr(cell, "logs") else False: - logs = cell.logs.strip() - if len(logs) > 3_000: - logs = logs[:3_000] + "\n... (logs truncated)" - parts.append(f"[logs]\n{logs}") - if cell.stderr: - parts.append(f"[stderr]\n{cell.stderr}") - if cell.error: - parts.append(f"[error]\n{cell.error}") - if not parts: - return "Code executed successfully (no output)." - return "\n".join(parts) - - -async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str: - """Dispatch a scratchpad tool call by action.""" - action = tc_input.get("action", "") - name = tc_input.get("name", "") - - if not name: - return "Scratchpad name is required." 
- - if action == "exec": - result = await prepare_scratchpad_exec(session, tc_input) - if isinstance(result, str): - return result - pad, code, description, estimated_time, estimated_seconds = result - - cell = await pad.execute( - code, - description=description, - estimated_time=estimated_time, - estimated_seconds=estimated_seconds, - ) - return format_cell_result(cell) - - elif action == "view": - pad = session._scratchpads._pads.get(name) - if pad is None: - return f"No scratchpad named '{name}'." - return pad.view() - - elif action == "reset": - pad = session._scratchpads._pads.get(name) - if pad is None: - return f"No scratchpad named '{name}'." - await pad.reset() - return f"Scratchpad '{name}' reset. All state cleared." - - elif action == "remove": - return await session._scratchpads.remove(name) - - elif action == "dump": - pad = session._scratchpads._pads.get(name) - if pad is None: - return f"No scratchpad named '{name}'." - return pad.render_notebook() - - elif action == "install": - packages = tc_input.get("packages", []) - if not packages: - return "No packages specified." 
- pad = await session._scratchpads.get_or_create(name) - return await pad.install_packages(packages) - - else: - return f"Unknown scratchpad action: {action}" - - async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str: """Handle connect_new_datasource tool call — interactive connection flow.""" engine = tc_input.get("engine", "") From 1ab00f91721818cc07539fad08ad5a73d1114146 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 17:48:52 -0700 Subject: [PATCH 007/134] reintroduced ToolDef class and impl core tools --- anton/core/tools/tool_defs.py | 47 ++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py index a687d202..ce221e3b 100644 --- a/anton/core/tools/tool_defs.py +++ b/anton/core/tools/tool_defs.py @@ -1,6 +1,20 @@ -SCRATCHPAD_TOOL = { - "name": "scratchpad", - "description": ( +from anton.core.tools.tool_handlers import handle_scratchpad, handle_memorize, handle_recall + +from dataclasses import dataclass +from typing import Callable + + +@dataclass +class ToolDef: + name: str + description: str + input_schema: dict + handler: Callable # async (session, tc_input) -> str + + +SCRATCHPAD_TOOL = ToolDef( + name = "scratchpad", + description = ( "Run Python code in a persistent scratchpad. Use this whenever you need to " "count characters, do math, parse data, transform text, or any task that " "benefits from precise computation rather than guessing. Variables, imports, " @@ -36,7 +50,7 @@ "for every exec call. For very long operations, provide a realistic estimate " "and use progress() to keep the cell alive." 
), - "input_schema": { + input_schema = { "type": "object", "properties": { "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]}, @@ -63,12 +77,13 @@ }, "required": ["action", "name"], }, -} + handler = handle_scratchpad, +) -MEMORIZE_TOOL = { - "name": "memorize", - "description": ( +MEMORIZE_TOOL = ToolDef( + name = "memorize", + description = ( "Encode a rule or lesson into long-term memory for future sessions. " "Use this when you learn something important, discover a useful pattern, " "or the user asks you to remember something.\n\n" @@ -79,7 +94,7 @@ "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n" "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')" ), - "input_schema": { + input_schema = { "type": "object", "properties": { "entries": { @@ -110,12 +125,13 @@ }, "required": ["entries"], }, -} + handler = handle_memorize, +) -RECALL_TOOL = { - "name": "recall", - "description": ( +RECALL_TOOL = ToolDef( + name = "recall", + description = ( "Search your episodic memory — an archive of past conversations. " "ONLY use this when the user explicitly asks about a previous conversation " "or session (e.g. 'what did we talk about last time?', 'remember when we...', " @@ -124,7 +140,7 @@ "Returns timestamped episodes matching the query (newest first). " "A single call is enough — do not call multiple times with different queries." 
), - "input_schema": { + input_schema = { "type": "object", "properties": { "query": { @@ -142,4 +158,5 @@ }, "required": ["query"], }, -} \ No newline at end of file + handler = handle_recall, +) \ No newline at end of file From 75df939f50019aa985a4c1a06aa96a2002d4df7c Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 17:49:05 -0700 Subject: [PATCH 008/134] removed invalid dispatch --- anton/core/tools/tool_handlers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py index ebe5dd30..33246553 100644 --- a/anton/core/tools/tool_handlers.py +++ b/anton/core/tools/tool_handlers.py @@ -1,4 +1,7 @@ -from anton.core.session import ChatSession +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from anton.chat_session import ChatSession async def handle_recall(session: ChatSession, tc_input: dict) -> str: From 42470231ea02f67d18964a34697d0d794c004d7c Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 17:49:27 -0700 Subject: [PATCH 009/134] introduced ToolRegistry --- anton/core/tools/registry.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 anton/core/tools/registry.py diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py new file mode 100644 index 00000000..59a13e74 --- /dev/null +++ b/anton/core/tools/registry.py @@ -0,0 +1,25 @@ +from anton.core.tools.tool_defs import MEMORIZE_TOOL, RECALL_TOOL, SCRATCHPAD_TOOL, ToolDef + + +class ToolRegistry: + """ + Registry of tools available to the LLM. + """ + def __init__(self) -> None: + # Register core tools. + self._tools = [SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL] + + def register_tool(self, tool_def: ToolDef) -> None: + """ + Register a new (extra to core) tool. + """ + self._tools.append(tool_def) + + def dispatch_tool(self, tool_name: str, tc_input: dict) -> str: + """ + Dispatch a tool call by name. Returns result text. 
+ """ + tool_def = next((tool for tool in self._tools if tool.name == tool_name), None) + if tool_def is None: + raise ValueError(f"Tool {tool_name} not found") + return tool_def.handler(tc_input) From 3eec212b4ecbb25ebfee0341dd224547fdd26d45 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 17:51:26 -0700 Subject: [PATCH 010/134] removed core tool registration --- anton/core/tools/registry.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py index 59a13e74..6ef01d29 100644 --- a/anton/core/tools/registry.py +++ b/anton/core/tools/registry.py @@ -1,4 +1,7 @@ -from anton.core.tools.tool_defs import MEMORIZE_TOOL, RECALL_TOOL, SCRATCHPAD_TOOL, ToolDef +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from anton.core.tools.tool_defs import ToolDef class ToolRegistry: @@ -7,7 +10,7 @@ class ToolRegistry: """ def __init__(self) -> None: # Register core tools. - self._tools = [SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL] + self._tools = [] def register_tool(self, tool_def: ToolDef) -> None: """ From cf519d09d41c1499f696207e5fb71bbb3706feb8 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 18:04:25 -0700 Subject: [PATCH 011/134] fixed tool registration and dispatch logic --- anton/core/session.py | 51 ++++++++++++-------------- anton/core/tools/registry.py | 10 ++++-- anton/tools.py | 69 +----------------------------------- 3 files changed, 31 insertions(+), 99 deletions(-) diff --git a/anton/core/session.py b/anton/core/session.py index b4253542..068db4a8 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -15,16 +15,8 @@ StreamToolResult, ) from anton.scratchpad import ScratchpadManager -from anton.tools import ( - CONNECT_DATASOURCE_TOOL, - MEMORIZE_TOOL, - PUBLISH_TOOL, - RECALL_TOOL, - SCRATCHPAD_TOOL, - dispatch_tool, - format_cell_result, - prepare_scratchpad_exec, -) +from anton.core.tools.registry import ToolRegistry 
+from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef from anton.utils.datasources import ( build_datasource_context, @@ -109,6 +101,7 @@ def __init__( history_store: HistoryStore | None = None, session_id: str | None = None, proactive_dashboards: bool = False, + tools: list[ToolDef] | None = None, ) -> None: self._llm = llm_client self._self_awareness = self_awareness @@ -116,6 +109,7 @@ def __init__( self._episodic = episodic self._runtime_context = runtime_context self._proactive_dashboards = proactive_dashboards + self._extra_tools = tools self._workspace = workspace self._console = console self._history: list[dict] = list(initial_history) if initial_history else [] @@ -137,6 +131,7 @@ def __init__( coding_base_url=coding_base_url, workspace_path=workspace.base if workspace else None, ) + self.tool_registry = ToolRegistry() @property def history(self) -> list[dict]: @@ -270,7 +265,13 @@ async def _build_system_prompt(self, user_message: str = "") -> str: } def _build_tools(self) -> list[dict]: - scratchpad_tool = dict(SCRATCHPAD_TOOL) + self._build_core_tools() + for tool in self._extra_tools: + self.tool_registry.register_tool(tool) + return self.tool_registry.dump() + + def _build_core_tools(self) -> None: + scratchpad_tool = SCRATCHPAD_TOOL pkg_list = self._scratchpads._available_packages if pkg_list: notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES) @@ -279,29 +280,21 @@ def _build_tools(self) -> list[dict]: extra = f"\n\nInstalled packages ({len(pkg_list)} total, notable: {pkg_line})." else: extra = f"\n\nInstalled packages: {len(pkg_list)} total (standard library plus dependencies)." 
- scratchpad_tool["description"] = SCRATCHPAD_TOOL["description"] + extra + scratchpad_tool.description = scratchpad_tool.description + extra # Inject scratchpad wisdom from memory (procedural priming) if self._cortex is not None: wisdom = self._cortex.get_scratchpad_context() if wisdom: - scratchpad_tool[ - "description" - ] += f"\n\nLessons from past sessions:\n{wisdom}" + scratchpad_tool.description += f"\n\nLessons from past sessions:\n{wisdom}" - tools = [scratchpad_tool] - if self._cortex is not None: - tools.append(MEMORIZE_TOOL) - elif self._self_awareness is not None: - # Legacy fallback - from anton.tools import MEMORIZE_TOOL as _MT + self.tool_registry.register_tool(scratchpad_tool) + + if self._cortex is not None or self._self_awareness is not None: + self.tool_registry.register_tool(MEMORIZE_TOOL) - tools.append(_MT) if self._episodic is not None and self._episodic.enabled: - tools.append(RECALL_TOOL) - tools.append(CONNECT_DATASOURCE_TOOL) - tools.append(PUBLISH_TOOL) - return tools + self.tool_registry.register_tool(RECALL_TOOL) async def close(self) -> None: """Clean up scratchpads and other resources.""" @@ -496,7 +489,7 @@ async def turn(self, user_input: str | list[dict]) -> str: tool_results: list[dict] = [] for tc in response.tool_calls: try: - result_text = await dispatch_tool(self, tc.name, tc.input) + result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input) except Exception as exc: result_text = f"Tool '{tc.name}' failed: {exc}" @@ -875,7 +868,7 @@ async def _stream_and_handle_tools( ) if self._escape_watcher: self._escape_watcher.pause() - result_text = await dispatch_tool(self, tc.name, tc.input) + result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input) if self._escape_watcher: self._escape_watcher.resume() yield StreamTaskProgress( @@ -883,7 +876,7 @@ async def _stream_and_handle_tools( message="Analyzing results...", ) else: - result_text = await dispatch_tool(self, tc.name, tc.input) + result_text = await 
self.tool_registry.dispatch_tool(tc.name, tc.input) if ( tc.name == "scratchpad" and tc.input.get("action") == "dump" diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py index 6ef01d29..a73b6679 100644 --- a/anton/core/tools/registry.py +++ b/anton/core/tools/registry.py @@ -18,11 +18,17 @@ def register_tool(self, tool_def: ToolDef) -> None: """ self._tools.append(tool_def) - def dispatch_tool(self, tool_name: str, tc_input: dict) -> str: + async def dispatch_tool(self, tool_name: str, tc_input: dict) -> str: """ Dispatch a tool call by name. Returns result text. """ tool_def = next((tool for tool in self._tools if tool.name == tool_name), None) if tool_def is None: raise ValueError(f"Tool {tool_name} not found") - return tool_def.handler(tc_input) + return await tool_def.handler(tc_input) + + def dump(self) -> list[dict]: + """ + Dump the registry as a list of tool definitions. + """ + return [tool.model_dump() for tool in self._tools] diff --git a/anton/tools.py b/anton/tools.py index f8797aef..d39a70c2 100644 --- a/anton/tools.py +++ b/anton/tools.py @@ -2,64 +2,12 @@ from __future__ import annotations -from collections.abc import Callable -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING if TYPE_CHECKING: from anton.chat import ChatSession -@dataclass -class ToolDef: - name: str - description: str - input_schema: dict - handler: Callable # async (session, tc_input) -> str - stream_handler: Callable | None = None # async generator version - - -_registry: dict[str, ToolDef] = {} - - -def tool(name: str, *, description: str, input_schema: dict): - """Decorator to register a tool with its handler.""" - def decorator(fn): - _registry[name] = ToolDef( - name=name, - description=description, - input_schema=input_schema, - handler=fn, - ) - return fn - return decorator - - -def tool_stream(name: str): - """Decorator to register a streaming handler for an existing tool.""" - def 
decorator(fn): - if name in _registry: - _registry[name].stream_handler = fn - return fn - return decorator - - -def get_tool(name: str) -> ToolDef | None: - return _registry.get(name) - - -def all_tools() -> list[ToolDef]: - return list(_registry.values()) - - -def build_tool_schemas(available: list[str]) -> list[dict]: - """Build API-ready tool schema dicts for the given tool names.""" - return [ - {"name": t.name, "description": t.description, "input_schema": t.input_schema} - for t in _registry.values() - if t.name in available - ] - CONNECT_DATASOURCE_TOOL = { "name": "connect_new_datasource", "description": ( @@ -272,18 +220,3 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str return f"Published successfully!\nView URL: {view_url}" - -async def dispatch_tool(session: ChatSession, tool_name: str, tc_input: dict) -> str: - """Dispatch a tool call by name. Returns result text.""" - if tool_name == "memorize": - return await handle_memorize(session, tc_input) - elif tool_name == "scratchpad": - return await handle_scratchpad(session, tc_input) - elif tool_name == "recall": - return await handle_recall(session, tc_input) - elif tool_name == "connect_new_datasource": - return await handle_connect_datasource(session, tc_input) - elif tool_name == "publish_or_preview": - return await handle_publish_or_preview(session, tc_input) - else: - return f"Unknown tool: {tool_name}" From 687f37193341383df80bde66861f41ff005256e4 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 18:11:27 -0700 Subject: [PATCH 012/134] handled extra tool registration --- anton/chat.py | 2 + anton/tools.py | 131 +++++++++++++++++++++++++------------------------ 2 files changed, 68 insertions(+), 65 deletions(-) diff --git a/anton/chat.py b/anton/chat.py index 5bd77801..d579a8b3 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -49,6 +49,7 @@ handle_connect_datasource, handle_test_datasource, ) +from anton.tools import CONNECT_DATASOURCE_TOOL, 
PUBLISH_TOOL from anton.utils.prompt import ( prompt_or_cancel, prompt_minds_api_key, @@ -1015,6 +1016,7 @@ async def _chat_loop( history_store=history_store, session_id=current_session_id, proactive_dashboards=settings.proactive_dashboards, + tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL], ) # Handle --resume flag at startup diff --git a/anton/tools.py b/anton/tools.py index d39a70c2..3ce6d92c 100644 --- a/anton/tools.py +++ b/anton/tools.py @@ -1,76 +1,16 @@ -"""Dynamic tool registry — decorator-based registration for chat tools.""" - -from __future__ import annotations +"""Extra tools for the open source terminal agent.""" from typing import TYPE_CHECKING -if TYPE_CHECKING: - from anton.chat import ChatSession - - -CONNECT_DATASOURCE_TOOL = { - "name": "connect_new_datasource", - "description": ( - "Connect a new data source to Anton's Local Vault. Call this when the user " - "asks a question that requires data from a source that isn't connected yet " - "(e.g. email, database, CRM, API). This starts an interactive connection flow " - "where the user enters their credentials.\n\n" - "Pass the datasource type/name (e.g. 'gmail', 'postgres', 'salesforce', 'hubspot'). " - "Anton will match it to the right connector and guide the user through setup.\n\n" - "Do NOT print any message before calling this tool — it handles the user-facing output." - ), - "input_schema": { - "type": "object", - "properties": { - "engine": { - "type": "string", - "description": "The datasource type or name (e.g. 'gmail', 'postgres', 'snowflake', 'hubspot')", - }, - "reason": { - "type": "string", - "description": "Brief explanation of why this datasource is needed", - }, - }, - "required": ["engine"], - }, -} +from anton.core.tools.tool_defs import ToolDef -PUBLISH_TOOL = { - "name": "publish_or_preview", - "description": ( - "Call this after generating an HTML dashboard or report in .anton/output/. " - "Actions: 'ask' (default) prompts the user to preview/publish/skip interactively. 
" - "'preview' opens the file in the browser immediately. " - "'publish' publishes to the web immediately. " - "Use 'preview' or 'publish' when the user has already stated their intent. " - "Use 'ask' after generating a new dashboard to let the user choose." - ), - "input_schema": { - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "Path to the HTML file (e.g. .anton/output/dashboard.html)", - }, - "title": { - "type": "string", - "description": "Short title describing the dashboard (e.g. 'BTC & Macro Dashboard')", - }, - "action": { - "type": "string", - "enum": ["ask", "preview", "publish"], - "description": "What to do: 'ask' prompts user, 'preview' opens locally, 'publish' publishes to web", - }, - }, - "required": ["file_path"], - }, -} +if TYPE_CHECKING: + from anton.core.session import ChatSession async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str: """Handle connect_new_datasource tool call — interactive connection flow.""" engine = tc_input.get("engine", "") - reason = tc_input.get("reason", "") if not engine: return "Engine name is required." @@ -84,7 +24,6 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str ) from anton.commands.datasource import handle_connect_datasource - from anton.utils.prompt import prompt_or_cancel from anton.data_vault import DataVault # Check which connections exist before @@ -134,6 +73,35 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str ) +CONNECT_DATASOURCE_TOOL = ToolDef( + name = "connect_new_datasource", + description = ( + "Connect a new data source to Anton's Local Vault. Call this when the user " + "asks a question that requires data from a source that isn't connected yet " + "(e.g. email, database, CRM, API). This starts an interactive connection flow " + "where the user enters their credentials.\n\n" + "Pass the datasource type/name (e.g. 'gmail', 'postgres', 'salesforce', 'hubspot'). 
" + "Anton will match it to the right connector and guide the user through setup.\n\n" + "Do NOT print any message before calling this tool — it handles the user-facing output." + ), + input_schema = { + "type": "object", + "properties": { + "engine": { + "type": "string", + "description": "The datasource type or name (e.g. 'gmail', 'postgres', 'snowflake', 'hubspot')", + }, + "reason": { + "type": "string", + "description": "Brief explanation of why this datasource is needed", + }, + }, + "required": ["engine"], + }, + handler = handle_connect_datasource, +) + + async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str: """Interactive preview/publish flow after dashboard creation.""" import os @@ -161,6 +129,7 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str # Publish flow from anton.config.settings import AntonSettings from anton.publisher import publish + from anton.utils.prompt import prompt_or_cancel settings = AntonSettings() @@ -220,3 +189,35 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str return f"Published successfully!\nView URL: {view_url}" + +PUBLISH_TOOL = ToolDef( + name = "publish_or_preview", + description = ( + "Call this after generating an HTML dashboard or report in .anton/output/. " + "Actions: 'ask' (default) prompts the user to preview/publish/skip interactively. " + "'preview' opens the file in the browser immediately. " + "'publish' publishes to the web immediately. " + "Use 'preview' or 'publish' when the user has already stated their intent. " + "Use 'ask' after generating a new dashboard to let the user choose." + ), + input_schema = { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Path to the HTML file (e.g. .anton/output/dashboard.html)", + }, + "title": { + "type": "string", + "description": "Short title describing the dashboard (e.g. 
'BTC & Macro Dashboard')", + }, + "action": { + "type": "string", + "enum": ["ask", "preview", "publish"], + "description": "What to do: 'ask' prompts user, 'preview' opens locally, 'publish' publishes to web", + }, + }, + "required": ["file_path"], + }, + handler = handle_publish_or_preview, +) From 01e3a9bc11c9c01e5529eeb2458f6b077ead4594 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 18:25:36 -0700 Subject: [PATCH 013/134] introduced core utils and fixed broken imports --- anton/core/session.py | 4 +- anton/core/tools/tool_handlers.py | 63 +----------------------------- anton/core/utils/__init__.py | 0 anton/core/utils/scratchpad.py | 65 +++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 62 deletions(-) create mode 100644 anton/core/utils/__init__.py create mode 100644 anton/core/utils/scratchpad.py diff --git a/anton/core/session.py b/anton/core/session.py index 068db4a8..ac7266d6 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -17,6 +17,7 @@ from anton.scratchpad import ScratchpadManager from anton.core.tools.registry import ToolRegistry from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef +from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result from anton.utils.datasources import ( build_datasource_context, @@ -26,6 +27,7 @@ if TYPE_CHECKING: from rich.console import Console from anton.context.self_awareness import SelfAwarenessContext + from anton.chat_ui import EscapeWatcher from anton.llm.client import LLMClient from anton.memory.cortex import Cortex from anton.memory.episodes import EpisodicMemory @@ -122,7 +124,7 @@ def __init__( self._history_store = history_store self._session_id = session_id self._cancel_event = asyncio.Event() - self._escape_watcher: "EscapeWatcher | None" = None + self._escape_watcher: EscapeWatcher | None = None self._active_datasource: str | None = None self._scratchpads = ScratchpadManager( 
coding_provider=coding_provider, diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py index 33246553..16420f87 100644 --- a/anton/core/tools/tool_handlers.py +++ b/anton/core/tools/tool_handlers.py @@ -1,5 +1,7 @@ from typing import TYPE_CHECKING +from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result + if TYPE_CHECKING: from anton.chat_session import ChatSession @@ -83,67 +85,6 @@ async def _encode_bg(cortex, entries): return "Memory updated: " + "; ".join(descriptions) -async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict): - """Validate and prepare a scratchpad exec call. - - Returns (pad, code, description, estimated_time, estimated_seconds) or - a str error message if validation fails. - """ - name = tc_input.get("name", "") - code = tc_input.get("code", "") - if not code or not code.strip(): - return "No code provided." - - pad = await session._scratchpads.get_or_create(name) - - # Auto-install packages before running the cell - packages = tc_input.get("packages", []) - if packages: - install_result = await pad.install_packages(packages) - if "Install failed" in install_result or "timed out" in install_result: - return install_result - - description = tc_input.get("one_line_description", "") - estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0) - if isinstance(estimated_seconds, str): - try: - estimated_seconds = int(estimated_seconds) - except ValueError: - estimated_seconds = 0 - - estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else "" - return pad, code, description, estimated_time, estimated_seconds - - -def format_cell_result(cell) -> str: - """Format a Cell into a tool result string. - - Every section is labeled so the LLM can tell what came from where: - [output] — print() / stdout from the cell code - [logs] — library logging (httpx, urllib3, etc.) 
captured at INFO+ - [stderr] — warnings and stderr writes - [error] — Python traceback if the cell raised an exception - """ - parts: list[str] = [] - if cell.stdout: - stdout = cell.stdout - if len(stdout) > 10_000: - stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)" - parts.append(f"[output]\n{stdout}") - if cell.logs if hasattr(cell, "logs") else False: - logs = cell.logs.strip() - if len(logs) > 3_000: - logs = logs[:3_000] + "\n... (logs truncated)" - parts.append(f"[logs]\n{logs}") - if cell.stderr: - parts.append(f"[stderr]\n{cell.stderr}") - if cell.error: - parts.append(f"[error]\n{cell.error}") - if not parts: - return "Code executed successfully (no output)." - return "\n".join(parts) - - async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str: """Dispatch a scratchpad tool call by action.""" action = tc_input.get("action", "") diff --git a/anton/core/utils/__init__.py b/anton/core/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/anton/core/utils/scratchpad.py b/anton/core/utils/scratchpad.py new file mode 100644 index 00000000..a94dc6c4 --- /dev/null +++ b/anton/core/utils/scratchpad.py @@ -0,0 +1,65 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from anton.core.session import ChatSession + + +async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict): + """Validate and prepare a scratchpad exec call. + + Returns (pad, code, description, estimated_time, estimated_seconds) or + a str error message if validation fails. + """ + name = tc_input.get("name", "") + code = tc_input.get("code", "") + if not code or not code.strip(): + return "No code provided." 
+ + pad = await session._scratchpads.get_or_create(name) + + # Auto-install packages before running the cell + packages = tc_input.get("packages", []) + if packages: + install_result = await pad.install_packages(packages) + if "Install failed" in install_result or "timed out" in install_result: + return install_result + + description = tc_input.get("one_line_description", "") + estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0) + if isinstance(estimated_seconds, str): + try: + estimated_seconds = int(estimated_seconds) + except ValueError: + estimated_seconds = 0 + + estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else "" + return pad, code, description, estimated_time, estimated_seconds + + +def format_cell_result(cell) -> str: + """Format a Cell into a tool result string. + + Every section is labeled so the LLM can tell what came from where: + [output] — print() / stdout from the cell code + [logs] — library logging (httpx, urllib3, etc.) captured at INFO+ + [stderr] — warnings and stderr writes + [error] — Python traceback if the cell raised an exception + """ + parts: list[str] = [] + if cell.stdout: + stdout = cell.stdout + if len(stdout) > 10_000: + stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)" + parts.append(f"[output]\n{stdout}") + if cell.logs if hasattr(cell, "logs") else False: + logs = cell.logs.strip() + if len(logs) > 3_000: + logs = logs[:3_000] + "\n... (logs truncated)" + parts.append(f"[logs]\n{logs}") + if cell.stderr: + parts.append(f"[stderr]\n{cell.stderr}") + if cell.error: + parts.append(f"[error]\n{cell.error}") + if not parts: + return "Code executed successfully (no output)." 
+ return "\n".join(parts) \ No newline at end of file From bdf79cd756248a5c550f2172e489ce3dd1a283ec Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 18:40:45 -0700 Subject: [PATCH 014/134] fixed another broken import --- anton/chat_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anton/chat_session.py b/anton/chat_session.py index 3e76878d..a042ab52 100644 --- a/anton/chat_session.py +++ b/anton/chat_session.py @@ -13,7 +13,7 @@ if TYPE_CHECKING: from anton.chat import ChatSession from anton.memory.cortex import Cortex - from anton.memory.episodic import EpisodicMemory + from anton.memory.episodes import EpisodicMemory from anton.memory.history_store import HistoryStore from anton.workspace import Workspace From 95ee00f4b9402a0c1356fa89ccb540caa77c4e18 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 18:48:43 -0700 Subject: [PATCH 015/134] fixed more imports --- anton/core/tools/registry.py | 1 + anton/core/tools/tool_handlers.py | 1 + anton/core/utils/scratchpad.py | 1 + anton/tools.py | 1 + 4 files changed, 4 insertions(+) diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py index a73b6679..4e1109a5 100644 --- a/anton/core/tools/registry.py +++ b/anton/core/tools/registry.py @@ -1,3 +1,4 @@ +from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py index 16420f87..82adefd6 100644 --- a/anton/core/tools/tool_handlers.py +++ b/anton/core/tools/tool_handlers.py @@ -1,3 +1,4 @@ +from __future__ import annotations from typing import TYPE_CHECKING from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result diff --git a/anton/core/utils/scratchpad.py b/anton/core/utils/scratchpad.py index a94dc6c4..18d7efd9 100644 --- a/anton/core/utils/scratchpad.py +++ b/anton/core/utils/scratchpad.py @@ -1,3 +1,4 @@ +from __future__ import annotations from 
typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/anton/tools.py b/anton/tools.py index 3ce6d92c..11e0fa54 100644 --- a/anton/tools.py +++ b/anton/tools.py @@ -1,5 +1,6 @@ """Extra tools for the open source terminal agent.""" +from __future__ import annotations from typing import TYPE_CHECKING from anton.core.tools.tool_defs import ToolDef From f835fdfe7556562b945af6ac9036f837efb9499d Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 19:00:27 -0700 Subject: [PATCH 016/134] fixed tool dump --- anton/core/tools/registry.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py index 4e1109a5..81b6a640 100644 --- a/anton/core/tools/registry.py +++ b/anton/core/tools/registry.py @@ -1,4 +1,5 @@ from __future__ import annotations +from dataclasses import asdict from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -32,4 +33,10 @@ def dump(self) -> list[dict]: """ Dump the registry as a list of tool definitions. """ - return [tool.model_dump() for tool in self._tools] + tool_defs = [] + for tool_def in self._tools: + # Remove the handler from the tool definition. 
+ tool_def = asdict(tool_def) + tool_def.pop("handler") + tool_defs.append(tool_def) + return tool_defs From 69113530d063fcc3b334fd7be123fd963a0db604 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 7 Apr 2026 19:04:00 -0700 Subject: [PATCH 017/134] fixed duplicate tool registration --- anton/core/session.py | 7 ++++--- anton/core/tools/registry.py | 7 +++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/anton/core/session.py b/anton/core/session.py index ac7266d6..4c64e836 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -267,9 +267,10 @@ async def _build_system_prompt(self, user_message: str = "") -> str: } def _build_tools(self) -> list[dict]: - self._build_core_tools() - for tool in self._extra_tools: - self.tool_registry.register_tool(tool) + if not self.tool_registry: + self._build_core_tools() + for tool in self._extra_tools: + self.tool_registry.register_tool(tool) return self.tool_registry.dump() def _build_core_tools(self) -> None: diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py index 81b6a640..c3f5f69c 100644 --- a/anton/core/tools/registry.py +++ b/anton/core/tools/registry.py @@ -14,6 +14,12 @@ def __init__(self) -> None: # Register core tools. self._tools = [] + def __bool__(self) -> bool: + """ + Return True if there are any tools registered. + """ + return bool(self._tools) + def register_tool(self, tool_def: ToolDef) -> None: """ Register a new (extra to core) tool. @@ -32,6 +38,7 @@ async def dispatch_tool(self, tool_name: str, tc_input: dict) -> str: def dump(self) -> list[dict]: """ Dump the registry as a list of tool definitions. + This is used to build the tools list for the LLM. As a result, the handler is not needed. 
""" tool_defs = [] for tool_def in self._tools: From 45da437e3581eb8119ad783423a495fafd23ad06 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Wed, 8 Apr 2026 15:50:43 +0200 Subject: [PATCH 018/134] Make the choices for creating a new connection --- anton/commands/datasource.py | 155 ++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 48 deletions(-) diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py index 9652367b..1e69a5b4 100644 --- a/anton/commands/datasource.py +++ b/anton/commands/datasource.py @@ -515,6 +515,40 @@ async def handle_add_custom_datasource( return engine_def, credentials +async def _reconnect_to_saved( + console: Console, + session: "ChatSession", + vault: "DataVault", + registry: "DatasourceRegistry", + slug: str, + conn: dict, +) -> "ChatSession": + """Inject env for a saved connection and mark it as the active datasource.""" + restore_namespaced_env(vault) + session._active_datasource = slug + recon_engine_def = registry.get(conn["engine"]) + if recon_engine_def: + register_secret_vars(recon_engine_def, engine=conn["engine"], name=conn["name"]) + engine_label = recon_engine_def.display_name + else: + engine_label = conn["engine"] + console.print() + console.print( + f'[anton.success] ✓ Reconnected to [bold]"{slug}"[/bold].[/]' + ) + console.print() + session._history.append( + { + "role": "assistant", + "content": ( + f'I\'ve reconnected to the {engine_label} connection "{slug}" ' + f"in the Local Vault. I can now query this data source when needed." 
+ ), + } + ) + return session + + async def handle_connect_datasource( console: Console, scratchpads: ScratchpadManager, @@ -654,17 +688,19 @@ async def handle_connect_datasource( display_engines = popular_engines + other_engines + custom_engines saved_connections = vault.list_connections() - # Build deduplicated list of saved connection display entries - saved_entries: list[tuple[str, str]] = [] # (slug, display_name) + # Build deduplicated list of engine types from saved connections (one per engine) + seen_engines: set[str] = set() + recent_engine_entries: list[tuple[str, str]] = [] # (engine_slug, display_name) for c in saved_connections: - slug = f"{c['engine']}-{c['name']}" - engine = registry.get(c["engine"]) - label = engine.display_name if engine else c["engine"] - saved_entries.append((slug, label)) + if c["engine"] not in seen_engines: + seen_engines.add(c["engine"]) + engine_obj = registry.get(c["engine"]) + label = engine_obj.display_name if engine_obj else c["engine"] + recent_engine_entries.append((c["engine"], label)) def print_sections() -> None: console.print( - "[anton.cyan](anton)[/] Choose a data source:\n" + "[anton.cyan](anton)[/] Select a data source to create a new connection:\n" ) console.print(" [bold] Primary") console.print( @@ -676,10 +712,10 @@ def print_sections() -> None: for i, e in enumerate(popular_engines, 1): console.print(f" [bold]{i:>2}.[/bold] {e.display_name}") console.print() - if saved_entries: + if recent_engine_entries: start = len(popular_engines) + 1 - console.print(" [bold] Recent connections") - for i, (slug, label) in enumerate(saved_entries, start): + console.print(" [bold] Recently used data sources") + for i, (_, label) in enumerate(recent_engine_entries, start): console.print(f" [bold]{i:>2}.[/bold] {label}") console.print() @@ -697,28 +733,72 @@ def print_all() -> None: console.print(f" [bold]{i:>2}.[/bold] {e.display_name}{star}") console.print() - if prefill: - answer = prefill - else: + async def 
get_create_new_answer() -> str | None: print_sections() console.print( " [anton.muted]Don't see yours? Type a datasource name (e.g., GitHub, Gmail, Jira, ...)\n" " It can be virtually any datasource — we'll figure out the details together.[/]" ) console.print() - answer = await prompt_or_cancel( + ans = await prompt_or_cancel( "(anton) Enter a number or type a datasource name", ) - if answer is None: - return session - if answer.strip().lower() == "all": + if ans is None: + return None + if ans.strip().lower() == "all": console.print() print_all() - answer = await prompt_or_cancel( + ans = await prompt_or_cancel( "(anton) Enter a number or type a name", ) - if answer is None: + return ans + + if prefill: + answer = prefill + elif saved_connections: + console.print() + console.print("[anton.cyan](anton)[/] What would you like to do?\n") + console.print(" [bold] 1.[/bold] Use an existing connection") + console.print(" [bold] 2.[/bold] Create a new connection") + console.print() + top_choice = await prompt_or_cancel( + "(anton) Enter a number", choices=["1", "2"] + ) + if top_choice is None: + return session + + if top_choice == "1": + console.print() + console.print("[anton.cyan](anton)[/] Your saved connections:\n") + for i, c in enumerate(saved_connections, 1): + conn_slug = f"{c['engine']}-{c['name']}" + engine_obj = registry.get(c["engine"]) + engine_label = engine_obj.display_name if engine_obj else c["engine"] + console.print( + f" [bold]{i:>2}.[/bold] {conn_slug}" + f" [dim]— {engine_label}[/]" + ) + console.print() + pick = await prompt_or_cancel( + "(anton) Enter a number", + choices=[str(i) for i in range(1, len(saved_connections) + 1)], + ) + if pick is None: return session + picked_conn = saved_connections[int(pick) - 1] + picked_slug = f"{picked_conn['engine']}-{picked_conn['name']}" + return await _reconnect_to_saved( + console, session, vault, registry, picked_slug, picked_conn + ) + + # top_choice == "2": create new connection + answer = await 
get_create_new_answer() + if answer is None: + return session + else: + answer = await get_create_new_answer() + if answer is None: + return session stripped_answer = answer.strip() known_slugs = { @@ -726,36 +806,16 @@ def print_all() -> None: } if stripped_answer in known_slugs: conn = known_slugs[stripped_answer] - restore_namespaced_env(vault) - session._active_datasource = stripped_answer - recon_engine_def = registry.get(conn["engine"]) - if recon_engine_def: - register_secret_vars(recon_engine_def, engine=conn["engine"], name=conn["name"]) - engine_label = recon_engine_def.display_name - else: - engine_label = conn["engine"] - console.print() - console.print( - f'[anton.success] ✓ Reconnected to [bold]"{stripped_answer}"[/bold].[/]' + return await _reconnect_to_saved( + console, session, vault, registry, stripped_answer, conn ) - console.print() - session._history.append( - { - "role": "assistant", - "content": ( - f'I\'ve reconnected to the {engine_label} connection "{stripped_answer}" ' - f"in the Local Vault. I can now query this data source when needed." 
- ), - } - ) - return session engine_def: DatasourceEngine | None = None custom_source = False llm_recognised = False - # Saved connections are numbered after popular engines + # Recently used data sources are numbered after popular engines saved_start = len(popular_engines) + 1 - max_num = len(popular_engines) + len(saved_entries) + max_num = len(popular_engines) + len(recent_engine_entries) if stripped_answer.isdigit() or (stripped_answer.lstrip("-").isdigit()): pick_num = int(stripped_answer) @@ -763,11 +823,10 @@ def print_all() -> None: custom_source = True elif 1 <= pick_num <= len(popular_engines): engine_def = popular_engines[pick_num - 1] - elif saved_entries and saved_start <= pick_num <= max_num: - # User picked a recent connection type — start a new connection of that engine - picked_slug, picked_label = saved_entries[pick_num - saved_start] - picked_engine = picked_slug.split("-", 1)[0] - engine_def = registry.get(picked_engine) + elif recent_engine_entries and saved_start <= pick_num <= max_num: + # User picked a recently used data source — start a new connection of that engine + picked_engine_slug, _ = recent_engine_entries[pick_num - saved_start] + engine_def = registry.get(picked_engine_slug) if engine_def is None: custom_source = True else: From bcaca1046244c5fff82650dd9a680904509435a6 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Wed, 8 Apr 2026 08:57:44 -0700 Subject: [PATCH 019/134] fixed merge drift --- anton/chat.py | 7 +------ anton/core/session.py | 4 ++++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/anton/chat.py b/anton/chat.py index c88473d7..a4945f5e 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -19,7 +19,6 @@ ) from anton.core.session import ChatSession from anton.llm.provider import ( - ContextOverflowError, TokenLimitExceeded, StreamComplete, StreamContextCompacted, @@ -75,17 +74,13 @@ from prompt_toolkit import PromptSession from prompt_toolkit.formatted_text import HTML from prompt_toolkit.styles 
import Style as PTStyle -from rich.prompt import Confirm, Prompt +from rich.prompt import Prompt if TYPE_CHECKING: from rich.console import Console from anton.config.settings import AntonSettings - from anton.context.self_awareness import SelfAwarenessContext - from anton.llm.client import LLMClient - from anton.memory.cortex import Cortex from anton.memory.episodes import EpisodicMemory - from anton.memory.history_store import HistoryStore from anton.workspace import Workspace diff --git a/anton/core/session.py b/anton/core/session.py index 4c64e836..f5020418 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -13,6 +13,7 @@ StreamTaskProgress, StreamTextDelta, StreamToolResult, + TokenLimitExceeded ) from anton.scratchpad import ScratchpadManager from anton.core.tools.registry import ToolRegistry @@ -571,6 +572,9 @@ async def turn_stream( yield event break # completed successfully except Exception as _agent_exc: + # Token/billing limit — don't retry, let the chat loop handle it + if isinstance(_agent_exc, TokenLimitExceeded): + raise _retry_count += 1 if _retry_count <= _max_auto_retries: # Inject the error into history and let the LLM try to recover From 154989db1bd92c7f3d72fb1b2579e593fc95935e Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Wed, 8 Apr 2026 09:02:58 -0700 Subject: [PATCH 020/134] bumped version for release --- anton/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anton/__init__.py b/anton/__init__.py index c72e3798..8c0d5d5b 100644 --- a/anton/__init__.py +++ b/anton/__init__.py @@ -1 +1 @@ -__version__ = "1.1.4" +__version__ = "2.0.0" From ca73b7d9d24f69cfac04a133b541ee417e6a9645 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Wed, 8 Apr 2026 09:27:47 -0700 Subject: [PATCH 021/134] fixed broken tool dispatch --- anton/core/session.py | 14 ++++++++++---- anton/core/tools/registry.py | 7 +++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/anton/core/session.py 
b/anton/core/session.py index f5020418..b6b9bf51 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -112,7 +112,7 @@ def __init__( self._episodic = episodic self._runtime_context = runtime_context self._proactive_dashboards = proactive_dashboards - self._extra_tools = tools + self._extra_tools = tools or [] self._workspace = workspace self._console = console self._history: list[dict] = list(initial_history) if initial_history else [] @@ -493,7 +493,9 @@ async def turn(self, user_input: str | list[dict]) -> str: tool_results: list[dict] = [] for tc in response.tool_calls: try: - result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input) + result_text = await self.tool_registry.dispatch_tool( + self, tc.name, tc.input + ) except Exception as exc: result_text = f"Tool '{tc.name}' failed: {exc}" @@ -875,7 +877,9 @@ async def _stream_and_handle_tools( ) if self._escape_watcher: self._escape_watcher.pause() - result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input) + result_text = await self.tool_registry.dispatch_tool( + self, tc.name, tc.input + ) if self._escape_watcher: self._escape_watcher.resume() yield StreamTaskProgress( @@ -883,7 +887,9 @@ async def _stream_and_handle_tools( message="Analyzing results...", ) else: - result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input) + result_text = await self.tool_registry.dispatch_tool( + self, tc.name, tc.input + ) if ( tc.name == "scratchpad" and tc.input.get("action") == "dump" diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py index c3f5f69c..2579b5ff 100644 --- a/anton/core/tools/registry.py +++ b/anton/core/tools/registry.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from anton.core.session import ChatSession from anton.core.tools.tool_defs import ToolDef @@ -26,14 +27,16 @@ def register_tool(self, tool_def: ToolDef) -> None: """ self._tools.append(tool_def) - async def dispatch_tool(self, tool_name: str, 
tc_input: dict) -> str: + async def dispatch_tool( + self, session: "ChatSession", tool_name: str, tc_input: dict + ) -> str: """ Dispatch a tool call by name. Returns result text. """ tool_def = next((tool for tool in self._tools if tool.name == tool_name), None) if tool_def is None: raise ValueError(f"Tool {tool_name} not found") - return await tool_def.handler(tc_input) + return await tool_def.handler(session, tc_input) def dump(self) -> list[dict]: """ From 62f4aec0d1cd6771eed8d2dbfbe562b06755c316 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Wed, 8 Apr 2026 09:29:21 -0700 Subject: [PATCH 022/134] fixed missing imports --- anton/chat.py | 2 +- anton/core/session.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/anton/chat.py b/anton/chat.py index a4945f5e..000aa3c1 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -17,7 +17,7 @@ parse_dropped_paths as _parse_dropped_paths, save_clipboard_image, ) -from anton.core.session import ChatSession +from anton.core.session import ChatSession, TOKEN_STATUS_CACHE_TTL from anton.llm.provider import ( TokenLimitExceeded, StreamComplete, diff --git a/anton/core/session.py b/anton/core/session.py index b6b9bf51..13938158 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -36,6 +36,7 @@ from anton.workspace import Workspace +# TODO: Move to settings? 
_MAX_TOOL_ROUNDS = 25 # Hard limit on consecutive tool-call rounds per turn _MAX_CONTINUATIONS = 3 # Max times the verification loop can restart the tool loop _CONTEXT_PRESSURE_THRESHOLD = 0.7 # Trigger compaction when context is 70% full From e04c2be7ffab49f348aca47ccbd5eba2bb0f27de Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Wed, 8 Apr 2026 09:31:01 -0700 Subject: [PATCH 023/134] fixed unit tests --- tests/test_chat_context.py | 6 ++-- tests/test_chat_scratchpad.py | 59 +++++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py index e06f4276..ddf70b82 100644 --- a/tests/test_chat_context.py +++ b/tests/test_chat_context.py @@ -11,7 +11,7 @@ from anton.chat import ChatSession, _handle_connect from anton.minds_client import describe_minds_connection_error from anton.config.settings import AntonSettings -from anton.tools import MEMORIZE_TOOL +from anton.core.tools.tool_defs import MEMORIZE_TOOL from anton.context.self_awareness import SelfAwarenessContext from anton.llm.provider import LLMResponse, ToolCall, Usage from anton.workspace import Workspace @@ -81,8 +81,8 @@ def cortex(memory_dirs): class TestMemorizeTool: def test_tool_definition_structure(self): - assert MEMORIZE_TOOL["name"] == "memorize" - props = MEMORIZE_TOOL["input_schema"]["properties"] + assert MEMORIZE_TOOL.name == "memorize" + props = MEMORIZE_TOOL.input_schema["properties"] assert "entries" in props async def test_memorize_creates_rule(self, cortex, memory_dirs): diff --git a/tests/test_chat_scratchpad.py b/tests/test_chat_scratchpad.py index 45926e83..e69356da 100644 --- a/tests/test_chat_scratchpad.py +++ b/tests/test_chat_scratchpad.py @@ -1,14 +1,25 @@ from __future__ import annotations +from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest -from anton.chat import SCRATCHPAD_TOOL, ChatSession +from anton.core.session import ChatSession +from 
anton.core.tools.tool_defs import SCRATCHPAD_TOOL from anton.commands.session import handle_resume from anton.llm.provider import LLMResponse, StreamComplete, StreamToolResult, ToolCall, Usage +@pytest.fixture() +def workspace(): + # Keep scratchpad venvs inside the repo workspace (pytest runs sandboxed and + # can't write to the real home directory). + base = Path(__file__).resolve().parents[1] / ".pytest-workspace" + base.mkdir(parents=True, exist_ok=True) + return MagicMock(base=base) + + def _text_response(text: str) -> LLMResponse: return LLMResponse( content=text, @@ -39,29 +50,29 @@ def _scratchpad_response( class TestScratchpadToolDefinition: def test_tool_definition_structure(self): - assert SCRATCHPAD_TOOL["name"] == "scratchpad" - props = SCRATCHPAD_TOOL["input_schema"]["properties"] + assert SCRATCHPAD_TOOL.name == "scratchpad" + props = SCRATCHPAD_TOOL.input_schema["properties"] assert "action" in props assert "name" in props assert "code" in props assert "packages" in props - assert SCRATCHPAD_TOOL["input_schema"]["required"] == ["action", "name"] + assert SCRATCHPAD_TOOL.input_schema["required"] == ["action", "name"] def test_tool_has_install_action(self): - actions = SCRATCHPAD_TOOL["input_schema"]["properties"]["action"]["enum"] + actions = SCRATCHPAD_TOOL.input_schema["properties"]["action"]["enum"] assert "install" in actions def test_packages_property_is_array_of_strings(self): - packages_prop = SCRATCHPAD_TOOL["input_schema"]["properties"]["packages"] + packages_prop = SCRATCHPAD_TOOL.input_schema["properties"]["packages"] assert packages_prop["type"] == "array" assert packages_prop["items"] == {"type": "string"} - async def test_scratchpad_tool_in_tools(self): + async def test_scratchpad_tool_in_tools(self, workspace): """scratchpad should always be in _build_tools() output.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock(return_value=_text_response("Hi!")) - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, 
workspace=workspace) try: await session.turn("hello") @@ -74,7 +85,7 @@ async def test_scratchpad_tool_in_tools(self): class TestScratchpadExecViaChat: - async def test_scratchpad_exec_via_chat(self): + async def test_scratchpad_exec_via_chat(self, workspace): """exec action flows through and returns output.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock( @@ -84,7 +95,7 @@ async def test_scratchpad_exec_via_chat(self): ] ) - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: reply = await session.turn("what is 7 * 6?") @@ -100,7 +111,7 @@ async def test_scratchpad_exec_via_chat(self): class TestScratchpadViewViaChat: - async def test_scratchpad_view_via_chat(self): + async def test_scratchpad_view_via_chat(self, workspace): """view action returns cell history.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock( @@ -111,7 +122,7 @@ async def test_scratchpad_view_via_chat(self): ] ) - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: await session.turn("run and show") @@ -129,7 +140,7 @@ async def test_scratchpad_view_via_chat(self): class TestScratchpadRemoveViaChat: - async def test_scratchpad_remove_via_chat(self): + async def test_scratchpad_remove_via_chat(self, workspace): """remove action cleans up the scratchpad.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock( @@ -140,7 +151,7 @@ async def test_scratchpad_remove_via_chat(self): ] ) - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: await session.turn("create and remove") @@ -155,7 +166,7 @@ async def test_scratchpad_remove_via_chat(self): class TestScratchpadDumpViaChat: - async def test_scratchpad_dump_via_chat(self): + async def test_scratchpad_dump_via_chat(self, workspace): """dump action flows through chat, returns markdown with code fences.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock( @@ -169,7 +180,7 @@ async def test_scratchpad_dump_via_chat(self): ] ) - 
session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: await session.turn("show me my work") @@ -202,7 +213,7 @@ async def __anext__(self): class TestScratchpadDumpStreaming: - async def test_scratchpad_dump_streams_tool_result(self): + async def test_scratchpad_dump_streams_tool_result(self, workspace): """dump action yields a StreamToolResult for display, but sends a short summary back to the LLM to avoid it parroting the full notebook.""" mock_llm = AsyncMock() @@ -231,7 +242,7 @@ def fake_plan_stream(**kwargs): mock_llm.plan_stream = fake_plan_stream - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: events = [] async for event in session.turn_stream("show work"): @@ -255,7 +266,7 @@ def fake_plan_stream(**kwargs): class TestScratchpadStreaming: - async def test_scratchpad_in_streaming_path(self): + async def test_scratchpad_in_streaming_path(self, workspace): """scratchpad exec should work in turn_stream() too.""" tool_response = _scratchpad_response("Computing.", "exec", "s", "print(99)") final_response = _text_response("Got 99.") @@ -274,7 +285,7 @@ def fake_plan_stream(**kwargs): mock_llm.plan_stream = fake_plan_stream - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: events = [] async for event in session.turn_stream("compute 99"): @@ -294,7 +305,7 @@ def fake_plan_stream(**kwargs): class TestScratchpadInstallViaChat: - async def test_install_action_dispatch(self): + async def test_install_action_dispatch(self, workspace): """install action flows through chat and returns pip output.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock( @@ -306,7 +317,7 @@ async def test_install_action_dispatch(self): ] ) - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: reply = await session.turn("install cowsay") @@ -320,7 +331,7 @@ async def test_install_action_dispatch(self): finally: await session.close() 
- async def test_install_empty_packages_via_chat(self): + async def test_install_empty_packages_via_chat(self, workspace): """install with no packages returns a message without crashing.""" mock_llm = AsyncMock() mock_llm.plan = AsyncMock( @@ -330,7 +341,7 @@ async def test_install_empty_packages_via_chat(self): ] ) - session = ChatSession(mock_llm) + session = ChatSession(mock_llm, workspace=workspace) try: await session.turn("install nothing") From fd5b05254024aacc4de676d2fb6398c68411fac2 Mon Sep 17 00:00:00 2001 From: Konstantin Sivakov Date: Wed, 8 Apr 2026 18:47:19 +0200 Subject: [PATCH 024/134] Mark stub-only the failing test --- tests/e2e/scenarios/test_error_handling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/scenarios/test_error_handling.py b/tests/e2e/scenarios/test_error_handling.py index 922406ae..731d19f5 100644 --- a/tests/e2e/scenarios/test_error_handling.py +++ b/tests/e2e/scenarios/test_error_handling.py @@ -78,6 +78,7 @@ def log_message(self, *_): pass assert_not_output(result, "Traceback (most recent call last)") +@pytest.mark.stub_only def test_large_input_no_crash(cfg, stub, tmp_path): stub.queue_text("Got your big message.") stub.queue_verification_ok() From 2b4d2318f477f57d7f7d08d3ca1c933998bbc8b5 Mon Sep 17 00:00:00 2001 From: Jorge Torres Date: Wed, 8 Apr 2026 11:04:29 -0700 Subject: [PATCH 025/134] calling publish tool --- anton/tools.py | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/anton/tools.py b/anton/tools.py index 11e0fa54..94cb828d 100644 --- a/anton/tools.py +++ b/anton/tools.py @@ -130,39 +130,20 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str # Publish flow from anton.config.settings import AntonSettings from anton.publisher import publish - from anton.utils.prompt import prompt_or_cancel settings = AntonSettings() if not settings.minds_api_key: + console.print() console.print(" [anton.muted]To publish you need a free 
Minds account.[/]") + console.print(" [anton.muted]Run [bold]/publish[/bold] to set up your API key and publish.[/]") console.print() - has_key = await prompt_or_cancel( - " (anton) Do you have an mdb.ai API key?", - choices=["y", "n"], - choices_display="y/n", - default="y", + return ( + "STOP: No Minds API key configured. Do NOT call this tool again. " + "Tell the user to run the /publish command to set up their mdb.ai API key " + "and publish their dashboard. The /publish command handles the interactive " + "API key setup flow." ) - if has_key is None: - console.print() - return "User cancelled publish." - if has_key == "n": - webbrowser.open( - "https://mdb.ai/auth/realms/mindsdb/protocol/openid-connect/registrations" - "?client_id=public-client&response_type=code&scope=openid" - "&redirect_uri=https%3A%2F%2Fmdb.ai" - ) - console.print() - - api_key = await prompt_or_cancel(" (anton) API key", password=True) - if api_key is None or not api_key.strip(): - console.print() - return "User cancelled publish." 
- api_key = api_key.strip() - settings.minds_api_key = api_key - if session._workspace: - session._workspace.set_secret("ANTON_MINDS_API_KEY", api_key) - console.print() from rich.live import Live from rich.spinner import Spinner From 9ffbc4b126526222757cf2964b5068dcb0a5da9e Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Wed, 8 Apr 2026 11:57:35 -0700 Subject: [PATCH 026/134] copied llm to core --- anton/core/llm/anthropic.py | 196 +++++++++++++++++ anton/core/llm/client.py | 112 ++++++++++ anton/core/llm/openai.py | 394 ++++++++++++++++++++++++++++++++++ anton/core/llm/prompts.py | 415 ++++++++++++++++++++++++++++++++++++ anton/core/llm/provider.py | 165 ++++++++++++++ 5 files changed, 1282 insertions(+) create mode 100644 anton/core/llm/anthropic.py create mode 100644 anton/core/llm/client.py create mode 100644 anton/core/llm/openai.py create mode 100644 anton/core/llm/prompts.py create mode 100644 anton/core/llm/provider.py diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py new file mode 100644 index 00000000..c940b4e5 --- /dev/null +++ b/anton/core/llm/anthropic.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +import json +from collections.abc import AsyncIterator + +import anthropic + +from .provider import ( + ContextOverflowError, + LLMProvider, + LLMResponse, + StreamComplete, + StreamEvent, + StreamTextDelta, + StreamToolUseDelta, + StreamToolUseEnd, + StreamToolUseStart, + ToolCall, + Usage, + compute_context_pressure, +) + + +class AnthropicProvider(LLMProvider): + def __init__(self, api_key: str | None = None) -> None: + kwargs = {} + if api_key: + kwargs["api_key"] = api_key + self._client = anthropic.AsyncAnthropic(**kwargs) + + async def complete( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + tool_choice: dict | None = None, + max_tokens: int = 4096, + ) -> LLMResponse: + kwargs: dict = { + "model": model, + "max_tokens": max_tokens, + "system": system, + 
"messages": messages, + } + if tools: + kwargs["tools"] = tools + if tool_choice: + kwargs["tool_choice"] = tool_choice + + try: + response = await self._client.messages.create(**kwargs) + except anthropic.BadRequestError as exc: + msg = str(exc).lower() + if "prompt is too long" in msg or "context limit" in msg: + raise ContextOverflowError(str(exc)) from exc + raise + except anthropic.APIStatusError as exc: + if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"): + msg = f"Server returned 429 — {exc.body['detail']}" + msg += " Visit https://mdb.ai to upgrade or to top up your tokens." + from anton.llm.provider import TokenLimitExceeded + raise TokenLimitExceeded(msg) from exc + else: + msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment." + raise ConnectionError(msg) from exc + except anthropic.APIConnectionError as exc: + raise ConnectionError( + "Could not reach the LLM server — check your connection or try again in a moment." 
+ ) from exc + + content_text = "" + tool_calls: list[ToolCall] = [] + + for block in response.content: + if block.type == "text": + content_text += block.text + elif block.type == "tool_use": + tool_calls.append( + ToolCall(id=block.id, name=block.name, input=block.input) + ) + + input_tokens = response.usage.input_tokens + return LLMResponse( + content=content_text, + tool_calls=tool_calls, + usage=Usage( + input_tokens=input_tokens, + output_tokens=response.usage.output_tokens, + context_pressure=compute_context_pressure(model, input_tokens), + ), + stop_reason=response.stop_reason, + ) + + async def stream( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + max_tokens: int = 4096, + ) -> AsyncIterator[StreamEvent]: + kwargs: dict = { + "model": model, + "max_tokens": max_tokens, + "system": system, + "messages": messages, + } + if tools: + kwargs["tools"] = tools + + content_text = "" + tool_calls: list[ToolCall] = [] + input_tokens = 0 + output_tokens = 0 + stop_reason: str | None = None + + # Track content blocks by index for tool correlation + blocks: dict[int, dict] = {} + + try: + async with self._client.messages.stream(**kwargs) as stream: + async for event in stream: + if event.type == "message_start": + usage = event.message.usage + input_tokens = usage.input_tokens + output_tokens = getattr(usage, "output_tokens", 0) + + elif event.type == "content_block_start": + idx = event.index + block = event.content_block + if block.type == "tool_use": + blocks[idx] = {"type": "tool_use", "id": block.id, "name": block.name, "json_parts": []} + yield StreamToolUseStart(id=block.id, name=block.name) + else: + blocks[idx] = {"type": "text"} + + elif event.type == "content_block_delta": + idx = event.index + delta = event.delta + if delta.type == "text_delta": + content_text += delta.text + yield StreamTextDelta(text=delta.text) + elif delta.type == "input_json_delta": + info = blocks.get(idx, {}) + if 
info.get("type") == "tool_use": + info["json_parts"].append(delta.partial_json) + yield StreamToolUseDelta(id=info["id"], json_delta=delta.partial_json) + + elif event.type == "content_block_stop": + idx = event.index + info = blocks.get(idx, {}) + if info.get("type") == "tool_use": + raw_json = "".join(info["json_parts"]) + parsed_input = json.loads(raw_json) if raw_json else {} + tool_calls.append( + ToolCall(id=info["id"], name=info["name"], input=parsed_input) + ) + yield StreamToolUseEnd(id=info["id"]) + + elif event.type == "message_delta": + stop_reason = event.delta.stop_reason + output_tokens = event.usage.output_tokens + except anthropic.BadRequestError as exc: + msg = str(exc).lower() + if "prompt is too long" in msg or "context limit" in msg: + raise ContextOverflowError(str(exc)) from exc + raise + except anthropic.APIStatusError as exc: + if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"): + msg = f"Server returned 429 — {exc.body['detail']}" + msg += " Visit https://mdb.ai to upgrade or to top up your tokens." + from anton.llm.provider import TokenLimitExceeded + raise TokenLimitExceeded(msg) from exc + else: + msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment." + raise ConnectionError(msg) from exc + except anthropic.APIConnectionError as exc: + raise ConnectionError( + "Could not reach the LLM server — check your connection or try again in a moment." 
+ ) from exc + + yield StreamComplete( + response=LLMResponse( + content=content_text, + tool_calls=tool_calls, + usage=Usage( + input_tokens=input_tokens, + output_tokens=output_tokens, + context_pressure=compute_context_pressure(model, input_tokens), + ), + stop_reason=stop_reason, + ) + ) diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py new file mode 100644 index 00000000..8a773608 --- /dev/null +++ b/anton/core/llm/client.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import TYPE_CHECKING + +from .provider import LLMProvider, LLMResponse, StreamEvent + +if TYPE_CHECKING: + from anton.config.settings import AntonSettings + + +class LLMClient: + def __init__( + self, + *, + planning_provider: LLMProvider, + planning_model: str, + coding_provider: LLMProvider, + coding_model: str, + max_tokens: int = 8192, + ) -> None: + self._planning_provider = planning_provider + self._planning_model = planning_model + self._coding_provider = coding_provider + self._coding_model = coding_model + self._max_tokens = max_tokens + + async def plan( + self, + *, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + max_tokens: int | None = None, + ) -> LLMResponse: + return await self._planning_provider.complete( + model=self._planning_model, + system=system, + messages=messages, + tools=tools, + max_tokens=max_tokens or self._max_tokens, + ) + + async def plan_stream( + self, + *, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + max_tokens: int | None = None, + ) -> AsyncIterator[StreamEvent]: + async for event in self._planning_provider.stream( + model=self._planning_model, + system=system, + messages=messages, + tools=tools, + max_tokens=max_tokens or self._max_tokens, + ): + yield event + + @property + def coding_provider(self) -> LLMProvider: + """The LLM provider used for coding/skill execution.""" + return self._coding_provider + + @property + 
def coding_model(self) -> str: + """The model name used for coding/skill execution.""" + return self._coding_model + + async def code( + self, + *, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + max_tokens: int | None = None, + ) -> LLMResponse: + return await self._coding_provider.complete( + model=self._coding_model, + system=system, + messages=messages, + tools=tools, + max_tokens=max_tokens or self._max_tokens, + ) + + @classmethod + def from_settings(cls, settings: AntonSettings) -> LLMClient: + from anton.llm.anthropic import AnthropicProvider + from anton.llm.openai import OpenAIProvider + + providers = { + "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key), + "openai": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify), + "openai-compatible": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify), + } + + planning_factory = providers.get(settings.planning_provider) + coding_factory = providers.get(settings.coding_provider) + + if planning_factory is None: + raise ValueError(f"Unknown planning provider: {settings.planning_provider}") + if coding_factory is None: + raise ValueError(f"Unknown coding provider: {settings.coding_provider}") + + return cls( + planning_provider=planning_factory(), + planning_model=settings.planning_model, + coding_provider=coding_factory(), + coding_model=settings.coding_model, + max_tokens=getattr(settings, "max_tokens", 8192), + ) diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py new file mode 100644 index 00000000..26236faf --- /dev/null +++ b/anton/core/llm/openai.py @@ -0,0 +1,394 @@ +from __future__ import annotations + +import json +from collections.abc import AsyncIterator + +import openai + +from .provider import ( + ContextOverflowError, + LLMProvider, + LLMResponse, + StreamComplete, + StreamEvent, + 
StreamTextDelta, + StreamToolUseDelta, + StreamToolUseEnd, + StreamToolUseStart, + ToolCall, + Usage, + compute_context_pressure, +) + + +def _translate_tools(tools: list[dict]) -> list[dict]: + """Anthropic tool format -> OpenAI function-calling format.""" + result = [] + for tool in tools: + result.append({ + "type": "function", + "function": { + "name": tool["name"], + "description": tool.get("description", ""), + "parameters": tool.get("input_schema", {}), + }, + }) + return result + + +def _translate_tool_choice(tool_choice: dict) -> dict | str: + """Anthropic tool_choice -> OpenAI tool_choice.""" + tc_type = tool_choice.get("type") + if tc_type == "tool": + return {"type": "function", "function": {"name": tool_choice["name"]}} + if tc_type == "any": + return "required" + if tc_type == "auto": + return "auto" + return "auto" + + +def _translate_messages(system: str, messages: list[dict]) -> list[dict]: + """Convert Anthropic-style messages to OpenAI chat format. + + Handles: + - system prompt -> {"role": "system", ...} + - plain text messages pass through + - assistant messages with tool_use content blocks -> tool_calls array + - user messages with tool_result content blocks -> role:tool messages + """ + result: list[dict] = [] + if system: + result.append({"role": "system", "content": system}) + + for msg in messages: + role = msg["role"] + content = msg.get("content") + + # Plain string content — pass through + if isinstance(content, str): + result.append({"role": role, "content": content}) + continue + + # Content is a list of blocks (Anthropic format) + if isinstance(content, list): + if role == "assistant": + result.extend(_translate_assistant_blocks(content)) + elif role == "user": + result.extend(_translate_user_blocks(content)) + else: + # Fallback: join text blocks + text = " ".join( + b.get("text", "") for b in content if b.get("type") == "text" + ) + result.append({"role": role, "content": text or ""}) + continue + + # Fallback + 
result.append({"role": role, "content": str(content) if content else ""}) + + return result + + +def _translate_assistant_blocks(blocks: list[dict]) -> list[dict]: + """Convert assistant content blocks to OpenAI message(s).""" + text_parts: list[str] = [] + tool_calls: list[dict] = [] + + for block in blocks: + if block.get("type") == "text": + text_parts.append(block["text"]) + elif block.get("type") == "tool_use": + tool_calls.append({ + "id": block["id"], + "type": "function", + "function": { + "name": block["name"], + "arguments": json.dumps(block.get("input", {})), + }, + }) + + msg: dict = {"role": "assistant"} + content = "\n".join(text_parts) if text_parts else None + msg["content"] = content + if tool_calls: + msg["tool_calls"] = tool_calls + return [msg] + + +def _translate_user_blocks(blocks: list[dict]) -> list[dict]: + """Convert user content blocks (including tool_result and image) to OpenAI messages.""" + result: list[dict] = [] + content_parts: list[dict] = [] # Accumulates text + image_url blocks + + for block in blocks: + if block.get("type") == "tool_result": + # Flush any accumulated content parts first + if content_parts: + result.append({"role": "user", "content": content_parts}) + content_parts = [] + # tool_result -> role:tool message + content = block.get("content", "") + if isinstance(content, list): + content = "\n".join( + b.get("text", "") for b in content if b.get("type") == "text" + ) + result.append({ + "role": "tool", + "tool_call_id": block["tool_use_id"], + "content": str(content), + }) + elif block.get("type") == "text": + content_parts.append({"type": "text", "text": block.get("text", "")}) + elif block.get("type") == "image": + # Anthropic image block -> OpenAI image_url block + source = block.get("source", {}) + if source.get("type") == "base64": + media_type = source.get("media_type", "image/png") + data = source.get("data", "") + content_parts.append({ + "type": "image_url", + "image_url": {"url": 
f"data:{media_type};base64,{data}"}, + }) + + if content_parts: + # If only text parts, flatten to a simple string for compatibility + if all(p.get("type") == "text" for p in content_parts): + result.append({ + "role": "user", + "content": "\n".join(p["text"] for p in content_parts), + }) + else: + result.append({"role": "user", "content": content_parts}) + + return result + + +def build_chat_completion_kwargs( + *, + model: str, + messages: list[dict], + max_tokens: int, + stream: bool = False, +) -> dict: + """Build chat.completions kwargs using modern OpenAI parameter names.""" + kwargs: dict = { + "model": model, + "messages": messages, + "max_completion_tokens": max_tokens, + } + if stream: + kwargs["stream"] = True + kwargs["stream_options"] = {"include_usage": True} + return kwargs + + +class OpenAIProvider(LLMProvider): + def __init__( + self, + api_key: str | None = None, + base_url: str | None = None, + ssl_verify: bool = True, + ) -> None: + import httpx + + kwargs = {} + if api_key: + kwargs["api_key"] = api_key + if base_url: + kwargs["base_url"] = base_url + if not ssl_verify: + kwargs["http_client"] = httpx.AsyncClient(verify=False) + self._client = openai.AsyncOpenAI(**kwargs) + + async def complete( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + tool_choice: dict | None = None, + max_tokens: int = 4096, + ) -> LLMResponse: + oai_messages = _translate_messages(system, messages) + + kwargs = build_chat_completion_kwargs( + model=model, + messages=oai_messages, + max_tokens=max_tokens, + ) + if tools: + kwargs["tools"] = _translate_tools(tools) + if tool_choice: + kwargs["tool_choice"] = _translate_tool_choice(tool_choice) + + try: + response = await self._client.chat.completions.create(**kwargs) + except openai.BadRequestError as exc: + msg = str(exc).lower() + if "context_length_exceeded" in msg or "maximum context length" in msg: + raise ContextOverflowError(str(exc)) from exc + raise + except 
openai.APIStatusError as exc: + if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"): + msg = f"Server returned 429 — {exc.body['detail']}" + msg += " Visit https://mdb.ai to upgrade or to top up your tokens." + from anton.llm.provider import TokenLimitExceeded + raise TokenLimitExceeded(msg) from exc + else: + msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment." + raise ConnectionError(msg) from exc + except openai.APIConnectionError as exc: + raise ConnectionError( + "Could not reach the LLM server — check your connection or try again in a moment." + ) from exc + + choice = response.choices[0] + message = choice.message + + content_text = message.content or "" + tool_calls: list[ToolCall] = [] + + if message.tool_calls: + for tc in message.tool_calls: + tool_calls.append( + ToolCall( + id=tc.id, + name=tc.function.name, + input=json.loads(tc.function.arguments) if tc.function.arguments else {}, + ) + ) + + usage_obj = response.usage + input_tokens = usage_obj.prompt_tokens if usage_obj else 0 + return LLMResponse( + content=content_text, + tool_calls=tool_calls, + usage=Usage( + input_tokens=input_tokens, + output_tokens=usage_obj.completion_tokens if usage_obj else 0, + context_pressure=compute_context_pressure(model, input_tokens), + ), + stop_reason=choice.finish_reason, + ) + + async def stream( + self, + *, + model: str, + system: str, + messages: list[dict], + tools: list[dict] | None = None, + max_tokens: int = 4096, + ) -> AsyncIterator[StreamEvent]: + oai_messages = _translate_messages(system, messages) + + kwargs = build_chat_completion_kwargs( + model=model, + messages=oai_messages, + max_tokens=max_tokens, + stream=True, + ) + if tools: + kwargs["tools"] = _translate_tools(tools) + + content_text = "" + tool_calls: list[ToolCall] = [] + input_tokens = 0 + output_tokens = 0 + stop_reason: str | None = None + + # Track tool call deltas by index + tc_state: 
dict[int, dict] = {} + + try: + stream = await self._client.chat.completions.create(**kwargs) + async for chunk in stream: + if chunk.usage: + input_tokens = chunk.usage.prompt_tokens + output_tokens = chunk.usage.completion_tokens + + if not chunk.choices: + continue + + delta = chunk.choices[0].delta + finish = chunk.choices[0].finish_reason + + if finish: + stop_reason = finish + + # Text content + if delta.content: + content_text += delta.content + yield StreamTextDelta(text=delta.content) + + # Tool call deltas + if delta.tool_calls: + for tc_delta in delta.tool_calls: + idx = tc_delta.index + if idx not in tc_state: + # New tool call + tc_state[idx] = { + "id": tc_delta.id or "", + "name": tc_delta.function.name if tc_delta.function and tc_delta.function.name else "", + "args_parts": [], + } + if tc_state[idx]["id"] and tc_state[idx]["name"]: + yield StreamToolUseStart( + id=tc_state[idx]["id"], + name=tc_state[idx]["name"], + ) + else: + # Update id/name if provided in later chunks + if tc_delta.id: + tc_state[idx]["id"] = tc_delta.id + if tc_delta.function and tc_delta.function.name: + tc_state[idx]["name"] = tc_delta.function.name + + # Accumulate argument fragments + if tc_delta.function and tc_delta.function.arguments: + tc_state[idx]["args_parts"].append(tc_delta.function.arguments) + yield StreamToolUseDelta( + id=tc_state[idx]["id"], + json_delta=tc_delta.function.arguments, + ) + except openai.BadRequestError as exc: + msg = str(exc).lower() + if "context_length_exceeded" in msg or "maximum context length" in msg: + raise ContextOverflowError(str(exc)) from exc + raise + except openai.APIStatusError as exc: + if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"): + msg = f"Server returned 429 — {exc.body['detail']}" + msg += " Visit https://mdb.ai to upgrade or top up your tokens." 
+ from anton.llm.provider import TokenLimitExceeded + raise TokenLimitExceeded(msg) from exc + else: + msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment." + raise ConnectionError(msg) from exc + except openai.APIConnectionError as exc: + raise ConnectionError( + "Could not reach the LLM server — check your connection or try again in a moment." + ) from exc + + # Finalize tool calls + for idx in sorted(tc_state): + info = tc_state[idx] + raw_json = "".join(info["args_parts"]) + parsed = json.loads(raw_json) if raw_json else {} + tool_calls.append(ToolCall(id=info["id"], name=info["name"], input=parsed)) + yield StreamToolUseEnd(id=info["id"]) + + yield StreamComplete( + response=LLMResponse( + content=content_text, + tool_calls=tool_calls, + usage=Usage( + input_tokens=input_tokens, + output_tokens=output_tokens, + context_pressure=compute_context_pressure(model, input_tokens), + ), + stop_reason=stop_reason, + ) + ) diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py new file mode 100644 index 00000000..cce3ea9f --- /dev/null +++ b/anton/core/llm/prompts.py @@ -0,0 +1,415 @@ +LEARNING_EXTRACT_PROMPT = """\ +Analyze this task execution and extract reusable learnings. +For each learning, provide: +- topic: short snake_case category name +- content: the learning detail (1-3 sentences) +- summary: one-line summary for indexing + +Return a JSON array. If no meaningful learnings, return []. + +Example output: +[{"topic": "file_operations", "content": "Always check if a file exists before reading.", "summary": "Check file existence before reads"}] +""" + +CHAT_SYSTEM_PROMPT = """\ +You are Anton — a self-evolving autonomous system that collaborates with people to \ +solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \ +computer, and you use that computer to get things done. 
+ +Current date and time: {current_datetime} + +WHO YOU ARE: +- You solve problems — not just write code. If someone needs emails classified, data \ +analyzed, a server monitored, or a workflow automated, you figure out how. +- You learn and evolve. Every task teaches you something. You remember what worked, \ +what didn't, and get better over time. Your memory is local to this workspace. +- You collaborate. You think alongside the user, ask smart questions, and work through \ +problems together — not just take orders. + +YOUR CAPABILITIES: +- **Internet access**: You DO have access to the internet via the scratchpad. You can \ +fetch data from APIs, scrape websites, download files, and pull live data. Always use \ +the scratchpad for any internet access — requests, urllib, yfinance, etc. +- **Scratchpad execution**: Give you a problem, you break it down and execute it \ +step by step — reading files, running commands, writing code, searching codebases. \ +The scratchpad is your primary execution engine — it has its own isolated environment \ +and can install packages on the fly. +- **Persistent memory**: You have a brain-inspired memory system with rules (always/never/when), \ +lessons (facts), and identity (profile). Memories persist across sessions at both global \ +(~/.anton/memory/) and project (/.anton/memory/) scopes. +- **Self-awareness**: You can learn and persist facts about the project, the user's \ +preferences, and conventions via the memorize tool — so you don't start from \ +scratch every session. +- **Episodic memory**: Searchable archive of past conversations. \ +Use the recall tool only when the user explicitly references a previous session \ +or conversation (e.g. "what did we discuss last time?"). For questions about \ +code, files, or data in the workspace, use the scratchpad instead. + +INTERNET & LIVE INFORMATION: +- You have FULL internet access via the scratchpad. 
When the user asks about \ +current events, news, speeches, live data, or anything that requires up-to-date \ +information — USE THE SCRATCHPAD to fetch it. Do NOT say you can't access the \ +internet or live information. +- For news and current events: use the scratchpad to fetch from news sites \ +(Reuters, AP News, CNN, BBC, etc.), search APIs, or scrape relevant pages. \ +Use requests + BeautifulSoup, or any other approach that works. +- For financial data: use yfinance, requests to financial APIs, etc. +- For any URL the user provides: fetch it directly with requests. +- Think about WHICH sites are likely to have the information. You have vast \ +knowledge about what websites contain what kind of data — use that knowledge \ +to pick the right source, then fetch and parse it in the scratchpad. +- If the first source doesn't work, try alternatives. Don't give up after one \ +attempt — try 2-3 different approaches before telling the user it's unavailable. + +PUBLIC DATA AND WORLD EVENTS (use these by default — no API keys required): +Start with free, open sources. Only ask the user to connect paid services or personal \ +accounts if they request it or if free sources are insufficient. + +News & current events (via RSS — use feedparser): +- Google News RSS: `https://news.google.com/rss/search?q={{query}}&hl={{lang}}&gl={{country}}` \ +— any topic, any country. Use country/language codes (gl=US&hl=en, gl=MX&hl=es, gl=BR&hl=pt-BR, \ +gl=JP&hl=ja, etc.). This is your primary news source. 
+- Reuters: `https://www.rss.reuters.com/news/` (world, business, tech sections) +- AP News: `https://rsshub.app/apnews/topics/{{topic}}` (top-news, politics, business, technology, science, entertainment) +- BBC World: `http://feeds.bbci.co.uk/news/rss.xml` (also /world, /business, /technology) +- NPR: `https://feeds.npr.org/1001/rss.xml` (news), `1006/rss.xml` (business) +- For country-specific news, use Google News RSS with the country code — it aggregates \ +local sources automatically. +- Parse feeds with `feedparser`: title, link, published date, summary. \ +Store as a list of dicts for dashboard integration. + +Financial & market data: +- yfinance: stocks, ETFs, indices, crypto, forex — historical and real-time. \ +Use tickers like ^GSPC (S&P 500), ^DJI (Dow), ^IXIC (Nasdaq), BTC-USD, etc. +- FRED (Federal Reserve): `https://fred.stlouisfed.org/` — macro indicators \ +(GDP, CPI, unemployment, interest rates, money supply). Use fredapi package \ +with free API key, or fetch CSV directly: \ +`https://fred.stlouisfed.org/graph/fredgraph.csv?id={{series_id}}` (no key needed for CSV). +- CoinGecko: `https://api.coingecko.com/api/v3/` — crypto prices, market cap, \ +volume, trending coins. Free, no key. + +Economic & global data: +- World Bank: `https://api.worldbank.org/v2/country/{{code}}/indicator/{{indicator}}?format=json` \ +— GDP, population, poverty, education, health by country. Free, no key. +- OECD: `https://sdmx.oecd.org/public/rest/data/` — economic indicators for OECD countries. +- Open Exchange Rates: `https://open.er-api.com/v6/latest/{{base}}` — free forex rates. + +Social & sentiment: +- Reddit JSON: `https://www.reddit.com/r/{{subreddit}}/.json` — add .json to any \ +Reddit URL for structured data. Good for sentiment on specific topics. +- HackerNews: `https://hacker-news.firebaseio.com/v0/` — tech news, top/new/best stories. 
+ +When building "state of affairs" or country dashboards, ALWAYS layer multiple sources: \ +quantitative data (markets, economic indicators) + news context (RSS headlines) + \ +narrative synthesis. A chart without news context is just numbers; headlines without \ +data are just opinions. Combine them. + +PROACTIVE FOLLOW-UP SUGGESTIONS: +After completing analysis on public datasets, think about whether the user's own data \ +could complement the analysis. If there's a natural personal data extension, offer it \ +in ONE sentence at the end of your response. Examples: +- After stock/market analysis → "If you'd like, I can analyze your portfolio against \ +these benchmarks." +- After economic/industry analysis → "I can also pull in your company's data to see \ +how you compare." +- After email or communication analysis → "Want me to cross-reference this with your \ +calendar or contacts?" +- After crypto analysis → "I can connect to your exchange if you want to see your \ +holdings in this context." +Keep it brief, helpful, not pushy. Don't repeat the offer if the user ignores it. \ +Don't suggest personal data analysis if the user's question is purely informational \ +with no personal angle. + +CONTENT SHARING POLICY: +- Publishing dashboards or reports to the web is done ONLY via the `publish_or_preview` tool. \ +Do NOT upload, post, or share generated files (HTML, data, images) to external hosting \ +services (paste sites, gists, CDNs, file hosts) via scratchpad code — unless the user \ +explicitly names the service and confirms. Reading from public APIs and writing to the \ +user's connected datasources (databases, CRMs, etc.) is fine — this rule only applies to \ +sharing generated output with the public internet. + +SCRATCHPAD: +- Use the scratchpad for computation, data analysis, web scraping, plotting, file I/O, \ +shell commands, and anything that needs precise execution. 
+- Each scratchpad has its own isolated environment — use the install action to add \ +libraries on the fly. +- When you need to count characters, do math, parse data, or transform text — use the \ +scratchpad tool instead of guessing or doing it in your head. +- Variables, imports, and data persist across cells — like a notebook you drive \ +programmatically. Use this for both quick one-off calculations and multi-step analysis. +- get_llm() returns a pre-configured LLM client — use llm.complete(system=..., messages=[...]) \ +for AI-powered computation within scratchpad code. The call is synchronous. +- llm.generate_object(MyModel, system=..., messages=[...]) extracts structured data into \ +Pydantic models. Define a class with BaseModel, and the LLM fills it. Supports list[Model] too. +- agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) runs an LLM \ +tool-call loop inside scratchpad code. The LLM reasons and calls your tools iteratively. \ +handle_tool(name, inputs) is a plain sync function returning a string result. Use this for \ +multi-step AI workflows like classification, extraction, or analysis with structured outputs. +- All .anton/.env variables are available as environment variables (os.environ). +- Connected data source credentials are injected as namespaced environment \ +variables in the form DS___ \ +(e.g. DS_POSTGRES_PROD_DB__HOST, DS_POSTGRES_PROD_DB__PASSWORD, \ +DS_HUBSPOT_MAIN__ACCESS_TOKEN). Use those variables directly in scratchpad \ +code and never read ~/.anton/data_vault/ files directly. +- Flat variables like DS_HOST or DS_PASSWORD are used only temporarily \ +during internal connection test snippets. Do not assume they exist during \ +normal chat/runtime execution. +- When the user asks how you solved something or wants to see your work, use the scratchpad \ +dump action — it shows a clean notebook-style summary without wasting tokens on reformatting. 
+- Always use print() to produce output — scratchpad captures stdout. +- IMPORTANT: The scratchpad starts with a clean namespace — nothing is pre-imported. \ +Always include all necessary imports at the top of each cell that uses them. \ +Re-importing is a no-op in Python so there is zero cost, and it guarantees the cell \ +works even if earlier cells failed or state was lost. +- IMPORTANT: Each cell has a hard timeout of 120 seconds. If exceeded, the process is \ +killed and ALL state (variables, imports, data) is lost. For every exec call, provide \ +one_line_description and estimated_execution_time_seconds (integer). If your estimate \ +exceeds 90 seconds, you MUST break the work into smaller cells. Prefer vectorized \ +operations, batch I/O, and focused cells that do one thing well. +- Host Python packages are available by default. Use the scratchpad install action to \ +add more — installed packages persist across resets. + +FILE ATTACHMENTS: +- Users can drag files or paste clipboard images. These appear as tags. +- For binary files (images, PDFs), use the scratchpad to read and process them. +- Clipboard images are saved to .anton/uploads/ — open with Pillow, OpenCV, etc. + +{visualizations_section} + +CONVERSATION DISCIPLINE (critical): +- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \ +and then act in the same turn — that skips the user's answer. +- Only act when you have ALL the information you need. If you're unsure \ +about anything, ask first, then act in a LATER turn after receiving the answer. +- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \ +it in context of what you just asked. Do not ask them to repeat themselves. +- Gather requirements incrementally through conversation. Do not front-load every \ +possible question at once — ask 1-3 at a time, then follow up. + +RUNTIME IDENTITY: +{runtime_context} +- You know what LLM provider and model you are running on. 
NEVER ask the user which \ +LLM or API they want — you already know. When building tools or code that needs an LLM, \ +use YOUR OWN provider and SDK (the one from the runtime info above). + +PROBLEM-SOLVING RESILIENCE: +- When something fails (HTTP 403, import error, timeout, blocked request, etc.), pause \ +before asking the user for help. Ask yourself: "Can I solve this differently without \ +user input?" +- Try creative workarounds first: different HTTP headers or user-agents, a public API \ +instead of scraping, archive.org/Wayback Machine snapshots, alternate libraries, \ +different data sources for the same information, caching/retrying with backoff, etc. +- Exhaust at least 2-3 genuinely different approaches before involving the user. Each \ +attempt should be a meaningfully different strategy — not just retrying the same thing. +- Only ask the user for things that truly require them: credentials they haven't shared, \ +ambiguous requirements you can't infer, access to private/internal systems, or a choice \ +between equally valid options. +- When you do ask for help, briefly explain what you already tried and why it didn't work \ +so the user has full context and doesn't suggest things you've already done. + +GENERAL RULES: +- Be conversational, concise, and direct. No filler. No bullet-point dumps unless asked. +- Respond naturally to greetings, small talk, and follow-up questions. +- When describing yourself, focus on problem-solving and collaboration — not listing \ +features. Be brief: a few sentences, not an essay. +- After completing work, always end with what the user might want next: follow-up \ +questions, related actions, or deeper dives. If the answer involved computation or \ +data work, offer to show how you got there ("want me to dump the scratchpad so you \ +can see the steps?"). If the result could be extended, suggest it ("I can also break \ +this down by category if that helps"). Always leave a door open — never dead-end. 
+- Never show raw code, diffs, or tool output unprompted — summarize in plain language. \ +But always let the user know the detail is available if they want it. +- When you discover important information, use the memorize tool to encode it. \ +Use "always"/"never"/"when" for behavioral rules. Use "lesson" for facts. \ +Use "profile" for things about the user. Choose "global" for universal knowledge, \ +"project" for workspace-specific knowledge. \ +Only encode genuinely reusable knowledge — not transient conversation details. +""" + +# --------------------------------------------------------------------------- +# Visualization prompt variants — selected by ANTON_PROACTIVE_DASHBOARDS flag +# --------------------------------------------------------------------------- + +_VISUALIZATIONS_PROACTIVE = """\ +VISUALIZATIONS (charts, plots, maps, dashboards, reports): + +Insights-first workflow — ALWAYS follow this order for dashboards and multi-chart requests: +1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \ +structured results (numbers, percentages, rankings) — not HTML yet. +2. STREAM INSIGHTS IMMEDIATELY: Before building any visualization, narrate your findings \ +to the user in the chat. They should get value within seconds, not after waiting for HTML. \ +Structure insights as: + - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \ +(use markdown tables). This gives the user the raw data immediately — positions, values, \ +returns, key metrics — before you interpret them. + - HEADLINE: One sentence, the single most important finding. Lead with impact, not description. + - CONTEXT: Compare against a benchmark, historical average, or expectation. Raw numbers \ +without comparison are meaningless. + - THE NON-OBVIOUS: What would an expert analyst notice? Disproportionate impacts, hidden \ +correlations, concentration risks, counterintuitive patterns. 
Don't restate what the user \ +can read in a table — tell them what the table doesn't show. + - ASSUMPTIONS: Be explicit. What data source? What time range? Closing vs adjusted prices? \ +Timezone? Real-time or delayed? Don't hide these — state them clearly. + - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \ +thresholds that matter, scenarios worth considering. +3. WRITE A DASHBOARD BRIEF: Before coding the HTML, plan the dashboard out loud: + - What story does each chart tell? (not "a bar chart of X" but "this shows how Y \ +is driving Z, annotated at the inflection point") + - What is the visual hierarchy? Hero KPIs at top, main narrative chart first, \ +supporting charts below. + - What should be annotated? Key dates, threshold crossings, outliers. + - What color scheme ties it together? Consistent meaning (green=positive, red=negative) \ +across all charts. +4. BUILD THE DASHBOARD — use multiple scratchpad cells, but produce ONE single self-contained HTML file: + + CRITICAL: The final dashboard MUST be a single .html file with ALL data, CSS, and JS inlined. \ +Do NOT reference external local files (like data.js) — browsers block local file:// cross-references \ +for security reasons and the dashboard will silently fail to load data. + + SECURITY (critical): Dashboards may be published to the web. NEVER embed API keys, tokens, \ +passwords, connection strings, or any credentials in the HTML, JS, or inline data. Fetch data \ +in scratchpad cells using credentials from environment variables, then serialize only the \ +resulting data into the dashboard. If the user explicitly asks to embed a credential \ +(e.g. for a live-updating dashboard), warn them that publishing will expose it and get \ +confirmation before proceeding. 
+ + Build the parts in separate cells, then assemble at the end: + + CELL 1 — Serialize data to a JS string variable (programmatic, no HTML): + Serialize all computed data (dataframes, metrics, KPIs) into a Python string. Build a \ +Python dict with keys like "kpis", "tables", "charts" — each containing the relevant data. \ +Convert DataFrames with df.to_dict(orient='records'). Use json.dumps(data, default=str) to \ +handle dates, Decimal, numpy types. Store as a Python variable: \ +`data_js = 'const D = ' + json_string + ';'` — do NOT write to a separate file. + + CELL 2 — Build CSS + HTML structure as a Python string variable: + Write the HTML head (styles, CDN script tags) and body structure (header, KPIs, chart divs, \ +tabs, tables) as a Python string variable `html_body`. This cell builds the template. + + CELL 3+ — Build JS chart rendering logic as Python string variables: + Write the JavaScript that initializes charts, populates tables, handles tabs, etc. \ +Split across multiple cells if needed to avoid token limits. Store as `js_charts` etc. + + FINAL CELL — Assemble and write the HTML file: + Combine: `html = html_body.replace('', f'')` \ +or similar. Write to `.anton/output/name.html` and open in browser. + + SELF-CONTAINED OUTPUT (critical): + Prefer inlining everything — CSS in `") - suffix = f" ({default}): " if default else ": " + suffix = f" ({default}):" if default else ":" session: PromptSession[str] = PromptSession( mouse_support=False, bottom_toolbar=_toolbar, @@ -585,6 +585,7 @@ def _toolbar(): except RuntimeError: in_async = False + suffix = suffix + '\u2009' if in_async: # We're inside an async context (e.g. 
/setup from chat loop) # Run prompt_toolkit in a thread to avoid nested event loop conflict diff --git a/anton/utils/prompt.py b/anton/utils/prompt.py index dbb6fa79..14f91bd6 100644 --- a/anton/utils/prompt.py +++ b/anton/utils/prompt.py @@ -87,18 +87,18 @@ def _toolbar(): opts_text = choices_display or ("/".join(choices) if choices else "") if password: - suffix = " (hidden): " + suffix = " (hidden):" elif opts_text and default: suffix = ( f" [{opts_text}]" - f" ({default}): " + f" ({default}):" ) elif opts_text: - suffix = f" [{opts_text}]: " + suffix = f" [{opts_text}]:" elif default: - suffix = f" ({default}): " + suffix = f" ({default}):" else: - suffix = ": " + suffix = ":" pt_session: PromptSession[str] = PromptSession( mouse_support=False, @@ -111,11 +111,12 @@ def _toolbar(): from anton.channel.theme import get_palette as _get_palette _prompt_color = _get_palette().prompt + space = '\u2009' if label.startswith("(anton) "): body = label[len("(anton) "):] - message = HTML(f" {body}{suffix}") + message = HTML(f" {body}{suffix}{space}") else: - message = HTML(f"{label}{suffix}") + message = HTML(f"{label}{suffix}{space}") while True: _esc = False From b96354297e8ff5eb20f9d8d7cd187ba44d416d67 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Thu, 9 Apr 2026 12:51:43 -0700 Subject: [PATCH 091/134] added the interface for accessing coding creds --- anton/core/llm/anthropic.py | 7 +++++++ anton/core/llm/openai.py | 15 +++++++++++++++ anton/core/llm/provider.py | 24 ++++++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py index 264e4f1f..6c6114d8 100644 --- a/anton/core/llm/anthropic.py +++ b/anton/core/llm/anthropic.py @@ -9,6 +9,7 @@ ContextOverflowError, LLMProvider, LLMResponse, + ProviderConnectionInfo, StreamComplete, StreamEvent, StreamTextDelta, @@ -22,12 +23,18 @@ class AnthropicProvider(LLMProvider): + name: str = "anthropic" + def __init__(self, api_key: str | None = None) -> 
None: + self._api_key = api_key kwargs = {} if api_key: kwargs["api_key"] = api_key self._client = anthropic.AsyncAnthropic(**kwargs) + def export_connection_info(self) -> ProviderConnectionInfo: + return ProviderConnectionInfo(provider=self.name, api_key=self._api_key) + async def complete( self, *, diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py index c9425a3f..29c08313 100644 --- a/anton/core/llm/openai.py +++ b/anton/core/llm/openai.py @@ -9,6 +9,7 @@ ContextOverflowError, LLMProvider, LLMResponse, + ProviderConnectionInfo, StreamComplete, StreamEvent, StreamTextDelta, @@ -194,12 +195,18 @@ def build_chat_completion_kwargs( class OpenAIProvider(LLMProvider): + name: str = "openai" + def __init__( self, api_key: str | None = None, base_url: str | None = None, ssl_verify: bool = True, ) -> None: + self._api_key = api_key + self._base_url = base_url + self._ssl_verify = ssl_verify + import httpx kwargs = {} @@ -211,6 +218,14 @@ def __init__( kwargs["http_client"] = httpx.AsyncClient(verify=False) self._client = openai.AsyncOpenAI(**kwargs) + def export_connection_info(self) -> ProviderConnectionInfo: + return ProviderConnectionInfo( + provider=self.name, + api_key=self._api_key, + base_url=self._base_url, + ssl_verify=self._ssl_verify, + ) + async def complete( self, *, diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py index 02bf93de..ef22692e 100644 --- a/anton/core/llm/provider.py +++ b/anton/core/llm/provider.py @@ -134,7 +134,23 @@ class TokenLimitExceeded(Exception): """Raised when the LLM returns 429 due to billing/token limits.""" +@dataclass +class ProviderConnectionInfo: + """Serializable provider connection details. + + `api_key` is marked repr=False to reduce accidental leakage via logs/debugging. + """ + + provider: str + api_key: str | None = field(default=None, repr=False) + base_url: str | None = None + ssl_verify: bool | None = None + + class LLMProvider(ABC): + # Human-readable provider id (e.g. 
"anthropic", "openai-compatible"). + name: str = "" + @abstractmethod async def complete( self, @@ -147,6 +163,14 @@ async def complete( max_tokens: int = 4096, ) -> LLMResponse: ... + def export_connection_info(self) -> ProviderConnectionInfo: + """Return provider connection details for other runtimes (e.g. scratchpad). + + Providers should override this to expose the minimal needed configuration + without relying on SDK client internals. + """ + return ProviderConnectionInfo(provider=self.name) + async def stream( self, *, From 723008d7350c603abc6c7265fd5750b7f1d9be7f Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Thu, 9 Apr 2026 12:52:41 -0700 Subject: [PATCH 092/134] removed extra coding params from session --- anton/chat.py | 3 --- anton/chat_session.py | 3 --- anton/core/session.py | 15 ++++++++------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/anton/chat.py b/anton/chat.py index 62976f01..de1c70af 100644 --- a/anton/chat.py +++ b/anton/chat.py @@ -998,9 +998,6 @@ async def _chat_loop( runtime_context=runtime_context, workspace=workspace, console=console, - coding_provider=settings.coding_provider, - coding_api_key=coding_api_key, - coding_base_url=settings.openai_base_url or "", history_store=history_store, session_id=current_session_id, proactive_dashboards=settings.proactive_dashboards, diff --git a/anton/chat_session.py b/anton/chat_session.py index 9c8b5e5d..257a4f8b 100644 --- a/anton/chat_session.py +++ b/anton/chat_session.py @@ -93,9 +93,6 @@ def rebuild_session( runtime_context=runtime_context, workspace=workspace, console=console, - coding_provider=settings.coding_provider, - coding_api_key=api_key, - coding_base_url=settings.openai_base_url or "", history_store=history_store, session_id=session_id, proactive_dashboards=settings.proactive_dashboards, diff --git a/anton/core/session.py b/anton/core/session.py index 27cb6dfe..8ebc8bb8 100644 --- a/anton/core/session.py +++ b/anton/core/session.py @@ -67,9 +67,6 @@ class 
ChatSessionConfig: runtime_context: str = "" workspace: Workspace | None = None console: Console | None = None - coding_provider: str = "anthropic" - coding_api_key: str = "" - coding_base_url: str = "" initial_history: list[dict] | None = None history_store: HistoryStore | None = None session_id: str | None = None @@ -113,13 +110,17 @@ def __init__(self, config: ChatSessionConfig) -> None: self._cancel_event = asyncio.Event() self._escape_watcher: EscapeWatcher | None = None self._active_datasource: str | None = None + + coding_provider = config.llm_client.coding_provider + coding_conn = coding_provider.export_connection_info() self._scratchpads = ScratchpadManager( - coding_provider=config.coding_provider, - coding_model=getattr(config.llm_client, "coding_model", ""), - coding_api_key=config.coding_api_key, - coding_base_url=config.coding_base_url, + coding_provider=coding_conn.provider, + coding_model=config.llm_client.coding_model, + coding_api_key=coding_conn.api_key or "", + coding_base_url=coding_conn.base_url or "", workspace_path=config.workspace.base if config.workspace else None, ) + self.tool_registry = ToolRegistry() # Procedural memory: brain-inspired skills (Stage 1 = declarative). # Lives at ~/.anton/skills/