From dba0103da51e3fe88ff84ee86421d5e3adc50b3d Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 13:41:23 -0700
Subject: [PATCH 001/134] introduced the core pkg for Anton

---
 anton/core/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 anton/core/__init__.py

diff --git a/anton/core/__init__.py b/anton/core/__init__.py
new file mode 100644
index 00000000..e69de29b

From eca2051c035e70e819a70ac7cdcafb52e66731c8 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 14:48:40 -0700
Subject: [PATCH 002/134] added pkgs for core components

---
 anton/core/backends/__init__.py | 0
 anton/core/llm/__init__.py      | 0
 anton/core/memory/__init__.py   | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 anton/core/backends/__init__.py
 create mode 100644 anton/core/llm/__init__.py
 create mode 100644 anton/core/memory/__init__.py

diff --git a/anton/core/backends/__init__.py b/anton/core/backends/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/anton/core/llm/__init__.py b/anton/core/llm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/anton/core/memory/__init__.py b/anton/core/memory/__init__.py
new file mode 100644
index 00000000..e69de29b

From 8bca73076047989dc2e0614577420a6bef686c7a Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 15:49:09 -0700
Subject: [PATCH 003/134] separated the core chat session

---
 anton/core/session.py | 1183 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1183 insertions(+)
 create mode 100644 anton/core/session.py

diff --git a/anton/core/session.py b/anton/core/session.py
new file mode 100644
index 00000000..b4253542
--- /dev/null
+++ b/anton/core/session.py
@@ -0,0 +1,1183 @@
+from __future__ import annotations
+
+import asyncio
+from collections.abc import AsyncIterator
+from typing import TYPE_CHECKING
+
+from anton.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt
+from anton.llm.provider import (
+    ContextOverflowError,
+    StreamComplete,
+    StreamContextCompacted,
+    StreamEvent,
+    StreamTaskProgress,
+    StreamTextDelta,
+    StreamToolResult,
+)
+from anton.scratchpad import ScratchpadManager
+from anton.tools import (
+    CONNECT_DATASOURCE_TOOL,
+    MEMORIZE_TOOL,
+    PUBLISH_TOOL,
+    RECALL_TOOL,
+    SCRATCHPAD_TOOL,
+    dispatch_tool,
+    format_cell_result,
+    prepare_scratchpad_exec,
+)
+
+from anton.utils.datasources import (
+    build_datasource_context,
+    scrub_credentials,
+)
+
+if TYPE_CHECKING:
+    from rich.console import Console
+    from anton.context.self_awareness import SelfAwarenessContext
+    from anton.llm.client import LLMClient
+    from anton.memory.cortex import Cortex
+    from anton.memory.episodes import EpisodicMemory
+    from anton.memory.history_store import HistoryStore
+    from anton.workspace import Workspace
+
+
+_MAX_TOOL_ROUNDS = 25  # Hard limit on consecutive tool-call rounds per turn
+_MAX_CONTINUATIONS = 3  # Max times the verification loop can restart the tool loop
+_CONTEXT_PRESSURE_THRESHOLD = 0.7  # Trigger compaction when context is 70% full
+_MAX_CONSECUTIVE_ERRORS = 5  # Stop if the same tool fails this many times in a row
+_RESILIENCE_NUDGE_AT = 2  # Inject resilience nudge after this many consecutive errors
+_RESILIENCE_NUDGE = (
+    "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or "
+    "asking the user for help, try a creative workaround — different headers/user-agent, "
+    "a public API, archive.org, an alternate library, or a completely different data source. "
+    "Only involve the user if the problem truly requires something only they can provide."
+)
+
+# TODO: Is this enough for now?
+TOKEN_STATUS_CACHE_TTL = 60.0
+
+
+def _apply_error_tracking(
+    result_text: str,
+    tool_name: str,
+    error_streak: dict[str, int],
+    resilience_nudged: set[str],
+) -> str:
+    """Track consecutive errors per tool and append nudge/circuit-breaker messages."""
+    is_error = any(
+        marker in result_text
+        for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:")
+    )
+    if is_error:
+        error_streak[tool_name] = error_streak.get(tool_name, 0) + 1
+    else:
+        error_streak[tool_name] = 0
+        resilience_nudged.discard(tool_name)
+
+    streak = error_streak.get(tool_name, 0)
+    if streak >= _RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged:
+        result_text += _RESILIENCE_NUDGE
+        resilience_nudged.add(tool_name)
+
+    if streak >= _MAX_CONSECUTIVE_ERRORS:
+        result_text += (
+            f"\n\nSYSTEM: The '{tool_name}' tool has failed {_MAX_CONSECUTIVE_ERRORS} times "
+            "in a row. Stop retrying this approach. Either try a completely different "
+            "strategy or tell the user what's going wrong so they can help."
+        )
+
+    return result_text
+
+
+class ChatSession:
+    """Manages a multi-turn conversation with tool-call delegation."""
+
+    def __init__(
+        self,
+        llm_client: LLMClient,
+        *,
+        self_awareness: SelfAwarenessContext | None = None,
+        cortex: Cortex | None = None,
+        episodic: EpisodicMemory | None = None,
+        runtime_context: str = "",
+        workspace: Workspace | None = None,
+        console: Console | None = None,
+        coding_provider: str = "anthropic",
+        coding_api_key: str = "",
+        coding_base_url: str = "",
+        initial_history: list[dict] | None = None,
+        history_store: HistoryStore | None = None,
+        session_id: str | None = None,
+        proactive_dashboards: bool = False,
+    ) -> None:
+        self._llm = llm_client
+        self._self_awareness = self_awareness
+        self._cortex = cortex
+        self._episodic = episodic
+        self._runtime_context = runtime_context
+        self._proactive_dashboards = proactive_dashboards
+        self._workspace = workspace
+        self._console = console
+        self._history: list[dict] = list(initial_history) if initial_history else []
+        self._pending_memory_confirmations: list = []
+        self._turn_count = (
+            sum(1 for m in self._history if m.get("role") == "user")
+            if initial_history
+            else 0
+        )
+        self._history_store = history_store
+        self._session_id = session_id
+        self._cancel_event = asyncio.Event()
+        self._escape_watcher: "EscapeWatcher | None" = None
+        self._active_datasource: str | None = None
+        self._scratchpads = ScratchpadManager(
+            coding_provider=coding_provider,
+            coding_model=getattr(llm_client, "coding_model", ""),
+            coding_api_key=coding_api_key,
+            coding_base_url=coding_base_url,
+            workspace_path=workspace.base if workspace else None,
+        )
+
+    @property
+    def history(self) -> list[dict]:
+        return self._history
+
+    def repair_history(self) -> None:
+        """Fix dangling tool_use blocks left by mid-stream cancellation.
+
+        The Anthropic API requires every tool_use to be followed by a
+        tool_result.  If we cancelled mid-turn, the last assistant message
+        may contain tool_use blocks with no corresponding tool_result in
+        the next message.  Append synthetic tool_results so the
+        conversation can continue.
+        """
+        if not self._history:
+            return
+        last = self._history[-1]
+        if last.get("role") != "assistant":
+            return
+        content = last.get("content")
+        if not isinstance(content, list):
+            return
+        tool_ids = [
+            block["id"]
+            for block in content
+            if isinstance(block, dict) and block.get("type") == "tool_use"
+        ]
+        if not tool_ids:
+            return
+        self._history.append(
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": tid,
+                        "content": "Cancelled by user.",
+                    }
+                    for tid in tool_ids
+                ],
+            }
+        )
+
+    def _persist_history(self) -> None:
+        """Save current history to disk if a history store is configured."""
+        if self._history_store and self._session_id:
+            self._history_store.save(self._session_id, self._history)
+
+    async def _build_system_prompt(self, user_message: str = "") -> str:
+        import datetime as _dt
+        _now = _dt.datetime.now()
+        _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p")
+
+        prompt = CHAT_SYSTEM_PROMPT.format(
+            runtime_context=self._runtime_context,
+            visualizations_section=build_visualizations_prompt(
+                self._proactive_dashboards
+            ),
+            current_datetime=_current_datetime,
+        )
+        # Inject memory context (replaces old self_awareness)
+        if self._cortex is not None:
+            memory_section = await self._cortex.build_memory_context(user_message)
+            if memory_section:
+                prompt += memory_section
+        elif self._self_awareness is not None:
+            # Fallback for legacy usage (tests, etc.)
+            sa_section = self._self_awareness.build_prompt_section()
+            if sa_section:
+                prompt += sa_section
+        # Inject anton.md project context (user-written takes priority)
+        if self._workspace is not None:
+            md_context = self._workspace.build_anton_md_context()
+            if md_context:
+                prompt += md_context
+        # Inject connected datasource context without credentials
+        ds_ctx = build_datasource_context(active_only=self._active_datasource)
+        if ds_ctx:
+            prompt += ds_ctx
+        return prompt
+
+    # Packages the LLM is most likely to care about when writing scratchpad code.
+    _NOTABLE_PACKAGES: set[str] = {
+        "numpy",
+        "pandas",
+        "matplotlib",
+        "seaborn",
+        "scipy",
+        "scikit-learn",
+        "requests",
+        "httpx",
+        "aiohttp",
+        "beautifulsoup4",
+        "lxml",
+        "pillow",
+        "sympy",
+        "networkx",
+        "sqlalchemy",
+        "pydantic",
+        "rich",
+        "tqdm",
+        "click",
+        "fastapi",
+        "flask",
+        "django",
+        "openai",
+        "anthropic",
+        "tiktoken",
+        "transformers",
+        "torch",
+        "polars",
+        "pyarrow",
+        "openpyxl",
+        "xlsxwriter",
+        "plotly",
+        "bokeh",
+        "altair",
+        "pytest",
+        "hypothesis",
+        "yaml",
+        "pyyaml",
+        "toml",
+        "tomli",
+        "tomllib",
+        "jinja2",
+        "markdown",
+        "pygments",
+        "cryptography",
+        "paramiko",
+        "boto3",
+    }
+
+    def _build_tools(self) -> list[dict]:
+        scratchpad_tool = dict(SCRATCHPAD_TOOL)
+        pkg_list = self._scratchpads._available_packages
+        if pkg_list:
+            notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES)
+            if notable:
+                pkg_line = ", ".join(notable)
+                extra = f"\n\nInstalled packages ({len(pkg_list)} total, notable: {pkg_line})."
+            else:
+                extra = f"\n\nInstalled packages: {len(pkg_list)} total (standard library plus dependencies)."
+            scratchpad_tool["description"] = SCRATCHPAD_TOOL["description"] + extra
+
+        # Inject scratchpad wisdom from memory (procedural priming)
+        if self._cortex is not None:
+            wisdom = self._cortex.get_scratchpad_context()
+            if wisdom:
+                scratchpad_tool[
+                    "description"
+                ] += f"\n\nLessons from past sessions:\n{wisdom}"
+
+        tools = [scratchpad_tool]
+        if self._cortex is not None:
+            tools.append(MEMORIZE_TOOL)
+        elif self._self_awareness is not None:
+            # Legacy fallback
+            from anton.tools import MEMORIZE_TOOL as _MT
+
+            tools.append(_MT)
+        if self._episodic is not None and self._episodic.enabled:
+            tools.append(RECALL_TOOL)
+        tools.append(CONNECT_DATASOURCE_TOOL)
+        tools.append(PUBLISH_TOOL)
+        return tools
+
+    async def close(self) -> None:
+        """Clean up scratchpads and other resources."""
+        await self._scratchpads.close_all()
+
+    async def _summarize_history(self) -> None:
+        """Compress old conversation turns into a summary using the coding model.
+
+        Splits history into old (first 60%) and recent (last 40%), keeping at
+        least 4 recent turns.  The old portion is summarized by the fast coding
+        model and replaced with a single user message.
+        """
+        if len(self._history) < 6:
+            return  # Too short to summarize
+
+        min_recent = 4
+        split = max(int(len(self._history) * 0.6), 1)
+        # Ensure we keep at least min_recent turns
+        split = min(split, len(self._history) - min_recent)
+        if split < 2:
+            return
+
+        # Walk split backward to avoid breaking tool_use / tool_result pairs.
+        # A user message containing tool_result blocks must stay with the
+        # preceding assistant message that contains the matching tool_use.
+        while split > 1:
+            msg = self._history[split]
+            if msg.get("role") != "user":
+                break
+            content = msg.get("content")
+            if not isinstance(content, list):
+                break
+            has_tool_result = any(
+                isinstance(b, dict) and b.get("type") == "tool_result" for b in content
+            )
+            if not has_tool_result:
+                break
+            # This user message has tool_results — keep it (and its paired
+            # assistant message) in the recent portion.
+            split -= 1
+            # Also pull back over the preceding assistant message so the
+            # pair stays together.
+            if split > 1 and self._history[split].get("role") == "assistant":
+                split -= 1
+
+        if split < 2:
+            return
+
+        old_turns = self._history[:split]
+        recent_turns = self._history[split:]
+
+        # Serialize old turns into text for summarization
+        lines: list[str] = []
+        for msg in old_turns:
+            role = msg.get("role", "unknown")
+            content = msg.get("content", "")
+            if isinstance(content, str):
+                lines.append(f"[{role}]: {content[:2000]}")
+            elif isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "text":
+                            lines.append(f"[{role}]: {block['text'][:1000]}")
+                        elif block.get("type") == "tool_use":
+                            lines.append(
+                                f"[{role}/tool_use]: {block.get('name', '')}({str(block.get('input', ''))[:500]})"
+                            )
+                        elif block.get("type") == "tool_result":
+                            lines.append(
+                                f"[tool_result]: {str(block.get('content', ''))[:500]}"
+                            )
+
+        old_text = "\n".join(lines)
+        # Cap at ~8000 chars to avoid overloading the summarizer
+        if len(old_text) > 8000:
+            old_text = old_text[:8000] + "\n... (truncated)"
+
+        try:
+            summary_response = await self._llm.code(
+                system=(
+                    "Summarize this conversation history concisely. Preserve:\n"
+                    "- Key decisions and conclusions\n"
+                    "- Important data/results discovered\n"
+                    "- Variable names and values that are still relevant\n"
+                    "- Errors encountered and how they were resolved\n"
+                    "Keep it under 2000 tokens. Use bullet points."
+                ),
+                messages=[{"role": "user", "content": old_text}],
+                max_tokens=2048,
+            )
+            summary = summary_response.content or "(summary unavailable)"
+        except Exception:
+            # If summarization fails, just do a simple truncation
+            summary = f"(Earlier conversation with {len(old_turns)} turns — summarization failed)"
+
+        summary_msg = {
+            "role": "user",
+            "content": f"[Context summary of earlier conversation]\n{summary}",
+        }
+
+        # If the recent portion starts with a user message, insert a minimal
+        # assistant separator to avoid consecutive user messages (API error).
+        if recent_turns and recent_turns[0].get("role") == "user":
+            self._history = [
+                summary_msg,
+                {"role": "assistant", "content": "Understood."},
+                *recent_turns,
+            ]
+        else:
+            self._history = [summary_msg] + recent_turns
+
+    def _compact_scratchpads(self) -> bool:
+        """Compact all active scratchpads. Returns True if any were compacted."""
+        compacted = False
+        for pad in self._scratchpads._pads.values():
+            if pad._compact_cells():
+                compacted = True
+        return compacted
+
+    async def turn(self, user_input: str | list[dict]) -> str:
+        self._history.append({"role": "user", "content": user_input})
+
+        user_msg_str = user_input if isinstance(user_input, str) else ""
+        system = await self._build_system_prompt(user_msg_str)
+        tools = self._build_tools()
+
+        try:
+            response = await self._llm.plan(
+                system=system,
+                messages=self._history,
+                tools=tools,
+            )
+        except ContextOverflowError:
+            await self._summarize_history()
+            self._compact_scratchpads()
+            response = await self._llm.plan(
+                system=system,
+                messages=self._history,
+                tools=tools,
+            )
+
+        # Proactive compaction
+        if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD:
+            await self._summarize_history()
+            self._compact_scratchpads()
+
+        # Handle tool calls
+        tool_round = 0
+        error_streak: dict[str, int] = {}
+        resilience_nudged: set[str] = set()
+
+        while response.tool_calls:
+            tool_round += 1
+            if tool_round > _MAX_TOOL_ROUNDS:
+                self._history.append(
+                    {"role": "assistant", "content": response.content or ""}
+                )
+                self._history.append(
+                    {
+                        "role": "user",
+                        "content": (
+                            f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
+                            "Pause here. Summarize what you have accomplished so far and what remains. "
+                            "If you believe you are on a good track and can finish the task with more steps, "
+                            "tell the user and ask if they'd like you to continue. "
+                            "Do NOT retry automatically — wait for the user's response."
+                        ),
+                    }
+                )
+                response = await self._llm.plan(
+                    system=system,
+                    messages=self._history,
+                )
+                break
+
+            # Build assistant message with content blocks
+            assistant_content: list[dict] = []
+            if response.content:
+                assistant_content.append({"type": "text", "text": response.content})
+            for tc in response.tool_calls:
+                assistant_content.append(
+                    {
+                        "type": "tool_use",
+                        "id": tc.id,
+                        "name": tc.name,
+                        "input": tc.input,
+                    }
+                )
+            self._history.append({"role": "assistant", "content": assistant_content})
+
+            # Process each tool call via registry
+            tool_results: list[dict] = []
+            for tc in response.tool_calls:
+                try:
+                    result_text = await dispatch_tool(self, tc.name, tc.input)
+                except Exception as exc:
+                    result_text = f"Tool '{tc.name}' failed: {exc}"
+
+                result_text = scrub_credentials(result_text)
+                result_text = _apply_error_tracking(
+                    result_text,
+                    tc.name,
+                    error_streak,
+                    resilience_nudged,
+                )
+
+                tool_results.append(
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": tc.id,
+                        "content": result_text,
+                    }
+                )
+
+            self._history.append({"role": "user", "content": tool_results})
+
+            # Get follow-up from LLM
+            try:
+                response = await self._llm.plan(
+                    system=system,
+                    messages=self._history,
+                    tools=tools,
+                )
+            except ContextOverflowError:
+                await self._summarize_history()
+                self._compact_scratchpads()
+                response = await self._llm.plan(
+                    system=system,
+                    messages=self._history,
+                    tools=tools,
+                )
+
+            # Proactive compaction during tool loop
+            if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD:
+                await self._summarize_history()
+                self._compact_scratchpads()
+
+        # Text-only response
+        reply = response.content or ""
+        self._history.append({"role": "assistant", "content": reply})
+
+        # Periodic memory vacuum (Systems Consolidation)
+        if self._cortex is not None and self._cortex.mode != "off":
+            self._cortex.maybe_vacuum()
+
+        return reply
+
+    async def turn_stream(
+        self, user_input: str | list[dict]
+    ) -> AsyncIterator[StreamEvent]:
+        """Streaming version of turn(). Yields events as they arrive."""
+        self._history.append({"role": "user", "content": user_input})
+
+        # Log user input to episodic memory
+        if self._episodic is not None:
+            content = (
+                user_input if isinstance(user_input, str) else str(user_input)[:2000]
+            )
+            self._episodic.log_turn(self._turn_count + 1, "user", content)
+
+        user_msg_str = user_input if isinstance(user_input, str) else ""
+        assistant_text_parts: list[str] = []
+        _max_auto_retries = 2
+        _retry_count = 0
+
+        while True:
+            try:
+                async for event in self._stream_and_handle_tools(user_msg_str):
+                    if isinstance(event, StreamTextDelta):
+                        assistant_text_parts.append(event.text)
+                    yield event
+                break  # completed successfully
+            except Exception as _agent_exc:
+                _retry_count += 1
+                if _retry_count <= _max_auto_retries:
+                    # Inject the error into history and let the LLM try to recover
+                    self._history.append(
+                        {
+                            "role": "user",
+                            "content": (
+                                f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n"
+                                "If you can diagnose and fix the issue, continue working on the task. "
+                                "Adjust your approach to avoid the same error. "
+                                "If this is unrecoverable, summarize what you accomplished and suggest next steps."
+                            ),
+                        }
+                    )
+                    # Continue the while loop — _stream_and_handle_tools will be called
+                    # again with the error context now in history
+                    continue
+                else:
+                    # Exhausted retries — stop and summarize for the user
+                    self._history.append(
+                        {
+                            "role": "user",
+                            "content": (
+                                f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n"
+                                "Stop retrying. Please:\n"
+                                "1. Summarize what you accomplished so far.\n"
+                                "2. Explain what went wrong in plain language.\n"
+                                "3. Suggest next steps — what the user can try (e.g. rephrase, "
+                                "simplify the request, or ask you to continue from where you left off).\n"
+                                "Be concise and helpful."
+                            ),
+                        }
+                    )
+                    try:
+                        async for event in self._llm.plan_stream(
+                            system=await self._build_system_prompt(user_msg_str),
+                            messages=self._history,
+                        ):
+                            if isinstance(event, StreamTextDelta):
+                                assistant_text_parts.append(event.text)
+                            yield event
+                    except Exception:
+                        fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request."
+                        assistant_text_parts.append(fallback)
+                        yield StreamTextDelta(text=fallback)
+                    break
+
+        # Log assistant response to episodic memory
+        if self._episodic is not None and assistant_text_parts:
+            self._episodic.log_turn(
+                self._turn_count + 1,
+                "assistant",
+                "".join(assistant_text_parts)[:2000],
+            )
+
+        # Identity extraction (Default Mode Network — every 5 turns)
+        self._turn_count += 1
+        self._persist_history()
+        if self._cortex is not None and self._cortex.mode != "off":
+            if self._turn_count % 5 == 0 and isinstance(user_input, str):
+                asyncio.create_task(self._cortex.maybe_update_identity(user_input))
+            # Periodic memory vacuum (Systems Consolidation)
+            self._cortex.maybe_vacuum()
+
+    async def _stream_and_handle_tools(
+        self, user_message: str = ""
+    ) -> AsyncIterator[StreamEvent]:
+        """Stream one LLM call, handle tool loops, yield all events."""
+        system = await self._build_system_prompt(user_message)
+        tools = self._build_tools()
+
+        # Guard against summarizing an already-summarized history within the same
+        # turn (e.g. ContextOverflowError on first call + pressure > threshold on
+        # the tool-loop follow-up would previously produce a summary of a summary).
+        _compacted_this_turn = False
+
+        response: StreamComplete | None = None
+
+        try:
+            async for event in self._llm.plan_stream(
+                system=system,
+                messages=self._history,
+                tools=tools,
+            ):
+                yield event
+                if isinstance(event, StreamComplete):
+                    response = event
+        except ContextOverflowError:
+            await self._summarize_history()
+            self._compact_scratchpads()
+            _compacted_this_turn = True
+            yield StreamContextCompacted(
+                message="Context was getting long — older history has been summarized."
+            )
+            async for event in self._llm.plan_stream(
+                system=system,
+                messages=self._history,
+                tools=tools,
+            ):
+                yield event
+                if isinstance(event, StreamComplete):
+                    response = event
+
+        if response is None:
+            return
+
+        llm_response = response.response
+
+        # Detect max_tokens truncation — the LLM was cut off mid-response.
+        # Inject a continuation prompt so it can finish what it was doing.
+        if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls:
+            self._history.append(
+                {"role": "assistant", "content": llm_response.content or ""}
+            )
+            self._history.append(
+                {
+                    "role": "user",
+                    "content": (
+                        "SYSTEM: Your response was truncated because it exceeded the output token limit. "
+                        "Continue exactly where you left off. If you were about to call a tool, "
+                        "call it now. If the code you were writing was too long, split it into smaller parts."
+                    ),
+                }
+            )
+            response = None
+            try:
+                async for event in self._llm.plan_stream(
+                    system=system,
+                    messages=self._history,
+                    tools=tools,
+                ):
+                    yield event
+                    if isinstance(event, StreamComplete):
+                        response = event
+            except ContextOverflowError:
+                if not _compacted_this_turn:
+                    await self._summarize_history()
+                    self._compact_scratchpads()
+                    _compacted_this_turn = True
+                yield StreamContextCompacted(
+                    message="Context was getting long — older history has been summarized."
+                )
+                async for event in self._llm.plan_stream(
+                    system=system,
+                    messages=self._history,
+                    tools=tools,
+                ):
+                    yield event
+                    if isinstance(event, StreamComplete):
+                        response = event
+
+            if response is None:
+                return
+            llm_response = response.response
+
+        # Proactive compaction
+        if (
+            not _compacted_this_turn
+            and llm_response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD
+        ):
+            await self._summarize_history()
+            self._compact_scratchpads()
+            _compacted_this_turn = True
+            yield StreamContextCompacted(
+                message="Context was getting long — older history has been summarized."
+            )
+
+        # Tool-call loop with circuit breaker, wrapped in a completion
+        # verification outer loop that can restart the tool loop if the
+        # task isn't actually done yet.
+        continuation = 0
+        _max_rounds_hit = False
+
+        while True:  # Completion verification loop
+            tool_round = 0
+            error_streak: dict[str, int] = {}
+            resilience_nudged: set[str] = set()
+
+            while llm_response.tool_calls:
+                tool_round += 1
+                if tool_round > _MAX_TOOL_ROUNDS:
+                    _max_rounds_hit = True
+                    self._history.append(
+                        {"role": "assistant", "content": llm_response.content or ""}
+                    )
+                    self._history.append(
+                        {
+                            "role": "user",
+                            "content": (
+                                f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
+                                "Pause here. Summarize what you have accomplished so far and what remains. "
+                                "If you believe you are on a good track and can finish the task with more steps, "
+                                "tell the user and ask if they'd like you to continue. "
+                                "Do NOT retry automatically — wait for the user's response."
+                            ),
+                        }
+                    )
+                    async for event in self._llm.plan_stream(
+                        system=system,
+                        messages=self._history,
+                    ):
+                        yield event
+                    break
+
+                # Build assistant message with content blocks
+                assistant_content: list[dict] = []
+                if llm_response.content:
+                    assistant_content.append(
+                        {"type": "text", "text": llm_response.content}
+                    )
+                for tc in llm_response.tool_calls:
+                    assistant_content.append(
+                        {
+                            "type": "tool_use",
+                            "id": tc.id,
+                            "name": tc.name,
+                            "input": tc.input,
+                        }
+                    )
+                self._history.append(
+                    {"role": "assistant", "content": assistant_content}
+                )
+
+                # Process each tool call
+                tool_results: list[dict] = []
+                for tc in llm_response.tool_calls:
+                    if self._episodic is not None:
+                        self._episodic.log_turn(
+                            self._turn_count + 1,
+                            "tool_call",
+                            str(tc.input)[:2000],
+                            tool=tc.name,
+                        )
+
+                    try:
+                        if tc.name == "scratchpad" and tc.input.get("action") == "exec":
+                            # Inline streaming exec — yields progress events
+                            prep = await prepare_scratchpad_exec(self, tc.input)
+                            if isinstance(prep, str):
+                                result_text = prep
+                            else:
+                                (
+                                    pad,
+                                    code,
+                                    description,
+                                    estimated_time,
+                                    estimated_seconds,
+                                ) = prep
+                                yield StreamTaskProgress(
+                                    phase="scratchpad_start",
+                                    message=description or "Running code",
+                                    eta_seconds=estimated_seconds,
+                                )
+                                import time as _time
+
+                                _sp_t0 = _time.monotonic()
+                                from anton.scratchpad import Cell
+
+                                cell = None
+                                async for item in pad.execute_streaming(
+                                    code,
+                                    description=description,
+                                    estimated_time=estimated_time,
+                                    estimated_seconds=estimated_seconds,
+                                    cancel_event=self._cancel_event,
+                                ):
+                                    if isinstance(item, str):
+                                        yield StreamTaskProgress(
+                                            phase="scratchpad", message=item
+                                        )
+                                    elif isinstance(item, Cell):
+                                        cell = item
+                                _sp_elapsed = _time.monotonic() - _sp_t0
+                                yield StreamTaskProgress(
+                                    phase="scratchpad_done",
+                                    message=description or "Done",
+                                    eta_seconds=_sp_elapsed,
+                                )
+                                result_text = (
+                                    format_cell_result(cell)
+                                    if cell
+                                    else "No result produced."
+                                )
+                                if self._episodic is not None and cell is not None:
+                                    self._episodic.log_turn(
+                                        self._turn_count + 1,
+                                        "scratchpad",
+                                        (cell.stdout or "")[:2000],
+                                        description=description,
+                                    )
+                        elif tc.name == "connect_new_datasource" or (
+                            tc.name == "publish_or_preview" and tc.input.get("action") == "publish"
+                        ):
+                            # Interactive tool — pause spinner AND escape watcher
+                            yield StreamTaskProgress(
+                                phase="interactive",
+                                message="",
+                            )
+                            if self._escape_watcher:
+                                self._escape_watcher.pause()
+                            result_text = await dispatch_tool(self, tc.name, tc.input)
+                            if self._escape_watcher:
+                                self._escape_watcher.resume()
+                            yield StreamTaskProgress(
+                                phase="analyzing",
+                                message="Analyzing results...",
+                            )
+                        else:
+                            result_text = await dispatch_tool(self, tc.name, tc.input)
+                            if (
+                                tc.name == "scratchpad"
+                                and tc.input.get("action") == "dump"
+                            ):
+                                yield StreamToolResult(content=result_text)
+                                result_text = (
+                                    "The full notebook has been displayed to the user above. "
+                                    "Do not repeat it. Here is the content for your reference:\n\n"
+                                    + result_text
+                                )
+                    except Exception as exc:
+                        result_text = f"Tool '{tc.name}' failed: {exc}"
+
+                    if self._episodic is not None:
+                        self._episodic.log_turn(
+                            self._turn_count + 1,
+                            "tool_result",
+                            result_text[:2000],
+                            tool=tc.name,
+                        )
+                    result_text = scrub_credentials(result_text)
+                    result_text = _apply_error_tracking(
+                        result_text, tc.name, error_streak, resilience_nudged
+                    )
+                    tool_results.append(
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": tc.id,
+                            "content": result_text,
+                        }
+                    )
+
+                self._history.append({"role": "user", "content": tool_results})
+
+                # Signal that tools are done and LLM is now analyzing
+                yield StreamTaskProgress(
+                    phase="analyzing", message="Analyzing results..."
+                )
+
+                # Stream follow-up
+                response = None
+                try:
+                    async for event in self._llm.plan_stream(
+                        system=system,
+                        messages=self._history,
+                        tools=tools,
+                    ):
+                        yield event
+                        if isinstance(event, StreamComplete):
+                            response = event
+                except ContextOverflowError:
+                    if not _compacted_this_turn:
+                        await self._summarize_history()
+                        self._compact_scratchpads()
+                        _compacted_this_turn = True
+                    yield StreamContextCompacted(
+                        message="Context was getting long — older history has been summarized."
+                    )
+                    async for event in self._llm.plan_stream(
+                        system=system,
+                        messages=self._history,
+                        tools=tools,
+                    ):
+                        yield event
+                        if isinstance(event, StreamComplete):
+                            response = event
+
+                if response is None:
+                    return
+                llm_response = response.response
+
+                # Detect max_tokens truncation inside tool loop
+                if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls:
+                    self._history.append(
+                        {"role": "assistant", "content": llm_response.content or ""}
+                    )
+                    self._history.append(
+                        {
+                            "role": "user",
+                            "content": (
+                                "SYSTEM: Your response was truncated because it exceeded the output token limit. "
+                                "Continue exactly where you left off. If you were about to call a tool, "
+                                "call it now. If the code you were writing was too long, split it into smaller parts."
+                            ),
+                        }
+                    )
+                    response = None
+                    try:
+                        async for event in self._llm.plan_stream(
+                            system=system,
+                            messages=self._history,
+                            tools=tools,
+                        ):
+                            yield event
+                            if isinstance(event, StreamComplete):
+                                response = event
+                    except ContextOverflowError:
+                        if not _compacted_this_turn:
+                            await self._summarize_history()
+                            self._compact_scratchpads()
+                            _compacted_this_turn = True
+                        yield StreamContextCompacted(
+                            message="Context was getting long — older history has been summarized."
+                        )
+                        async for event in self._llm.plan_stream(
+                            system=system,
+                            messages=self._history,
+                            tools=tools,
+                        ):
+                            yield event
+                            if isinstance(event, StreamComplete):
+                                response = event
+
+                    if response is None:
+                        return
+                    llm_response = response.response
+
+                # Proactive compaction during tool loop
+                if (
+                    not _compacted_this_turn
+                    and llm_response.usage.context_pressure
+                    > _CONTEXT_PRESSURE_THRESHOLD
+                ):
+                    await self._summarize_history()
+                    self._compact_scratchpads()
+                    _compacted_this_turn = True
+                    yield StreamContextCompacted(
+                        message="Context was getting long — older history has been summarized."
+                    )
+
+            # --- Completion verification ---
+            # Only verify when tools were actually used (not for simple Q&A)
+            # and we haven't hit the max-rounds hard stop.
+            if tool_round == 0 or _max_rounds_hit:
+                break
+
+            # Append the assistant's final text so the verifier can see it
+            reply = llm_response.content or ""
+            self._history.append({"role": "assistant", "content": reply})
+
+            if continuation >= _MAX_CONTINUATIONS:
+                # Budget exhausted — ask LLM to diagnose and present to user
+                self._history.append(
+                    {
+                        "role": "user",
+                        "content": (
+                            "SYSTEM: You have attempted to complete this task multiple times "
+                            "but verification indicates it is still not done. Do NOT try again. "
+                            "Instead:\n"
+                            "1. Summarize exactly what was accomplished so far.\n"
+                            "2. Identify the specific blocker or failure preventing completion.\n"
+                            "3. Suggest concrete next steps the user can take to unblock this.\n"
+                            "Be honest and specific — do not be vague about what went wrong."
+                        ),
+                    }
+                )
+                yield StreamTaskProgress(
+                    phase="analyzing", message="Diagnosing incomplete task..."
+                )
+                async for event in self._llm.plan_stream(
+                    system=system,
+                    messages=self._history,
+                ):
+                    yield event
+                # Consolidation still runs after diagnosis
+                break
+
+            # Ask the LLM to self-assess completion.
+            # Use a copy of history with a trailing user message so models
+            # that don't support assistant-prefill won't reject the request.
+            verify_messages = list(self._history) + [
+                {
+                    "role": "user",
+                    "content": (
+                        "SYSTEM: Evaluate whether the task the user originally requested "
+                        "has been fully completed based on the conversation above."
+                    ),
+                }
+            ]
+            verification = await self._llm.plan(
+                system=(
+                    "You are a task-completion verifier. Given the conversation, determine "
+                    "whether the user's original request has been fully completed.\n\n"
+                    "Respond with EXACTLY one of these lines, followed by a brief reason:\n"
+                    "STATUS: COMPLETE — <reason>\n"
+                    "STATUS: INCOMPLETE — <reason>\n"
+                    "STATUS: STUCK — <reason>\n\n"
+                    "COMPLETE = the task is done or the response fully answers the question.\n"
+                    "INCOMPLETE = more work can be done to finish the task.\n"
+                    "STUCK = a blocker prevents completion (missing info, permissions, etc).\n\n"
+                    "Be strict: if the user asked for X and only part of X was delivered, "
+                    "that is INCOMPLETE, not COMPLETE. But if the user asked a question "
+                    "and the assistant answered it, that is COMPLETE even without tool use."
+                ),
+                messages=verify_messages,
+                max_tokens=256,
+            )
+
+            status_text = (verification.content or "").strip().upper()
+            if "STATUS: COMPLETE" in status_text:
+                break
+            if "STATUS: STUCK" in status_text:
+                # Stuck — inject diagnosis request and let the LLM explain
+                reason = (verification.content or "").strip()
+                self._history.append(
+                    {
+                        "role": "user",
+                        "content": (
+                            f"SYSTEM: Task verification determined this task is stuck.\n"
+                            f"Verifier assessment: {reason}\n\n"
+                            "Explain to the user what went wrong, what you tried, and "
+                            "suggest specific next steps they can take to unblock this."
+                        ),
+                    }
+                )
+                yield StreamTaskProgress(
+                    phase="analyzing", message="Diagnosing blocked task..."
+                )
+                async for event in self._llm.plan_stream(
+                    system=system,
+                    messages=self._history,
+                ):
+                    yield event
+                break
+
+            # INCOMPLETE — continue working
+            continuation += 1
+            reason = (verification.content or "").strip()
+            self._history.append(
+                {
+                    "role": "user",
+                    "content": (
+                        f"SYSTEM: Task verification determined this task is not yet complete "
+                        f"(attempt {continuation}/{_MAX_CONTINUATIONS}).\n"
+                        f"Verifier assessment: {reason}\n\n"
+                        "Continue working on the original request. Pick up where you left off "
+                        "and finish the remaining work. Do not repeat work already done."
+                    ),
+                }
+            )
+            yield StreamTaskProgress(
+                phase="analyzing",
+                message=f"Task incomplete — continuing ({continuation}/{_MAX_CONTINUATIONS})...",
+            )
+
+            # Re-enter tool loop: get next LLM response with tools available
+            response = None
+            async for event in self._llm.plan_stream(
+                system=system,
+                messages=self._history,
+                tools=tools,
+            ):
+                yield event
+                if isinstance(event, StreamComplete):
+                    response = event
+            if response is None:
+                return
+            llm_response = response.response
+            # Loop back to the top of the completion verification loop
+
+        # Text-only final response — append to history (if not already appended
+        # by the verification block above).
+        if not self._history or self._history[-1].get("role") != "assistant":
+            reply = llm_response.content or ""
+            self._history.append({"role": "assistant", "content": reply})
+
+        # Consolidation: replay scratchpad sessions to extract lessons
+        if self._cortex is not None and self._cortex.mode != "off":
+            self._maybe_consolidate_scratchpads()
+
+    def _maybe_consolidate_scratchpads(self) -> None:
+        """Check if any scratchpad sessions warrant consolidation and fire it off."""
+        from anton.memory.consolidator import Consolidator
+
+        consolidator = Consolidator()
+        for pad in self._scratchpads._pads.values():
+            cells = list(pad.cells)
+            if consolidator.should_replay(cells):
+                asyncio.create_task(self._consolidate(cells))
+
+    async def _consolidate(self, cells: list) -> None:
+        """Run offline consolidation on a completed scratchpad session."""
+        from anton.memory.consolidator import Consolidator
+
+        consolidator = Consolidator()
+        engrams = await consolidator.replay_and_extract(cells, self._llm)
+        if not engrams or self._cortex is None:
+            return
+
+        auto_encode = [e for e in engrams if not self._cortex.encoding_gate(e)]
+        needs_confirm = [e for e in engrams if self._cortex.encoding_gate(e)]
+
+        if auto_encode:
+            await self._cortex.encode(auto_encode)
+
+        if needs_confirm:
+            self._pending_memory_confirmations.extend(needs_confirm)

From 7d29c061cc88d30e5eaaff2322b13d342e4d6c14 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 15:49:30 -0700
Subject: [PATCH 004/134] updated existing chat implementation based on core

---
 anton/chat.py | 1166 +------------------------------------------------
 1 file changed, 1 insertion(+), 1165 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 4722c1f9..5bd77801 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -1,14 +1,10 @@
 from __future__ import annotations
 
 import asyncio
-import json as _json
 import os
 import urllib.error
-import re as _re
 import sys
-import uuid
 import time
-from collections.abc import AsyncIterator, Callable
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -16,18 +12,15 @@
 
 from anton.clipboard import (
     cleanup_old_uploads,
-    clipboard_unavailable_reason,
     grab_clipboard,
     is_clipboard_supported,
     parse_dropped_paths as _parse_dropped_paths,
     save_clipboard_image,
 )
-from anton.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt
+from anton.core.session import ChatSession
 from anton.llm.provider import (
-    ContextOverflowError,
     StreamComplete,
     StreamContextCompacted,
-    StreamEvent,
     StreamTaskProgress,
     StreamTextDelta,
     StreamToolResult,
@@ -35,22 +28,10 @@
     StreamToolUseEnd,
     StreamToolUseStart,
 )
-from anton.scratchpad import ScratchpadManager
-from anton.tools import (
-    CONNECT_DATASOURCE_TOOL,
-    MEMORIZE_TOOL,
-    PUBLISH_TOOL,
-    RECALL_TOOL,
-    SCRATCHPAD_TOOL,
-    dispatch_tool,
-    format_cell_result,
-    prepare_scratchpad_exec,
-)
 from anton.checks import TokenLimitInfo, TokenLimitStatus, check_minds_token_limits
 from anton.commands.setup import (
     handle_memory,
     handle_setup,
-    handle_setup_memory,
     handle_setup_models,
 )
 from anton.commands.ui import handle_theme, print_slash_help
@@ -69,12 +50,6 @@
     handle_test_datasource,
 )
 from anton.utils.prompt import (
-    MINDS_KEYS,
-    LLM_KEYS,
-    SECRET_PATTERNS,
-    mask_secret,
-    is_secret_key,
-    display_value,
     prompt_or_cancel,
     prompt_minds_api_key,
 )
@@ -83,30 +58,19 @@
     normalize_minds_url,
     describe_minds_connection_error,
     list_minds,
-    get_mind,
-    refresh_knowledge,
     list_datasources,
     test_llm,
 )
 from anton.data_vault import DataVault
 from anton.utils.datasources import (
-    build_datasource_context,
     register_secret_vars,
-    restore_namespaced_env,
-    remove_engine_block,
-    scrub_credentials,
-    parse_connection_slug,
 )
 from anton.datasource_registry import (
-    DatasourceEngine,
-    DatasourceField,
     DatasourceRegistry,
 )
-from anton.llm.openai import build_chat_completion_kwargs
 
 from prompt_toolkit import PromptSession
 from prompt_toolkit.formatted_text import HTML
-from prompt_toolkit.key_binding import KeyBindings
 from prompt_toolkit.styles import Style as PTStyle
 from rich.prompt import Confirm, Prompt
 
@@ -138,1134 +102,6 @@
 TOKEN_STATUS_CACHE_TTL = 60.0
 
 
-
-class ChatSession:
-    """Manages a multi-turn conversation with tool-call delegation."""
-
-    def __init__(
-        self,
-        llm_client: LLMClient,
-        *,
-        self_awareness: SelfAwarenessContext | None = None,
-        cortex: Cortex | None = None,
-        episodic: EpisodicMemory | None = None,
-        runtime_context: str = "",
-        workspace: Workspace | None = None,
-        console: Console | None = None,
-        coding_provider: str = "anthropic",
-        coding_api_key: str = "",
-        coding_base_url: str = "",
-        initial_history: list[dict] | None = None,
-        history_store: HistoryStore | None = None,
-        session_id: str | None = None,
-        proactive_dashboards: bool = False,
-    ) -> None:
-        self._llm = llm_client
-        self._self_awareness = self_awareness
-        self._cortex = cortex
-        self._episodic = episodic
-        self._runtime_context = runtime_context
-        self._proactive_dashboards = proactive_dashboards
-        self._workspace = workspace
-        self._console = console
-        self._history: list[dict] = list(initial_history) if initial_history else []
-        self._pending_memory_confirmations: list = []
-        self._turn_count = (
-            sum(1 for m in self._history if m.get("role") == "user")
-            if initial_history
-            else 0
-        )
-        self._history_store = history_store
-        self._session_id = session_id
-        self._cancel_event = asyncio.Event()
-        self._escape_watcher: "EscapeWatcher | None" = None
-        self._active_datasource: str | None = None
-        self._scratchpads = ScratchpadManager(
-            coding_provider=coding_provider,
-            coding_model=getattr(llm_client, "coding_model", ""),
-            coding_api_key=coding_api_key,
-            coding_base_url=coding_base_url,
-            workspace_path=workspace.base if workspace else None,
-        )
-
-    @property
-    def history(self) -> list[dict]:
-        return self._history
-
-    def repair_history(self) -> None:
-        """Fix dangling tool_use blocks left by mid-stream cancellation.
-
-        The Anthropic API requires every tool_use to be followed by a
-        tool_result.  If we cancelled mid-turn, the last assistant message
-        may contain tool_use blocks with no corresponding tool_result in
-        the next message.  Append synthetic tool_results so the
-        conversation can continue.
-        """
-        if not self._history:
-            return
-        last = self._history[-1]
-        if last.get("role") != "assistant":
-            return
-        content = last.get("content")
-        if not isinstance(content, list):
-            return
-        tool_ids = [
-            block["id"]
-            for block in content
-            if isinstance(block, dict) and block.get("type") == "tool_use"
-        ]
-        if not tool_ids:
-            return
-        self._history.append(
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "tool_result",
-                        "tool_use_id": tid,
-                        "content": "Cancelled by user.",
-                    }
-                    for tid in tool_ids
-                ],
-            }
-        )
-
-    def _persist_history(self) -> None:
-        """Save current history to disk if a history store is configured."""
-        if self._history_store and self._session_id:
-            self._history_store.save(self._session_id, self._history)
-
-    async def _build_system_prompt(self, user_message: str = "") -> str:
-        import datetime as _dt
-        _now = _dt.datetime.now()
-        _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p")
-
-        prompt = CHAT_SYSTEM_PROMPT.format(
-            runtime_context=self._runtime_context,
-            visualizations_section=build_visualizations_prompt(
-                self._proactive_dashboards
-            ),
-            current_datetime=_current_datetime,
-        )
-        # Inject memory context (replaces old self_awareness)
-        if self._cortex is not None:
-            memory_section = await self._cortex.build_memory_context(user_message)
-            if memory_section:
-                prompt += memory_section
-        elif self._self_awareness is not None:
-            # Fallback for legacy usage (tests, etc.)
-            sa_section = self._self_awareness.build_prompt_section()
-            if sa_section:
-                prompt += sa_section
-        # Inject anton.md project context (user-written takes priority)
-        if self._workspace is not None:
-            md_context = self._workspace.build_anton_md_context()
-            if md_context:
-                prompt += md_context
-        # Inject connected datasource context without credentials
-        ds_ctx = build_datasource_context(active_only=self._active_datasource)
-        if ds_ctx:
-            prompt += ds_ctx
-        return prompt
-
-    # Packages the LLM is most likely to care about when writing scratchpad code.
-    _NOTABLE_PACKAGES: set[str] = {
-        "numpy",
-        "pandas",
-        "matplotlib",
-        "seaborn",
-        "scipy",
-        "scikit-learn",
-        "requests",
-        "httpx",
-        "aiohttp",
-        "beautifulsoup4",
-        "lxml",
-        "pillow",
-        "sympy",
-        "networkx",
-        "sqlalchemy",
-        "pydantic",
-        "rich",
-        "tqdm",
-        "click",
-        "fastapi",
-        "flask",
-        "django",
-        "openai",
-        "anthropic",
-        "tiktoken",
-        "transformers",
-        "torch",
-        "polars",
-        "pyarrow",
-        "openpyxl",
-        "xlsxwriter",
-        "plotly",
-        "bokeh",
-        "altair",
-        "pytest",
-        "hypothesis",
-        "yaml",
-        "pyyaml",
-        "toml",
-        "tomli",
-        "tomllib",
-        "jinja2",
-        "markdown",
-        "pygments",
-        "cryptography",
-        "paramiko",
-        "boto3",
-    }
-
-    def _build_tools(self) -> list[dict]:
-        scratchpad_tool = dict(SCRATCHPAD_TOOL)
-        pkg_list = self._scratchpads._available_packages
-        if pkg_list:
-            notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES)
-            if notable:
-                pkg_line = ", ".join(notable)
-                extra = f"\n\nInstalled packages ({len(pkg_list)} total, notable: {pkg_line})."
-            else:
-                extra = f"\n\nInstalled packages: {len(pkg_list)} total (standard library plus dependencies)."
-            scratchpad_tool["description"] = SCRATCHPAD_TOOL["description"] + extra
-
-        # Inject scratchpad wisdom from memory (procedural priming)
-        if self._cortex is not None:
-            wisdom = self._cortex.get_scratchpad_context()
-            if wisdom:
-                scratchpad_tool[
-                    "description"
-                ] += f"\n\nLessons from past sessions:\n{wisdom}"
-
-        tools = [scratchpad_tool]
-        if self._cortex is not None:
-            tools.append(MEMORIZE_TOOL)
-        elif self._self_awareness is not None:
-            # Legacy fallback
-            from anton.tools import MEMORIZE_TOOL as _MT
-
-            tools.append(_MT)
-        if self._episodic is not None and self._episodic.enabled:
-            tools.append(RECALL_TOOL)
-        tools.append(CONNECT_DATASOURCE_TOOL)
-        tools.append(PUBLISH_TOOL)
-        return tools
-
-    async def close(self) -> None:
-        """Clean up scratchpads and other resources."""
-        await self._scratchpads.close_all()
-
-    async def _summarize_history(self) -> None:
-        """Compress old conversation turns into a summary using the coding model.
-
-        Splits history into old (first 60%) and recent (last 40%), keeping at
-        least 4 recent turns.  The old portion is summarized by the fast coding
-        model and replaced with a single user message.
-        """
-        if len(self._history) < 6:
-            return  # Too short to summarize
-
-        min_recent = 4
-        split = max(int(len(self._history) * 0.6), 1)
-        # Ensure we keep at least min_recent turns
-        split = min(split, len(self._history) - min_recent)
-        if split < 2:
-            return
-
-        # Walk split backward to avoid breaking tool_use / tool_result pairs.
-        # A user message containing tool_result blocks must stay with the
-        # preceding assistant message that contains the matching tool_use.
-        while split > 1:
-            msg = self._history[split]
-            if msg.get("role") != "user":
-                break
-            content = msg.get("content")
-            if not isinstance(content, list):
-                break
-            has_tool_result = any(
-                isinstance(b, dict) and b.get("type") == "tool_result" for b in content
-            )
-            if not has_tool_result:
-                break
-            # This user message has tool_results — keep it (and its paired
-            # assistant message) in the recent portion.
-            split -= 1
-            # Also pull back over the preceding assistant message so the
-            # pair stays together.
-            if split > 1 and self._history[split].get("role") == "assistant":
-                split -= 1
-
-        if split < 2:
-            return
-
-        old_turns = self._history[:split]
-        recent_turns = self._history[split:]
-
-        # Serialize old turns into text for summarization
-        lines: list[str] = []
-        for msg in old_turns:
-            role = msg.get("role", "unknown")
-            content = msg.get("content", "")
-            if isinstance(content, str):
-                lines.append(f"[{role}]: {content[:2000]}")
-            elif isinstance(content, list):
-                for block in content:
-                    if isinstance(block, dict):
-                        if block.get("type") == "text":
-                            lines.append(f"[{role}]: {block['text'][:1000]}")
-                        elif block.get("type") == "tool_use":
-                            lines.append(
-                                f"[{role}/tool_use]: {block.get('name', '')}({str(block.get('input', ''))[:500]})"
-                            )
-                        elif block.get("type") == "tool_result":
-                            lines.append(
-                                f"[tool_result]: {str(block.get('content', ''))[:500]}"
-                            )
-
-        old_text = "\n".join(lines)
-        # Cap at ~8000 chars to avoid overloading the summarizer
-        if len(old_text) > 8000:
-            old_text = old_text[:8000] + "\n... (truncated)"
-
-        try:
-            summary_response = await self._llm.code(
-                system=(
-                    "Summarize this conversation history concisely. Preserve:\n"
-                    "- Key decisions and conclusions\n"
-                    "- Important data/results discovered\n"
-                    "- Variable names and values that are still relevant\n"
-                    "- Errors encountered and how they were resolved\n"
-                    "Keep it under 2000 tokens. Use bullet points."
-                ),
-                messages=[{"role": "user", "content": old_text}],
-                max_tokens=2048,
-            )
-            summary = summary_response.content or "(summary unavailable)"
-        except Exception:
-            # If summarization fails, just do a simple truncation
-            summary = f"(Earlier conversation with {len(old_turns)} turns — summarization failed)"
-
-        summary_msg = {
-            "role": "user",
-            "content": f"[Context summary of earlier conversation]\n{summary}",
-        }
-
-        # If the recent portion starts with a user message, insert a minimal
-        # assistant separator to avoid consecutive user messages (API error).
-        if recent_turns and recent_turns[0].get("role") == "user":
-            self._history = [
-                summary_msg,
-                {"role": "assistant", "content": "Understood."},
-                *recent_turns,
-            ]
-        else:
-            self._history = [summary_msg] + recent_turns
-
-    def _compact_scratchpads(self) -> bool:
-        """Compact all active scratchpads. Returns True if any were compacted."""
-        compacted = False
-        for pad in self._scratchpads._pads.values():
-            if pad._compact_cells():
-                compacted = True
-        return compacted
-
-    async def turn(self, user_input: str | list[dict]) -> str:
-        self._history.append({"role": "user", "content": user_input})
-
-        user_msg_str = user_input if isinstance(user_input, str) else ""
-        system = await self._build_system_prompt(user_msg_str)
-        tools = self._build_tools()
-
-        try:
-            response = await self._llm.plan(
-                system=system,
-                messages=self._history,
-                tools=tools,
-            )
-        except ContextOverflowError:
-            await self._summarize_history()
-            self._compact_scratchpads()
-            response = await self._llm.plan(
-                system=system,
-                messages=self._history,
-                tools=tools,
-            )
-
-        # Proactive compaction
-        if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD:
-            await self._summarize_history()
-            self._compact_scratchpads()
-
-        # Handle tool calls
-        tool_round = 0
-        error_streak: dict[str, int] = {}
-        resilience_nudged: set[str] = set()
-
-        while response.tool_calls:
-            tool_round += 1
-            if tool_round > _MAX_TOOL_ROUNDS:
-                self._history.append(
-                    {"role": "assistant", "content": response.content or ""}
-                )
-                self._history.append(
-                    {
-                        "role": "user",
-                        "content": (
-                            f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
-                            "Pause here. Summarize what you have accomplished so far and what remains. "
-                            "If you believe you are on a good track and can finish the task with more steps, "
-                            "tell the user and ask if they'd like you to continue. "
-                            "Do NOT retry automatically — wait for the user's response."
-                        ),
-                    }
-                )
-                response = await self._llm.plan(
-                    system=system,
-                    messages=self._history,
-                )
-                break
-
-            # Build assistant message with content blocks
-            assistant_content: list[dict] = []
-            if response.content:
-                assistant_content.append({"type": "text", "text": response.content})
-            for tc in response.tool_calls:
-                assistant_content.append(
-                    {
-                        "type": "tool_use",
-                        "id": tc.id,
-                        "name": tc.name,
-                        "input": tc.input,
-                    }
-                )
-            self._history.append({"role": "assistant", "content": assistant_content})
-
-            # Process each tool call via registry
-            tool_results: list[dict] = []
-            for tc in response.tool_calls:
-                try:
-                    result_text = await dispatch_tool(self, tc.name, tc.input)
-                except Exception as exc:
-                    result_text = f"Tool '{tc.name}' failed: {exc}"
-
-                result_text = scrub_credentials(result_text)
-                result_text = _apply_error_tracking(
-                    result_text,
-                    tc.name,
-                    error_streak,
-                    resilience_nudged,
-                )
-
-                tool_results.append(
-                    {
-                        "type": "tool_result",
-                        "tool_use_id": tc.id,
-                        "content": result_text,
-                    }
-                )
-
-            self._history.append({"role": "user", "content": tool_results})
-
-            # Get follow-up from LLM
-            try:
-                response = await self._llm.plan(
-                    system=system,
-                    messages=self._history,
-                    tools=tools,
-                )
-            except ContextOverflowError:
-                await self._summarize_history()
-                self._compact_scratchpads()
-                response = await self._llm.plan(
-                    system=system,
-                    messages=self._history,
-                    tools=tools,
-                )
-
-            # Proactive compaction during tool loop
-            if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD:
-                await self._summarize_history()
-                self._compact_scratchpads()
-
-        # Text-only response
-        reply = response.content or ""
-        self._history.append({"role": "assistant", "content": reply})
-
-        # Periodic memory vacuum (Systems Consolidation)
-        if self._cortex is not None and self._cortex.mode != "off":
-            self._cortex.maybe_vacuum()
-
-        return reply
-
-    async def turn_stream(
-        self, user_input: str | list[dict]
-    ) -> AsyncIterator[StreamEvent]:
-        """Streaming version of turn(). Yields events as they arrive."""
-        self._history.append({"role": "user", "content": user_input})
-
-        # Log user input to episodic memory
-        if self._episodic is not None:
-            content = (
-                user_input if isinstance(user_input, str) else str(user_input)[:2000]
-            )
-            self._episodic.log_turn(self._turn_count + 1, "user", content)
-
-        user_msg_str = user_input if isinstance(user_input, str) else ""
-        assistant_text_parts: list[str] = []
-        _max_auto_retries = 2
-        _retry_count = 0
-
-        while True:
-            try:
-                async for event in self._stream_and_handle_tools(user_msg_str):
-                    if isinstance(event, StreamTextDelta):
-                        assistant_text_parts.append(event.text)
-                    yield event
-                break  # completed successfully
-            except Exception as _agent_exc:
-                _retry_count += 1
-                if _retry_count <= _max_auto_retries:
-                    # Inject the error into history and let the LLM try to recover
-                    self._history.append(
-                        {
-                            "role": "user",
-                            "content": (
-                                f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n"
-                                "If you can diagnose and fix the issue, continue working on the task. "
-                                "Adjust your approach to avoid the same error. "
-                                "If this is unrecoverable, summarize what you accomplished and suggest next steps."
-                            ),
-                        }
-                    )
-                    # Continue the while loop — _stream_and_handle_tools will be called
-                    # again with the error context now in history
-                    continue
-                else:
-                    # Exhausted retries — stop and summarize for the user
-                    self._history.append(
-                        {
-                            "role": "user",
-                            "content": (
-                                f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n"
-                                "Stop retrying. Please:\n"
-                                "1. Summarize what you accomplished so far.\n"
-                                "2. Explain what went wrong in plain language.\n"
-                                "3. Suggest next steps — what the user can try (e.g. rephrase, "
-                                "simplify the request, or ask you to continue from where you left off).\n"
-                                "Be concise and helpful."
-                            ),
-                        }
-                    )
-                    try:
-                        async for event in self._llm.plan_stream(
-                            system=await self._build_system_prompt(user_msg_str),
-                            messages=self._history,
-                        ):
-                            if isinstance(event, StreamTextDelta):
-                                assistant_text_parts.append(event.text)
-                            yield event
-                    except Exception:
-                        fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request."
-                        assistant_text_parts.append(fallback)
-                        yield StreamTextDelta(text=fallback)
-                    break
-
-        # Log assistant response to episodic memory
-        if self._episodic is not None and assistant_text_parts:
-            self._episodic.log_turn(
-                self._turn_count + 1,
-                "assistant",
-                "".join(assistant_text_parts)[:2000],
-            )
-
-        # Identity extraction (Default Mode Network — every 5 turns)
-        self._turn_count += 1
-        self._persist_history()
-        if self._cortex is not None and self._cortex.mode != "off":
-            if self._turn_count % 5 == 0 and isinstance(user_input, str):
-                asyncio.create_task(self._cortex.maybe_update_identity(user_input))
-            # Periodic memory vacuum (Systems Consolidation)
-            self._cortex.maybe_vacuum()
-
-    async def _stream_and_handle_tools(
-        self, user_message: str = ""
-    ) -> AsyncIterator[StreamEvent]:
-        """Stream one LLM call, handle tool loops, yield all events."""
-        system = await self._build_system_prompt(user_message)
-        tools = self._build_tools()
-
-        # Guard against summarizing an already-summarized history within the same
-        # turn (e.g. ContextOverflowError on first call + pressure > threshold on
-        # the tool-loop follow-up would previously produce a summary of a summary).
-        _compacted_this_turn = False
-
-        response: StreamComplete | None = None
-
-        try:
-            async for event in self._llm.plan_stream(
-                system=system,
-                messages=self._history,
-                tools=tools,
-            ):
-                yield event
-                if isinstance(event, StreamComplete):
-                    response = event
-        except ContextOverflowError:
-            await self._summarize_history()
-            self._compact_scratchpads()
-            _compacted_this_turn = True
-            yield StreamContextCompacted(
-                message="Context was getting long — older history has been summarized."
-            )
-            async for event in self._llm.plan_stream(
-                system=system,
-                messages=self._history,
-                tools=tools,
-            ):
-                yield event
-                if isinstance(event, StreamComplete):
-                    response = event
-
-        if response is None:
-            return
-
-        llm_response = response.response
-
-        # Detect max_tokens truncation — the LLM was cut off mid-response.
-        # Inject a continuation prompt so it can finish what it was doing.
-        if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls:
-            self._history.append(
-                {"role": "assistant", "content": llm_response.content or ""}
-            )
-            self._history.append(
-                {
-                    "role": "user",
-                    "content": (
-                        "SYSTEM: Your response was truncated because it exceeded the output token limit. "
-                        "Continue exactly where you left off. If you were about to call a tool, "
-                        "call it now. If the code you were writing was too long, split it into smaller parts."
-                    ),
-                }
-            )
-            response = None
-            try:
-                async for event in self._llm.plan_stream(
-                    system=system,
-                    messages=self._history,
-                    tools=tools,
-                ):
-                    yield event
-                    if isinstance(event, StreamComplete):
-                        response = event
-            except ContextOverflowError:
-                if not _compacted_this_turn:
-                    await self._summarize_history()
-                    self._compact_scratchpads()
-                    _compacted_this_turn = True
-                yield StreamContextCompacted(
-                    message="Context was getting long — older history has been summarized."
-                )
-                async for event in self._llm.plan_stream(
-                    system=system,
-                    messages=self._history,
-                    tools=tools,
-                ):
-                    yield event
-                    if isinstance(event, StreamComplete):
-                        response = event
-
-            if response is None:
-                return
-            llm_response = response.response
-
-        # Proactive compaction
-        if (
-            not _compacted_this_turn
-            and llm_response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD
-        ):
-            await self._summarize_history()
-            self._compact_scratchpads()
-            _compacted_this_turn = True
-            yield StreamContextCompacted(
-                message="Context was getting long — older history has been summarized."
-            )
-
-        # Tool-call loop with circuit breaker, wrapped in a completion
-        # verification outer loop that can restart the tool loop if the
-        # task isn't actually done yet.
-        continuation = 0
-        _max_rounds_hit = False
-
-        while True:  # Completion verification loop
-            tool_round = 0
-            error_streak: dict[str, int] = {}
-            resilience_nudged: set[str] = set()
-
-            while llm_response.tool_calls:
-                tool_round += 1
-                if tool_round > _MAX_TOOL_ROUNDS:
-                    _max_rounds_hit = True
-                    self._history.append(
-                        {"role": "assistant", "content": llm_response.content or ""}
-                    )
-                    self._history.append(
-                        {
-                            "role": "user",
-                            "content": (
-                                f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
-                                "Pause here. Summarize what you have accomplished so far and what remains. "
-                                "If you believe you are on a good track and can finish the task with more steps, "
-                                "tell the user and ask if they'd like you to continue. "
-                                "Do NOT retry automatically — wait for the user's response."
-                            ),
-                        }
-                    )
-                    async for event in self._llm.plan_stream(
-                        system=system,
-                        messages=self._history,
-                    ):
-                        yield event
-                    break
-
-                # Build assistant message with content blocks
-                assistant_content: list[dict] = []
-                if llm_response.content:
-                    assistant_content.append(
-                        {"type": "text", "text": llm_response.content}
-                    )
-                for tc in llm_response.tool_calls:
-                    assistant_content.append(
-                        {
-                            "type": "tool_use",
-                            "id": tc.id,
-                            "name": tc.name,
-                            "input": tc.input,
-                        }
-                    )
-                self._history.append(
-                    {"role": "assistant", "content": assistant_content}
-                )
-
-                # Process each tool call
-                tool_results: list[dict] = []
-                for tc in llm_response.tool_calls:
-                    if self._episodic is not None:
-                        self._episodic.log_turn(
-                            self._turn_count + 1,
-                            "tool_call",
-                            str(tc.input)[:2000],
-                            tool=tc.name,
-                        )
-
-                    try:
-                        if tc.name == "scratchpad" and tc.input.get("action") == "exec":
-                            # Inline streaming exec — yields progress events
-                            prep = await prepare_scratchpad_exec(self, tc.input)
-                            if isinstance(prep, str):
-                                result_text = prep
-                            else:
-                                (
-                                    pad,
-                                    code,
-                                    description,
-                                    estimated_time,
-                                    estimated_seconds,
-                                ) = prep
-                                yield StreamTaskProgress(
-                                    phase="scratchpad_start",
-                                    message=description or "Running code",
-                                    eta_seconds=estimated_seconds,
-                                )
-                                import time as _time
-
-                                _sp_t0 = _time.monotonic()
-                                from anton.scratchpad import Cell
-
-                                cell = None
-                                async for item in pad.execute_streaming(
-                                    code,
-                                    description=description,
-                                    estimated_time=estimated_time,
-                                    estimated_seconds=estimated_seconds,
-                                    cancel_event=self._cancel_event,
-                                ):
-                                    if isinstance(item, str):
-                                        yield StreamTaskProgress(
-                                            phase="scratchpad", message=item
-                                        )
-                                    elif isinstance(item, Cell):
-                                        cell = item
-                                _sp_elapsed = _time.monotonic() - _sp_t0
-                                yield StreamTaskProgress(
-                                    phase="scratchpad_done",
-                                    message=description or "Done",
-                                    eta_seconds=_sp_elapsed,
-                                )
-                                result_text = (
-                                    format_cell_result(cell)
-                                    if cell
-                                    else "No result produced."
-                                )
-                                if self._episodic is not None and cell is not None:
-                                    self._episodic.log_turn(
-                                        self._turn_count + 1,
-                                        "scratchpad",
-                                        (cell.stdout or "")[:2000],
-                                        description=description,
-                                    )
-                        elif tc.name == "connect_new_datasource" or (
-                            tc.name == "publish_or_preview" and tc.input.get("action") == "publish"
-                        ):
-                            # Interactive tool — pause spinner AND escape watcher
-                            yield StreamTaskProgress(
-                                phase="interactive",
-                                message="",
-                            )
-                            if self._escape_watcher:
-                                self._escape_watcher.pause()
-                            result_text = await dispatch_tool(self, tc.name, tc.input)
-                            if self._escape_watcher:
-                                self._escape_watcher.resume()
-                            yield StreamTaskProgress(
-                                phase="analyzing",
-                                message="Analyzing results...",
-                            )
-                        else:
-                            result_text = await dispatch_tool(self, tc.name, tc.input)
-                            if (
-                                tc.name == "scratchpad"
-                                and tc.input.get("action") == "dump"
-                            ):
-                                yield StreamToolResult(content=result_text)
-                                result_text = (
-                                    "The full notebook has been displayed to the user above. "
-                                    "Do not repeat it. Here is the content for your reference:\n\n"
-                                    + result_text
-                                )
-                    except Exception as exc:
-                        result_text = f"Tool '{tc.name}' failed: {exc}"
-
-                    if self._episodic is not None:
-                        self._episodic.log_turn(
-                            self._turn_count + 1,
-                            "tool_result",
-                            result_text[:2000],
-                            tool=tc.name,
-                        )
-                    result_text = scrub_credentials(result_text)
-                    result_text = _apply_error_tracking(
-                        result_text, tc.name, error_streak, resilience_nudged
-                    )
-                    tool_results.append(
-                        {
-                            "type": "tool_result",
-                            "tool_use_id": tc.id,
-                            "content": result_text,
-                        }
-                    )
-
-                self._history.append({"role": "user", "content": tool_results})
-
-                # Signal that tools are done and LLM is now analyzing
-                yield StreamTaskProgress(
-                    phase="analyzing", message="Analyzing results..."
-                )
-
-                # Stream follow-up
-                response = None
-                try:
-                    async for event in self._llm.plan_stream(
-                        system=system,
-                        messages=self._history,
-                        tools=tools,
-                    ):
-                        yield event
-                        if isinstance(event, StreamComplete):
-                            response = event
-                except ContextOverflowError:
-                    if not _compacted_this_turn:
-                        await self._summarize_history()
-                        self._compact_scratchpads()
-                        _compacted_this_turn = True
-                    yield StreamContextCompacted(
-                        message="Context was getting long — older history has been summarized."
-                    )
-                    async for event in self._llm.plan_stream(
-                        system=system,
-                        messages=self._history,
-                        tools=tools,
-                    ):
-                        yield event
-                        if isinstance(event, StreamComplete):
-                            response = event
-
-                if response is None:
-                    return
-                llm_response = response.response
-
-                # Detect max_tokens truncation inside tool loop
-                if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls:
-                    self._history.append(
-                        {"role": "assistant", "content": llm_response.content or ""}
-                    )
-                    self._history.append(
-                        {
-                            "role": "user",
-                            "content": (
-                                "SYSTEM: Your response was truncated because it exceeded the output token limit. "
-                                "Continue exactly where you left off. If you were about to call a tool, "
-                                "call it now. If the code you were writing was too long, split it into smaller parts."
-                            ),
-                        }
-                    )
-                    response = None
-                    try:
-                        async for event in self._llm.plan_stream(
-                            system=system,
-                            messages=self._history,
-                            tools=tools,
-                        ):
-                            yield event
-                            if isinstance(event, StreamComplete):
-                                response = event
-                    except ContextOverflowError:
-                        if not _compacted_this_turn:
-                            await self._summarize_history()
-                            self._compact_scratchpads()
-                            _compacted_this_turn = True
-                        yield StreamContextCompacted(
-                            message="Context was getting long — older history has been summarized."
-                        )
-                        async for event in self._llm.plan_stream(
-                            system=system,
-                            messages=self._history,
-                            tools=tools,
-                        ):
-                            yield event
-                            if isinstance(event, StreamComplete):
-                                response = event
-
-                    if response is None:
-                        return
-                    llm_response = response.response
-
-                # Proactive compaction during tool loop
-                if (
-                    not _compacted_this_turn
-                    and llm_response.usage.context_pressure
-                    > _CONTEXT_PRESSURE_THRESHOLD
-                ):
-                    await self._summarize_history()
-                    self._compact_scratchpads()
-                    _compacted_this_turn = True
-                    yield StreamContextCompacted(
-                        message="Context was getting long — older history has been summarized."
-                    )
-
-            # --- Completion verification ---
-            # Only verify when tools were actually used (not for simple Q&A)
-            # and we haven't hit the max-rounds hard stop.
-            if tool_round == 0 or _max_rounds_hit:
-                break
-
-            # Append the assistant's final text so the verifier can see it
-            reply = llm_response.content or ""
-            self._history.append({"role": "assistant", "content": reply})
-
-            if continuation >= _MAX_CONTINUATIONS:
-                # Budget exhausted — ask LLM to diagnose and present to user
-                self._history.append(
-                    {
-                        "role": "user",
-                        "content": (
-                            "SYSTEM: You have attempted to complete this task multiple times "
-                            "but verification indicates it is still not done. Do NOT try again. "
-                            "Instead:\n"
-                            "1. Summarize exactly what was accomplished so far.\n"
-                            "2. Identify the specific blocker or failure preventing completion.\n"
-                            "3. Suggest concrete next steps the user can take to unblock this.\n"
-                            "Be honest and specific — do not be vague about what went wrong."
-                        ),
-                    }
-                )
-                yield StreamTaskProgress(
-                    phase="analyzing", message="Diagnosing incomplete task..."
-                )
-                async for event in self._llm.plan_stream(
-                    system=system,
-                    messages=self._history,
-                ):
-                    yield event
-                # Consolidation still runs after diagnosis
-                break
-
-            # Ask the LLM to self-assess completion.
-            # Use a copy of history with a trailing user message so models
-            # that don't support assistant-prefill won't reject the request.
-            verify_messages = list(self._history) + [
-                {
-                    "role": "user",
-                    "content": (
-                        "SYSTEM: Evaluate whether the task the user originally requested "
-                        "has been fully completed based on the conversation above."
-                    ),
-                }
-            ]
-            verification = await self._llm.plan(
-                system=(
-                    "You are a task-completion verifier. Given the conversation, determine "
-                    "whether the user's original request has been fully completed.\n\n"
-                    "Respond with EXACTLY one of these lines, followed by a brief reason:\n"
-                    "STATUS: COMPLETE — <reason>\n"
-                    "STATUS: INCOMPLETE — <reason>\n"
-                    "STATUS: STUCK — <reason>\n\n"
-                    "COMPLETE = the task is done or the response fully answers the question.\n"
-                    "INCOMPLETE = more work can be done to finish the task.\n"
-                    "STUCK = a blocker prevents completion (missing info, permissions, etc).\n\n"
-                    "Be strict: if the user asked for X and only part of X was delivered, "
-                    "that is INCOMPLETE, not COMPLETE. But if the user asked a question "
-                    "and the assistant answered it, that is COMPLETE even without tool use."
-                ),
-                messages=verify_messages,
-                max_tokens=256,
-            )
-
-            status_text = (verification.content or "").strip().upper()
-            if "STATUS: COMPLETE" in status_text:
-                break
-            if "STATUS: STUCK" in status_text:
-                # Stuck — inject diagnosis request and let the LLM explain
-                reason = (verification.content or "").strip()
-                self._history.append(
-                    {
-                        "role": "user",
-                        "content": (
-                            f"SYSTEM: Task verification determined this task is stuck.\n"
-                            f"Verifier assessment: {reason}\n\n"
-                            "Explain to the user what went wrong, what you tried, and "
-                            "suggest specific next steps they can take to unblock this."
-                        ),
-                    }
-                )
-                yield StreamTaskProgress(
-                    phase="analyzing", message="Diagnosing blocked task..."
-                )
-                async for event in self._llm.plan_stream(
-                    system=system,
-                    messages=self._history,
-                ):
-                    yield event
-                break
-
-            # INCOMPLETE — continue working
-            continuation += 1
-            reason = (verification.content or "").strip()
-            self._history.append(
-                {
-                    "role": "user",
-                    "content": (
-                        f"SYSTEM: Task verification determined this task is not yet complete "
-                        f"(attempt {continuation}/{_MAX_CONTINUATIONS}).\n"
-                        f"Verifier assessment: {reason}\n\n"
-                        "Continue working on the original request. Pick up where you left off "
-                        "and finish the remaining work. Do not repeat work already done."
-                    ),
-                }
-            )
-            yield StreamTaskProgress(
-                phase="analyzing",
-                message=f"Task incomplete — continuing ({continuation}/{_MAX_CONTINUATIONS})...",
-            )
-
-            # Re-enter tool loop: get next LLM response with tools available
-            response = None
-            async for event in self._llm.plan_stream(
-                system=system,
-                messages=self._history,
-                tools=tools,
-            ):
-                yield event
-                if isinstance(event, StreamComplete):
-                    response = event
-            if response is None:
-                return
-            llm_response = response.response
-            # Loop back to the top of the completion verification loop
-
-        # Text-only final response — append to history (if not already appended
-        # by the verification block above).
-        if not self._history or self._history[-1].get("role") != "assistant":
-            reply = llm_response.content or ""
-            self._history.append({"role": "assistant", "content": reply})
-
-        # Consolidation: replay scratchpad sessions to extract lessons
-        if self._cortex is not None and self._cortex.mode != "off":
-            self._maybe_consolidate_scratchpads()
-
-    def _maybe_consolidate_scratchpads(self) -> None:
-        """Check if any scratchpad sessions warrant consolidation and fire it off."""
-        from anton.memory.consolidator import Consolidator
-
-        consolidator = Consolidator()
-        for pad in self._scratchpads._pads.values():
-            cells = list(pad.cells)
-            if consolidator.should_replay(cells):
-                asyncio.create_task(self._consolidate(cells))
-
-    async def _consolidate(self, cells: list) -> None:
-        """Run offline consolidation on a completed scratchpad session."""
-        from anton.memory.consolidator import Consolidator
-
-        consolidator = Consolidator()
-        engrams = await consolidator.replay_and_extract(cells, self._llm)
-        if not engrams or self._cortex is None:
-            return
-
-        auto_encode = [e for e in engrams if not self._cortex.encoding_gate(e)]
-        needs_confirm = [e for e in engrams if self._cortex.encoding_gate(e)]
-
-        if auto_encode:
-            await self._cortex.encode(auto_encode)
-
-        if needs_confirm:
-            self._pending_memory_confirmations.extend(needs_confirm)
-
-
-def _apply_error_tracking(
-    result_text: str,
-    tool_name: str,
-    error_streak: dict[str, int],
-    resilience_nudged: set[str],
-) -> str:
-    """Track consecutive errors per tool and append nudge/circuit-breaker messages."""
-    is_error = any(
-        marker in result_text
-        for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:")
-    )
-    if is_error:
-        error_streak[tool_name] = error_streak.get(tool_name, 0) + 1
-    else:
-        error_streak[tool_name] = 0
-        resilience_nudged.discard(tool_name)
-
-    streak = error_streak.get(tool_name, 0)
-    if streak >= _RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged:
-        result_text += _RESILIENCE_NUDGE
-        resilience_nudged.add(tool_name)
-
-    if streak >= _MAX_CONSECUTIVE_ERRORS:
-        result_text += (
-            f"\n\nSYSTEM: The '{tool_name}' tool has failed {_MAX_CONSECUTIVE_ERRORS} times "
-            "in a row. Stop retrying this approach. Either try a completely different "
-            "strategy or tell the user what's going wrong so they can help."
-        )
-
-    return result_text
-
-
-
 async def _handle_connect(
     console: Console,
     settings: AntonSettings,

From 53de84f28ee3f1012c7e7702084ea9b6d430311c Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 16:02:45 -0700
Subject: [PATCH 005/134] added the core pkg for tools

---
 anton/core/tools/tool_defs.py     | 0
 anton/core/tools/tool_handlers.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 anton/core/tools/tool_defs.py
 create mode 100644 anton/core/tools/tool_handlers.py

diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py
new file mode 100644
index 00000000..e69de29b
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
new file mode 100644
index 00000000..e69de29b

From 18ff4c394a8d0f3111c1effdb24f4cc975ca3e95 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 16:05:17 -0700
Subject: [PATCH 006/134] moved core tools

---
 anton/core/tools/tool_defs.py     | 145 +++++++++++++
 anton/core/tools/tool_handlers.py | 196 +++++++++++++++++
 anton/tools.py                    | 340 ------------------------------
 3 files changed, 341 insertions(+), 340 deletions(-)

diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py
index e69de29b..a687d202 100644
--- a/anton/core/tools/tool_defs.py
+++ b/anton/core/tools/tool_defs.py
@@ -0,0 +1,145 @@
+SCRATCHPAD_TOOL = {
+    "name": "scratchpad",
+    "description": (
+        "Run Python code in a persistent scratchpad. Use this whenever you need to "
+        "count characters, do math, parse data, transform text, or any task that "
+        "benefits from precise computation rather than guessing. Variables, imports, "
+        "and data persist across cells — like a notebook you drive programmatically.\n\n"
+        "Actions:\n"
+        "- exec: Run code in the scratchpad (creates it if needed)\n"
+        "- view: See all cells and their outputs\n"
+        "- reset: Restart the process, clearing all state (installed packages survive)\n"
+        "- remove: Kill the scratchpad and delete its environment\n"
+        "- dump: Show a clean notebook-style summary of cells (code + truncated output)\n"
+        "- install: Install Python packages into the scratchpad's environment. "
+        "Packages persist across resets.\n\n"
+        "Use print() to produce output. Host Python packages are available by default. "
+        "Include a 'packages' array on exec calls for any libraries your code needs — "
+        "they'll be auto-installed before the cell runs (already-installed ones are skipped).\n"
+        "get_llm() returns a pre-configured LLM client (sync) — call "
+        "llm.complete(system=..., messages=[...]) for AI-powered computation.\n"
+        "llm.generate_object(MyModel, system=..., messages=[...]) extracts structured "
+        "data into Pydantic models. Supports single models and list[Model].\n"
+        "agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) "
+        "runs a tool-call loop where the LLM reasons and calls your tools iteratively. "
+        "handle_tool(name, inputs) -> str is a plain sync function.\n"
+        "sample(var) inspects any variable with type-aware formatting — DataFrames get "
+        "shape/dtypes/head, dicts get keys/values, lists get length/items. "
+        "Defaults to 'preview' mode (compact); use sample(var, mode='full') for complete dump.\n"
+        "All .anton/.env secrets are available as environment variables (os.environ).\n\n"
+        "IMPORTANT: Cells have an inactivity timeout of 30 seconds — if a cell produces "
+        "no output and no progress() calls for 30s, it is killed and all state is lost. "
+        "For long-running code (API calls, data extraction, heavy computation), call "
+        "progress(message) periodically to signal work is ongoing and reset the timer. "
+        "The total timeout scales from your estimated_execution_time_seconds "
+        "(roughly 2x the estimate). You MUST provide estimated_execution_time_seconds "
+        "for every exec call. For very long operations, provide a realistic estimate "
+        "and use progress() to keep the cell alive."
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]},
+            "name": {"type": "string", "description": "Scratchpad name"},
+            "code": {
+                "type": "string",
+                "description": "Python code (exec only). Use print() for output.",
+            },
+            "packages": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Package names needed by this cell (exec or install). "
+                "Listed after code so you know exactly what to include. "
+                "Already-installed packages are skipped automatically.",
+            },
+            "one_line_description": {
+                "type": "string",
+                "description": "Brief description of what this cell does (e.g. 'Scrape listing prices'). Required for exec.",
+            },
+            "estimated_execution_time_seconds": {
+                "type": "integer",
+                "description": "Estimated execution time in seconds. Drives the total timeout (roughly 2x estimate). Use progress() for long cells.",
+            },
+        },
+        "required": ["action", "name"],
+    },
+}
+
+
+MEMORIZE_TOOL = {
+    "name": "memorize",
+    "description": (
+        "Encode a rule or lesson into long-term memory for future sessions. "
+        "Use this when you learn something important, discover a useful pattern, "
+        "or the user asks you to remember something.\n\n"
+        "Entry kinds:\n"
+        "- always: Something to always do ('Use httpx instead of requests')\n"
+        "- never: Something to never do ('Never use time.sleep() in scratchpad')\n"
+        "- when: Conditional rule ('If paginated API → use async + progress()')\n"
+        "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n"
+        "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')"
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "entries": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "text": {
+                            "type": "string",
+                            "description": "The memory to encode",
+                        },
+                        "kind": {
+                            "type": "string",
+                            "enum": ["always", "never", "when", "lesson", "profile"],
+                        },
+                        "scope": {
+                            "type": "string",
+                            "enum": ["global", "project"],
+                        },
+                        "topic": {
+                            "type": "string",
+                            "description": "Topic slug for lessons (e.g. 'api-coingecko')",
+                        },
+                    },
+                    "required": ["text", "kind", "scope"],
+                },
+            },
+        },
+        "required": ["entries"],
+    },
+}
+
+
+RECALL_TOOL = {
+    "name": "recall",
+    "description": (
+        "Search your episodic memory — an archive of past conversations. "
+        "ONLY use this when the user explicitly asks about a previous conversation "
+        "or session (e.g. 'what did we talk about last time?', 'remember when we...', "
+        "'have we discussed X before?'). Do NOT use this for questions about code, "
+        "files, or data in the workspace — use the scratchpad to explore those directly.\n\n"
+        "Returns timestamped episodes matching the query (newest first). "
+        "A single call is enough — do not call multiple times with different queries."
+    ),
+    "input_schema": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "Search term to find in past conversations.",
+            },
+            "max_results": {
+                "type": "integer",
+                "description": "Maximum episodes to return (default 20).",
+            },
+            "days_back": {
+                "type": "integer",
+                "description": "Only search episodes from the last N days.",
+            },
+        },
+        "required": ["query"],
+    },
+}
\ No newline at end of file
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index e69de29b..ebe5dd30 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -0,0 +1,196 @@
+from anton.core.session import ChatSession
+
+
+async def handle_recall(session: ChatSession, tc_input: dict) -> str:
+    """Process a recall tool call — search episodic memory."""
+    if session._episodic is None or not session._episodic.enabled:
+        return "Episodic memory is not available."
+
+    query = tc_input.get("query", "")
+    if not query:
+        return "No query provided."
+
+    kwargs: dict = {}
+    if "max_results" in tc_input:
+        kwargs["max_results"] = int(tc_input["max_results"])
+    if "days_back" in tc_input:
+        kwargs["days_back"] = int(tc_input["days_back"])
+
+    return session._episodic.recall_formatted(query, **kwargs)
+
+
+async def handle_memorize(session: ChatSession, tc_input: dict) -> str:
+    """Process a memorize tool call and return a result string.
+
+    Encoding is fire-and-forget so it never blocks scratchpad execution.
+    """
+    import asyncio
+
+    if session._cortex is None:
+        return "Memory system not available."
+
+    if session._cortex.mode == "off":
+        return "Memory encoding is disabled. Change memory mode via /setup to enable."
+
+    from anton.memory.hippocampus import Engram
+
+    raw_entries = tc_input.get("entries", [])
+    if not raw_entries:
+        return "No entries provided."
+
+    engrams: list[Engram] = []
+    for entry in raw_entries:
+        if not isinstance(entry, dict) or "text" not in entry:
+            continue
+
+        kind = entry.get("kind", "lesson")
+        if kind not in ("always", "never", "when", "lesson", "profile"):
+            kind = "lesson"
+
+        scope = entry.get("scope", "project")
+        if scope not in ("global", "project"):
+            scope = "project"
+
+        # User-sourced memories (via explicit tool call) get high confidence
+        engrams.append(Engram(
+            text=entry["text"],
+            kind=kind,
+            scope=scope,
+            confidence="high",
+            topic=entry.get("topic", ""),
+            source="user",
+        ))
+
+    if not engrams:
+        return "No valid entries provided."
+
+    # Always encode immediately via fire-and-forget — the LLM explicitly
+    # chose to memorize these, so we never interrupt the user mid-turn
+    # with confirmation prompts.  Confirmations are reserved for the
+    # post-turn consolidator (lessons extracted from scratchpad sessions).
+    async def _encode_bg(cortex, entries):
+        try:
+            await cortex.encode(entries)
+        except Exception:
+            pass  # Best-effort; don't disrupt the conversation
+
+    asyncio.create_task(_encode_bg(session._cortex, engrams))
+
+    descriptions = [f"Encoded {e.kind}: {e.text}" for e in engrams]
+    return "Memory updated: " + "; ".join(descriptions)
+
+
+async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict):
+    """Validate and prepare a scratchpad exec call.
+
+    Returns (pad, code, description, estimated_time, estimated_seconds) or
+    a str error message if validation fails.
+    """
+    name = tc_input.get("name", "")
+    code = tc_input.get("code", "")
+    if not code or not code.strip():
+        return "No code provided."
+
+    pad = await session._scratchpads.get_or_create(name)
+
+    # Auto-install packages before running the cell
+    packages = tc_input.get("packages", [])
+    if packages:
+        install_result = await pad.install_packages(packages)
+        if "Install failed" in install_result or "timed out" in install_result:
+            return install_result
+
+    description = tc_input.get("one_line_description", "")
+    estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0)
+    if isinstance(estimated_seconds, str):
+        try:
+            estimated_seconds = int(estimated_seconds)
+        except ValueError:
+            estimated_seconds = 0
+
+    estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else ""
+    return pad, code, description, estimated_time, estimated_seconds
+
+
+def format_cell_result(cell) -> str:
+    """Format a Cell into a tool result string.
+
+    Every section is labeled so the LLM can tell what came from where:
+    [output] — print() / stdout from the cell code
+    [logs]   — library logging (httpx, urllib3, etc.) captured at INFO+
+    [stderr] — warnings and stderr writes
+    [error]  — Python traceback if the cell raised an exception
+    """
+    parts: list[str] = []
+    if cell.stdout:
+        stdout = cell.stdout
+        if len(stdout) > 10_000:
+            stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)"
+        parts.append(f"[output]\n{stdout}")
+    if cell.logs if hasattr(cell, "logs") else False:
+        logs = cell.logs.strip()
+        if len(logs) > 3_000:
+            logs = logs[:3_000] + "\n... (logs truncated)"
+        parts.append(f"[logs]\n{logs}")
+    if cell.stderr:
+        parts.append(f"[stderr]\n{cell.stderr}")
+    if cell.error:
+        parts.append(f"[error]\n{cell.error}")
+    if not parts:
+        return "Code executed successfully (no output)."
+    return "\n".join(parts)
+
+
+async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
+    """Dispatch a scratchpad tool call by action."""
+    action = tc_input.get("action", "")
+    name = tc_input.get("name", "")
+
+    if not name:
+        return "Scratchpad name is required."
+
+    if action == "exec":
+        result = await prepare_scratchpad_exec(session, tc_input)
+        if isinstance(result, str):
+            return result
+        pad, code, description, estimated_time, estimated_seconds = result
+
+        cell = await pad.execute(
+            code,
+            description=description,
+            estimated_time=estimated_time,
+            estimated_seconds=estimated_seconds,
+        )
+        return format_cell_result(cell)
+
+    elif action == "view":
+        pad = session._scratchpads._pads.get(name)
+        if pad is None:
+            return f"No scratchpad named '{name}'."
+        return pad.view()
+
+    elif action == "reset":
+        pad = session._scratchpads._pads.get(name)
+        if pad is None:
+            return f"No scratchpad named '{name}'."
+        await pad.reset()
+        return f"Scratchpad '{name}' reset. All state cleared."
+
+    elif action == "remove":
+        return await session._scratchpads.remove(name)
+
+    elif action == "dump":
+        pad = session._scratchpads._pads.get(name)
+        if pad is None:
+            return f"No scratchpad named '{name}'."
+        return pad.render_notebook()
+
+    elif action == "install":
+        packages = tc_input.get("packages", [])
+        if not packages:
+            return "No packages specified."
+        pad = await session._scratchpads.get_or_create(name)
+        return await pad.install_packages(packages)
+
+    else:
+        return f"Unknown scratchpad action: {action}"
diff --git a/anton/tools.py b/anton/tools.py
index 2254b39a..f8797aef 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -60,84 +60,6 @@ def build_tool_schemas(available: list[str]) -> list[dict]:
         if t.name in available
     ]
 
-
-MEMORIZE_TOOL = {
-    "name": "memorize",
-    "description": (
-        "Encode a rule or lesson into long-term memory for future sessions. "
-        "Use this when you learn something important, discover a useful pattern, "
-        "or the user asks you to remember something.\n\n"
-        "Entry kinds:\n"
-        "- always: Something to always do ('Use httpx instead of requests')\n"
-        "- never: Something to never do ('Never use time.sleep() in scratchpad')\n"
-        "- when: Conditional rule ('If paginated API → use async + progress()')\n"
-        "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n"
-        "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')"
-    ),
-    "input_schema": {
-        "type": "object",
-        "properties": {
-            "entries": {
-                "type": "array",
-                "items": {
-                    "type": "object",
-                    "properties": {
-                        "text": {
-                            "type": "string",
-                            "description": "The memory to encode",
-                        },
-                        "kind": {
-                            "type": "string",
-                            "enum": ["always", "never", "when", "lesson", "profile"],
-                        },
-                        "scope": {
-                            "type": "string",
-                            "enum": ["global", "project"],
-                        },
-                        "topic": {
-                            "type": "string",
-                            "description": "Topic slug for lessons (e.g. 'api-coingecko')",
-                        },
-                    },
-                    "required": ["text", "kind", "scope"],
-                },
-            },
-        },
-        "required": ["entries"],
-    },
-}
-
-RECALL_TOOL = {
-    "name": "recall",
-    "description": (
-        "Search your episodic memory — an archive of past conversations. "
-        "ONLY use this when the user explicitly asks about a previous conversation "
-        "or session (e.g. 'what did we talk about last time?', 'remember when we...', "
-        "'have we discussed X before?'). Do NOT use this for questions about code, "
-        "files, or data in the workspace — use the scratchpad to explore those directly.\n\n"
-        "Returns timestamped episodes matching the query (newest first). "
-        "A single call is enough — do not call multiple times with different queries."
-    ),
-    "input_schema": {
-        "type": "object",
-        "properties": {
-            "query": {
-                "type": "string",
-                "description": "Search term to find in past conversations.",
-            },
-            "max_results": {
-                "type": "integer",
-                "description": "Maximum episodes to return (default 20).",
-            },
-            "days_back": {
-                "type": "integer",
-                "description": "Only search episodes from the last N days.",
-            },
-        },
-        "required": ["query"],
-    },
-}
-
 CONNECT_DATASOURCE_TOOL = {
     "name": "connect_new_datasource",
     "description": (
@@ -197,268 +119,6 @@ def build_tool_schemas(available: list[str]) -> list[dict]:
 }
 
 
-SCRATCHPAD_TOOL = {
-    "name": "scratchpad",
-    "description": (
-        "Run Python code in a persistent scratchpad. Use this whenever you need to "
-        "count characters, do math, parse data, transform text, or any task that "
-        "benefits from precise computation rather than guessing. Variables, imports, "
-        "and data persist across cells — like a notebook you drive programmatically.\n\n"
-        "Actions:\n"
-        "- exec: Run code in the scratchpad (creates it if needed)\n"
-        "- view: See all cells and their outputs\n"
-        "- reset: Restart the process, clearing all state (installed packages survive)\n"
-        "- remove: Kill the scratchpad and delete its environment\n"
-        "- dump: Show a clean notebook-style summary of cells (code + truncated output)\n"
-        "- install: Install Python packages into the scratchpad's environment. "
-        "Packages persist across resets.\n\n"
-        "Use print() to produce output. Host Python packages are available by default. "
-        "Include a 'packages' array on exec calls for any libraries your code needs — "
-        "they'll be auto-installed before the cell runs (already-installed ones are skipped).\n"
-        "get_llm() returns a pre-configured LLM client (sync) — call "
-        "llm.complete(system=..., messages=[...]) for AI-powered computation.\n"
-        "llm.generate_object(MyModel, system=..., messages=[...]) extracts structured "
-        "data into Pydantic models. Supports single models and list[Model].\n"
-        "agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) "
-        "runs a tool-call loop where the LLM reasons and calls your tools iteratively. "
-        "handle_tool(name, inputs) -> str is a plain sync function.\n"
-        "sample(var) inspects any variable with type-aware formatting — DataFrames get "
-        "shape/dtypes/head, dicts get keys/values, lists get length/items. "
-        "Defaults to 'preview' mode (compact); use sample(var, mode='full') for complete dump.\n"
-        "All .anton/.env secrets are available as environment variables (os.environ).\n\n"
-        "IMPORTANT: Cells have an inactivity timeout of 30 seconds — if a cell produces "
-        "no output and no progress() calls for 30s, it is killed and all state is lost. "
-        "For long-running code (API calls, data extraction, heavy computation), call "
-        "progress(message) periodically to signal work is ongoing and reset the timer. "
-        "The total timeout scales from your estimated_execution_time_seconds "
-        "(roughly 2x the estimate). You MUST provide estimated_execution_time_seconds "
-        "for every exec call. For very long operations, provide a realistic estimate "
-        "and use progress() to keep the cell alive."
-    ),
-    "input_schema": {
-        "type": "object",
-        "properties": {
-            "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]},
-            "name": {"type": "string", "description": "Scratchpad name"},
-            "code": {
-                "type": "string",
-                "description": "Python code (exec only). Use print() for output.",
-            },
-            "packages": {
-                "type": "array",
-                "items": {"type": "string"},
-                "description": "Package names needed by this cell (exec or install). "
-                "Listed after code so you know exactly what to include. "
-                "Already-installed packages are skipped automatically.",
-            },
-            "one_line_description": {
-                "type": "string",
-                "description": "Brief description of what this cell does (e.g. 'Scrape listing prices'). Required for exec.",
-            },
-            "estimated_execution_time_seconds": {
-                "type": "integer",
-                "description": "Estimated execution time in seconds. Drives the total timeout (roughly 2x estimate). Use progress() for long cells.",
-            },
-        },
-        "required": ["action", "name"],
-    },
-}
-
-async def handle_recall(session: ChatSession, tc_input: dict) -> str:
-    """Process a recall tool call — search episodic memory."""
-    if session._episodic is None or not session._episodic.enabled:
-        return "Episodic memory is not available."
-
-    query = tc_input.get("query", "")
-    if not query:
-        return "No query provided."
-
-    kwargs: dict = {}
-    if "max_results" in tc_input:
-        kwargs["max_results"] = int(tc_input["max_results"])
-    if "days_back" in tc_input:
-        kwargs["days_back"] = int(tc_input["days_back"])
-
-    return session._episodic.recall_formatted(query, **kwargs)
-
-
-async def handle_memorize(session: ChatSession, tc_input: dict) -> str:
-    """Process a memorize tool call and return a result string.
-
-    Encoding is fire-and-forget so it never blocks scratchpad execution.
-    """
-    import asyncio
-
-    if session._cortex is None:
-        return "Memory system not available."
-
-    if session._cortex.mode == "off":
-        return "Memory encoding is disabled. Change memory mode via /setup to enable."
-
-    from anton.memory.hippocampus import Engram
-
-    raw_entries = tc_input.get("entries", [])
-    if not raw_entries:
-        return "No entries provided."
-
-    engrams: list[Engram] = []
-    for entry in raw_entries:
-        if not isinstance(entry, dict) or "text" not in entry:
-            continue
-
-        kind = entry.get("kind", "lesson")
-        if kind not in ("always", "never", "when", "lesson", "profile"):
-            kind = "lesson"
-
-        scope = entry.get("scope", "project")
-        if scope not in ("global", "project"):
-            scope = "project"
-
-        # User-sourced memories (via explicit tool call) get high confidence
-        engrams.append(Engram(
-            text=entry["text"],
-            kind=kind,
-            scope=scope,
-            confidence="high",
-            topic=entry.get("topic", ""),
-            source="user",
-        ))
-
-    if not engrams:
-        return "No valid entries provided."
-
-    # Always encode immediately via fire-and-forget — the LLM explicitly
-    # chose to memorize these, so we never interrupt the user mid-turn
-    # with confirmation prompts.  Confirmations are reserved for the
-    # post-turn consolidator (lessons extracted from scratchpad sessions).
-    async def _encode_bg(cortex, entries):
-        try:
-            await cortex.encode(entries)
-        except Exception:
-            pass  # Best-effort; don't disrupt the conversation
-
-    asyncio.create_task(_encode_bg(session._cortex, engrams))
-
-    descriptions = [f"Encoded {e.kind}: {e.text}" for e in engrams]
-    return "Memory updated: " + "; ".join(descriptions)
-
-
-async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict):
-    """Validate and prepare a scratchpad exec call.
-
-    Returns (pad, code, description, estimated_time, estimated_seconds) or
-    a str error message if validation fails.
-    """
-    name = tc_input.get("name", "")
-    code = tc_input.get("code", "")
-    if not code or not code.strip():
-        return "No code provided."
-
-    pad = await session._scratchpads.get_or_create(name)
-
-    # Auto-install packages before running the cell
-    packages = tc_input.get("packages", [])
-    if packages:
-        install_result = await pad.install_packages(packages)
-        if "Install failed" in install_result or "timed out" in install_result:
-            return install_result
-
-    description = tc_input.get("one_line_description", "")
-    estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0)
-    if isinstance(estimated_seconds, str):
-        try:
-            estimated_seconds = int(estimated_seconds)
-        except ValueError:
-            estimated_seconds = 0
-
-    estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else ""
-    return pad, code, description, estimated_time, estimated_seconds
-
-
-def format_cell_result(cell) -> str:
-    """Format a Cell into a tool result string.
-
-    Every section is labeled so the LLM can tell what came from where:
-    [output] — print() / stdout from the cell code
-    [logs]   — library logging (httpx, urllib3, etc.) captured at INFO+
-    [stderr] — warnings and stderr writes
-    [error]  — Python traceback if the cell raised an exception
-    """
-    parts: list[str] = []
-    if cell.stdout:
-        stdout = cell.stdout
-        if len(stdout) > 10_000:
-            stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)"
-        parts.append(f"[output]\n{stdout}")
-    if cell.logs if hasattr(cell, "logs") else False:
-        logs = cell.logs.strip()
-        if len(logs) > 3_000:
-            logs = logs[:3_000] + "\n... (logs truncated)"
-        parts.append(f"[logs]\n{logs}")
-    if cell.stderr:
-        parts.append(f"[stderr]\n{cell.stderr}")
-    if cell.error:
-        parts.append(f"[error]\n{cell.error}")
-    if not parts:
-        return "Code executed successfully (no output)."
-    return "\n".join(parts)
-
-
-async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
-    """Dispatch a scratchpad tool call by action."""
-    action = tc_input.get("action", "")
-    name = tc_input.get("name", "")
-
-    if not name:
-        return "Scratchpad name is required."
-
-    if action == "exec":
-        result = await prepare_scratchpad_exec(session, tc_input)
-        if isinstance(result, str):
-            return result
-        pad, code, description, estimated_time, estimated_seconds = result
-
-        cell = await pad.execute(
-            code,
-            description=description,
-            estimated_time=estimated_time,
-            estimated_seconds=estimated_seconds,
-        )
-        return format_cell_result(cell)
-
-    elif action == "view":
-        pad = session._scratchpads._pads.get(name)
-        if pad is None:
-            return f"No scratchpad named '{name}'."
-        return pad.view()
-
-    elif action == "reset":
-        pad = session._scratchpads._pads.get(name)
-        if pad is None:
-            return f"No scratchpad named '{name}'."
-        await pad.reset()
-        return f"Scratchpad '{name}' reset. All state cleared."
-
-    elif action == "remove":
-        return await session._scratchpads.remove(name)
-
-    elif action == "dump":
-        pad = session._scratchpads._pads.get(name)
-        if pad is None:
-            return f"No scratchpad named '{name}'."
-        return pad.render_notebook()
-
-    elif action == "install":
-        packages = tc_input.get("packages", [])
-        if not packages:
-            return "No packages specified."
-        pad = await session._scratchpads.get_or_create(name)
-        return await pad.install_packages(packages)
-
-    else:
-        return f"Unknown scratchpad action: {action}"
-
-
 async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str:
     """Handle connect_new_datasource tool call — interactive connection flow."""
     engine = tc_input.get("engine", "")

From 1ab00f91721818cc07539fad08ad5a73d1114146 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 17:48:52 -0700
Subject: [PATCH 007/134] reintroduced ToolDef class and impl core tools

---
 anton/core/tools/tool_defs.py | 47 ++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py
index a687d202..ce221e3b 100644
--- a/anton/core/tools/tool_defs.py
+++ b/anton/core/tools/tool_defs.py
@@ -1,6 +1,20 @@
-SCRATCHPAD_TOOL = {
-    "name": "scratchpad",
-    "description": (
+from anton.core.tools.tool_handlers import handle_scratchpad, handle_memorize, handle_recall
+
+from dataclasses import dataclass
+from typing import Callable
+
+
+@dataclass
+class ToolDef:
+    name: str
+    description: str
+    input_schema: dict
+    handler: Callable  # async (session, tc_input) -> str
+
+
+SCRATCHPAD_TOOL = ToolDef(
+    name = "scratchpad",
+    description = (
         "Run Python code in a persistent scratchpad. Use this whenever you need to "
         "count characters, do math, parse data, transform text, or any task that "
         "benefits from precise computation rather than guessing. Variables, imports, "
@@ -36,7 +50,7 @@
         "for every exec call. For very long operations, provide a realistic estimate "
         "and use progress() to keep the cell alive."
     ),
-    "input_schema": {
+        input_schema = {
         "type": "object",
         "properties": {
             "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]},
@@ -63,12 +77,13 @@
         },
         "required": ["action", "name"],
     },
-}
+    handler = handle_scratchpad,
+)
 
 
-MEMORIZE_TOOL = {
-    "name": "memorize",
-    "description": (
+MEMORIZE_TOOL = ToolDef(
+    name = "memorize",
+    description = (
         "Encode a rule or lesson into long-term memory for future sessions. "
         "Use this when you learn something important, discover a useful pattern, "
         "or the user asks you to remember something.\n\n"
@@ -79,7 +94,7 @@
         "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n"
         "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')"
     ),
-    "input_schema": {
+    input_schema = {
         "type": "object",
         "properties": {
             "entries": {
@@ -110,12 +125,13 @@
         },
         "required": ["entries"],
     },
-}
+    handler = handle_memorize,
+)
 
 
-RECALL_TOOL = {
-    "name": "recall",
-    "description": (
+RECALL_TOOL = ToolDef(
+    name = "recall",
+    description = (
         "Search your episodic memory — an archive of past conversations. "
         "ONLY use this when the user explicitly asks about a previous conversation "
         "or session (e.g. 'what did we talk about last time?', 'remember when we...', "
@@ -124,7 +140,7 @@
         "Returns timestamped episodes matching the query (newest first). "
         "A single call is enough — do not call multiple times with different queries."
     ),
-    "input_schema": {
+    input_schema = {
         "type": "object",
         "properties": {
             "query": {
@@ -142,4 +158,5 @@
         },
         "required": ["query"],
     },
-}
\ No newline at end of file
+    handler = handle_recall,
+)
\ No newline at end of file

From 75df939f50019aa985a4c1a06aa96a2002d4df7c Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 17:49:05 -0700
Subject: [PATCH 008/134] removed invalid dispatch

---
 anton/core/tools/tool_handlers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index ebe5dd30..33246553 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -1,4 +1,7 @@
-from anton.core.session import ChatSession
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from anton.chat_session import ChatSession
 
 
 async def handle_recall(session: ChatSession, tc_input: dict) -> str:

From 42470231ea02f67d18964a34697d0d794c004d7c Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 17:49:27 -0700
Subject: [PATCH 009/134] introduced ToolRegistry

---
 anton/core/tools/registry.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 anton/core/tools/registry.py

diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
new file mode 100644
index 00000000..59a13e74
--- /dev/null
+++ b/anton/core/tools/registry.py
@@ -0,0 +1,25 @@
+from anton.core.tools.tool_defs import MEMORIZE_TOOL, RECALL_TOOL, SCRATCHPAD_TOOL, ToolDef
+
+
+class ToolRegistry:
+    """
+    Registry of tools available to the LLM.
+    """
+    def __init__(self) -> None:
+        # Register core tools.
+        self._tools = [SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL]
+
+    def register_tool(self, tool_def: ToolDef) -> None:
+        """
+        Register a new (extra to core) tool.
+        """
+        self._tools.append(tool_def)
+
+    def dispatch_tool(self, tool_name: str, tc_input: dict) -> str:
+        """
+        Dispatch a tool call by name. Returns result text.
+        """
+        tool_def = next((tool for tool in self._tools if tool.name == tool_name), None)
+        if tool_def is None:
+            raise ValueError(f"Tool {tool_name} not found")
+        return tool_def.handler(tc_input)

From 3eec212b4ecbb25ebfee0341dd224547fdd26d45 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 17:51:26 -0700
Subject: [PATCH 010/134] removed core tool registration

---
 anton/core/tools/registry.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 59a13e74..6ef01d29 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -1,4 +1,7 @@
-from anton.core.tools.tool_defs import MEMORIZE_TOOL, RECALL_TOOL, SCRATCHPAD_TOOL, ToolDef
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from anton.core.tools.tool_defs import ToolDef
 
 
 class ToolRegistry:
@@ -7,7 +10,7 @@ class ToolRegistry:
     """
     def __init__(self) -> None:
         # Register core tools.
-        self._tools = [SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL]
+        self._tools = []
 
     def register_tool(self, tool_def: ToolDef) -> None:
         """

From cf519d09d41c1499f696207e5fb71bbb3706feb8 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 18:04:25 -0700
Subject: [PATCH 011/134] fixed tool registration and dispatch logic

---
 anton/core/session.py        | 51 ++++++++++++--------------
 anton/core/tools/registry.py | 10 ++++--
 anton/tools.py               | 69 +-----------------------------------
 3 files changed, 31 insertions(+), 99 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index b4253542..068db4a8 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -15,16 +15,8 @@
     StreamToolResult,
 )
 from anton.scratchpad import ScratchpadManager
-from anton.tools import (
-    CONNECT_DATASOURCE_TOOL,
-    MEMORIZE_TOOL,
-    PUBLISH_TOOL,
-    RECALL_TOOL,
-    SCRATCHPAD_TOOL,
-    dispatch_tool,
-    format_cell_result,
-    prepare_scratchpad_exec,
-)
+from anton.core.tools.registry import ToolRegistry
+from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef
 
 from anton.utils.datasources import (
     build_datasource_context,
@@ -109,6 +101,7 @@ def __init__(
         history_store: HistoryStore | None = None,
         session_id: str | None = None,
         proactive_dashboards: bool = False,
+        tools: list[ToolDef] | None = None,
     ) -> None:
         self._llm = llm_client
         self._self_awareness = self_awareness
@@ -116,6 +109,7 @@ def __init__(
         self._episodic = episodic
         self._runtime_context = runtime_context
         self._proactive_dashboards = proactive_dashboards
+        self._extra_tools = tools
         self._workspace = workspace
         self._console = console
         self._history: list[dict] = list(initial_history) if initial_history else []
@@ -137,6 +131,7 @@ def __init__(
             coding_base_url=coding_base_url,
             workspace_path=workspace.base if workspace else None,
         )
+        self.tool_registry = ToolRegistry()
 
     @property
     def history(self) -> list[dict]:
@@ -270,7 +265,13 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
     }
 
     def _build_tools(self) -> list[dict]:
-        scratchpad_tool = dict(SCRATCHPAD_TOOL)
+        self._build_core_tools()
+        for tool in self._extra_tools:
+            self.tool_registry.register_tool(tool)
+        return self.tool_registry.dump()
+
+    def _build_core_tools(self) -> None:
+        scratchpad_tool = SCRATCHPAD_TOOL
         pkg_list = self._scratchpads._available_packages
         if pkg_list:
             notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES)
@@ -279,29 +280,21 @@ def _build_tools(self) -> list[dict]:
                 extra = f"\n\nInstalled packages ({len(pkg_list)} total, notable: {pkg_line})."
             else:
                 extra = f"\n\nInstalled packages: {len(pkg_list)} total (standard library plus dependencies)."
-            scratchpad_tool["description"] = SCRATCHPAD_TOOL["description"] + extra
+            scratchpad_tool.description = scratchpad_tool.description + extra
 
         # Inject scratchpad wisdom from memory (procedural priming)
         if self._cortex is not None:
             wisdom = self._cortex.get_scratchpad_context()
             if wisdom:
-                scratchpad_tool[
-                    "description"
-                ] += f"\n\nLessons from past sessions:\n{wisdom}"
+                scratchpad_tool.description += f"\n\nLessons from past sessions:\n{wisdom}"
 
-        tools = [scratchpad_tool]
-        if self._cortex is not None:
-            tools.append(MEMORIZE_TOOL)
-        elif self._self_awareness is not None:
-            # Legacy fallback
-            from anton.tools import MEMORIZE_TOOL as _MT
+        self.tool_registry.register_tool(scratchpad_tool)
+
+        if self._cortex is not None or self._self_awareness is not None:
+            self.tool_registry.register_tool(MEMORIZE_TOOL)
 
-            tools.append(_MT)
         if self._episodic is not None and self._episodic.enabled:
-            tools.append(RECALL_TOOL)
-        tools.append(CONNECT_DATASOURCE_TOOL)
-        tools.append(PUBLISH_TOOL)
-        return tools
+            self.tool_registry.register_tool(RECALL_TOOL)
 
     async def close(self) -> None:
         """Clean up scratchpads and other resources."""
@@ -496,7 +489,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
             tool_results: list[dict] = []
             for tc in response.tool_calls:
                 try:
-                    result_text = await dispatch_tool(self, tc.name, tc.input)
+                    result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input)
                 except Exception as exc:
                     result_text = f"Tool '{tc.name}' failed: {exc}"
 
@@ -875,7 +868,7 @@ async def _stream_and_handle_tools(
                             )
                             if self._escape_watcher:
                                 self._escape_watcher.pause()
-                            result_text = await dispatch_tool(self, tc.name, tc.input)
+                            result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input)
                             if self._escape_watcher:
                                 self._escape_watcher.resume()
                             yield StreamTaskProgress(
@@ -883,7 +876,7 @@ async def _stream_and_handle_tools(
                                 message="Analyzing results...",
                             )
                         else:
-                            result_text = await dispatch_tool(self, tc.name, tc.input)
+                            result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input)
                             if (
                                 tc.name == "scratchpad"
                                 and tc.input.get("action") == "dump"
diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 6ef01d29..a73b6679 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -18,11 +18,17 @@ def register_tool(self, tool_def: ToolDef) -> None:
         """
         self._tools.append(tool_def)
 
-    def dispatch_tool(self, tool_name: str, tc_input: dict) -> str:
+    async def dispatch_tool(self, tool_name: str, tc_input: dict) -> str:
         """
         Dispatch a tool call by name. Returns result text.
         """
         tool_def = next((tool for tool in self._tools if tool.name == tool_name), None)
         if tool_def is None:
             raise ValueError(f"Tool {tool_name} not found")
-        return tool_def.handler(tc_input)
+        return await tool_def.handler(tc_input)
+
+    def dump(self) -> list[dict]:
+        """
+        Dump the registry as a list of tool definitions.
+        """
+        return [tool.model_dump() for tool in self._tools]
diff --git a/anton/tools.py b/anton/tools.py
index f8797aef..d39a70c2 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -2,64 +2,12 @@
 
 from __future__ import annotations
 
-from collections.abc import Callable
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from anton.chat import ChatSession
 
 
-@dataclass
-class ToolDef:
-    name: str
-    description: str
-    input_schema: dict
-    handler: Callable  # async (session, tc_input) -> str
-    stream_handler: Callable | None = None  # async generator version
-
-
-_registry: dict[str, ToolDef] = {}
-
-
-def tool(name: str, *, description: str, input_schema: dict):
-    """Decorator to register a tool with its handler."""
-    def decorator(fn):
-        _registry[name] = ToolDef(
-            name=name,
-            description=description,
-            input_schema=input_schema,
-            handler=fn,
-        )
-        return fn
-    return decorator
-
-
-def tool_stream(name: str):
-    """Decorator to register a streaming handler for an existing tool."""
-    def decorator(fn):
-        if name in _registry:
-            _registry[name].stream_handler = fn
-        return fn
-    return decorator
-
-
-def get_tool(name: str) -> ToolDef | None:
-    return _registry.get(name)
-
-
-def all_tools() -> list[ToolDef]:
-    return list(_registry.values())
-
-
-def build_tool_schemas(available: list[str]) -> list[dict]:
-    """Build API-ready tool schema dicts for the given tool names."""
-    return [
-        {"name": t.name, "description": t.description, "input_schema": t.input_schema}
-        for t in _registry.values()
-        if t.name in available
-    ]
-
 CONNECT_DATASOURCE_TOOL = {
     "name": "connect_new_datasource",
     "description": (
@@ -272,18 +220,3 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str
 
     return f"Published successfully!\nView URL: {view_url}"
 
-
-async def dispatch_tool(session: ChatSession, tool_name: str, tc_input: dict) -> str:
-    """Dispatch a tool call by name. Returns result text."""
-    if tool_name == "memorize":
-        return await handle_memorize(session, tc_input)
-    elif tool_name == "scratchpad":
-        return await handle_scratchpad(session, tc_input)
-    elif tool_name == "recall":
-        return await handle_recall(session, tc_input)
-    elif tool_name == "connect_new_datasource":
-        return await handle_connect_datasource(session, tc_input)
-    elif tool_name == "publish_or_preview":
-        return await handle_publish_or_preview(session, tc_input)
-    else:
-        return f"Unknown tool: {tool_name}"

From 687f37193341383df80bde66861f41ff005256e4 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 18:11:27 -0700
Subject: [PATCH 012/134] handled extra tool registration

---
 anton/chat.py  |   2 +
 anton/tools.py | 131 +++++++++++++++++++++++++------------------------
 2 files changed, 68 insertions(+), 65 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 5bd77801..d579a8b3 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -49,6 +49,7 @@
     handle_connect_datasource,
     handle_test_datasource,
 )
+from anton.tools import CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL
 from anton.utils.prompt import (
     prompt_or_cancel,
     prompt_minds_api_key,
@@ -1015,6 +1016,7 @@ async def _chat_loop(
         history_store=history_store,
         session_id=current_session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL],
     )
 
     # Handle --resume flag at startup
diff --git a/anton/tools.py b/anton/tools.py
index d39a70c2..3ce6d92c 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -1,76 +1,16 @@
-"""Dynamic tool registry — decorator-based registration for chat tools."""
-
-from __future__ import annotations
+"""Extra tools for the open source terminal agent."""
 
 from typing import TYPE_CHECKING
 
-if TYPE_CHECKING:
-    from anton.chat import ChatSession
-
-
-CONNECT_DATASOURCE_TOOL = {
-    "name": "connect_new_datasource",
-    "description": (
-        "Connect a new data source to Anton's Local Vault. Call this when the user "
-        "asks a question that requires data from a source that isn't connected yet "
-        "(e.g. email, database, CRM, API). This starts an interactive connection flow "
-        "where the user enters their credentials.\n\n"
-        "Pass the datasource type/name (e.g. 'gmail', 'postgres', 'salesforce', 'hubspot'). "
-        "Anton will match it to the right connector and guide the user through setup.\n\n"
-        "Do NOT print any message before calling this tool — it handles the user-facing output."
-    ),
-    "input_schema": {
-        "type": "object",
-        "properties": {
-            "engine": {
-                "type": "string",
-                "description": "The datasource type or name (e.g. 'gmail', 'postgres', 'snowflake', 'hubspot')",
-            },
-            "reason": {
-                "type": "string",
-                "description": "Brief explanation of why this datasource is needed",
-            },
-        },
-        "required": ["engine"],
-    },
-}
+from anton.core.tools.tool_defs import ToolDef
 
-PUBLISH_TOOL = {
-    "name": "publish_or_preview",
-    "description": (
-        "Call this after generating an HTML dashboard or report in .anton/output/. "
-        "Actions: 'ask' (default) prompts the user to preview/publish/skip interactively. "
-        "'preview' opens the file in the browser immediately. "
-        "'publish' publishes to the web immediately. "
-        "Use 'preview' or 'publish' when the user has already stated their intent. "
-        "Use 'ask' after generating a new dashboard to let the user choose."
-    ),
-    "input_schema": {
-        "type": "object",
-        "properties": {
-            "file_path": {
-                "type": "string",
-                "description": "Path to the HTML file (e.g. .anton/output/dashboard.html)",
-            },
-            "title": {
-                "type": "string",
-                "description": "Short title describing the dashboard (e.g. 'BTC & Macro Dashboard')",
-            },
-            "action": {
-                "type": "string",
-                "enum": ["ask", "preview", "publish"],
-                "description": "What to do: 'ask' prompts user, 'preview' opens locally, 'publish' publishes to web",
-            },
-        },
-        "required": ["file_path"],
-    },
-}
+if TYPE_CHECKING:
+    from anton.core.session import ChatSession
 
 
 async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str:
     """Handle connect_new_datasource tool call — interactive connection flow."""
     engine = tc_input.get("engine", "")
-    reason = tc_input.get("reason", "")
     if not engine:
         return "Engine name is required."
 
@@ -84,7 +24,6 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     )
 
     from anton.commands.datasource import handle_connect_datasource
-    from anton.utils.prompt import prompt_or_cancel
     from anton.data_vault import DataVault
 
     # Check which connections exist before
@@ -134,6 +73,35 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
         )
 
 
+CONNECT_DATASOURCE_TOOL = ToolDef(
+    name = "connect_new_datasource",
+    description = (
+        "Connect a new data source to Anton's Local Vault. Call this when the user "
+        "asks a question that requires data from a source that isn't connected yet "
+        "(e.g. email, database, CRM, API). This starts an interactive connection flow "
+        "where the user enters their credentials.\n\n"
+        "Pass the datasource type/name (e.g. 'gmail', 'postgres', 'salesforce', 'hubspot'). "
+        "Anton will match it to the right connector and guide the user through setup.\n\n"
+        "Do NOT print any message before calling this tool — it handles the user-facing output."
+    ),
+    input_schema = {
+        "type": "object",
+        "properties": {
+            "engine": {
+                "type": "string",
+                "description": "The datasource type or name (e.g. 'gmail', 'postgres', 'snowflake', 'hubspot')",
+            },
+            "reason": {
+                "type": "string",
+                "description": "Brief explanation of why this datasource is needed",
+            },
+        },
+        "required": ["engine"],
+    },
+    handler = handle_connect_datasource,
+)
+
+
 async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str:
     """Interactive preview/publish flow after dashboard creation."""
     import os
@@ -161,6 +129,7 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str
     # Publish flow
     from anton.config.settings import AntonSettings
     from anton.publisher import publish
+    from anton.utils.prompt import prompt_or_cancel
 
     settings = AntonSettings()
 
@@ -220,3 +189,35 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str
 
     return f"Published successfully!\nView URL: {view_url}"
 
+
+PUBLISH_TOOL = ToolDef(
+    name = "publish_or_preview",
+    description = (
+        "Call this after generating an HTML dashboard or report in .anton/output/. "
+        "Actions: 'ask' (default) prompts the user to preview/publish/skip interactively. "
+        "'preview' opens the file in the browser immediately. "
+        "'publish' publishes to the web immediately. "
+        "Use 'preview' or 'publish' when the user has already stated their intent. "
+        "Use 'ask' after generating a new dashboard to let the user choose."
+    ),
+    input_schema = {
+        "type": "object",
+        "properties": {
+            "file_path": {
+                "type": "string",
+                "description": "Path to the HTML file (e.g. .anton/output/dashboard.html)",
+            },
+            "title": {
+                "type": "string",
+                "description": "Short title describing the dashboard (e.g. 'BTC & Macro Dashboard')",
+            },
+            "action": {
+                "type": "string",
+                "enum": ["ask", "preview", "publish"],
+                "description": "What to do: 'ask' prompts user, 'preview' opens locally, 'publish' publishes to web",
+            },
+        },
+        "required": ["file_path"],
+    },
+    handler = handle_publish_or_preview,
+)

From 01e3a9bc11c9c01e5529eeb2458f6b077ead4594 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 18:25:36 -0700
Subject: [PATCH 013/134] introduced core utils and fixed broken imports

---
 anton/core/session.py             |  4 +-
 anton/core/tools/tool_handlers.py | 63 +-----------------------------
 anton/core/utils/__init__.py      |  0
 anton/core/utils/scratchpad.py    | 65 +++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 62 deletions(-)
 create mode 100644 anton/core/utils/__init__.py
 create mode 100644 anton/core/utils/scratchpad.py

diff --git a/anton/core/session.py b/anton/core/session.py
index 068db4a8..ac7266d6 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -17,6 +17,7 @@
 from anton.scratchpad import ScratchpadManager
 from anton.core.tools.registry import ToolRegistry
 from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef
+from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
 
 from anton.utils.datasources import (
     build_datasource_context,
@@ -26,6 +27,7 @@
 if TYPE_CHECKING:
     from rich.console import Console
     from anton.context.self_awareness import SelfAwarenessContext
+    from anton.chat_ui import EscapeWatcher
     from anton.llm.client import LLMClient
     from anton.memory.cortex import Cortex
     from anton.memory.episodes import EpisodicMemory
@@ -122,7 +124,7 @@ def __init__(
         self._history_store = history_store
         self._session_id = session_id
         self._cancel_event = asyncio.Event()
-        self._escape_watcher: "EscapeWatcher | None" = None
+        self._escape_watcher: EscapeWatcher | None = None
         self._active_datasource: str | None = None
         self._scratchpads = ScratchpadManager(
             coding_provider=coding_provider,
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index 33246553..16420f87 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -1,5 +1,7 @@
 from typing import TYPE_CHECKING
 
+from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
+
 if TYPE_CHECKING:
     from anton.chat_session import ChatSession
 
@@ -83,67 +85,6 @@ async def _encode_bg(cortex, entries):
     return "Memory updated: " + "; ".join(descriptions)
 
 
-async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict):
-    """Validate and prepare a scratchpad exec call.
-
-    Returns (pad, code, description, estimated_time, estimated_seconds) or
-    a str error message if validation fails.
-    """
-    name = tc_input.get("name", "")
-    code = tc_input.get("code", "")
-    if not code or not code.strip():
-        return "No code provided."
-
-    pad = await session._scratchpads.get_or_create(name)
-
-    # Auto-install packages before running the cell
-    packages = tc_input.get("packages", [])
-    if packages:
-        install_result = await pad.install_packages(packages)
-        if "Install failed" in install_result or "timed out" in install_result:
-            return install_result
-
-    description = tc_input.get("one_line_description", "")
-    estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0)
-    if isinstance(estimated_seconds, str):
-        try:
-            estimated_seconds = int(estimated_seconds)
-        except ValueError:
-            estimated_seconds = 0
-
-    estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else ""
-    return pad, code, description, estimated_time, estimated_seconds
-
-
-def format_cell_result(cell) -> str:
-    """Format a Cell into a tool result string.
-
-    Every section is labeled so the LLM can tell what came from where:
-    [output] — print() / stdout from the cell code
-    [logs]   — library logging (httpx, urllib3, etc.) captured at INFO+
-    [stderr] — warnings and stderr writes
-    [error]  — Python traceback if the cell raised an exception
-    """
-    parts: list[str] = []
-    if cell.stdout:
-        stdout = cell.stdout
-        if len(stdout) > 10_000:
-            stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)"
-        parts.append(f"[output]\n{stdout}")
-    if cell.logs if hasattr(cell, "logs") else False:
-        logs = cell.logs.strip()
-        if len(logs) > 3_000:
-            logs = logs[:3_000] + "\n... (logs truncated)"
-        parts.append(f"[logs]\n{logs}")
-    if cell.stderr:
-        parts.append(f"[stderr]\n{cell.stderr}")
-    if cell.error:
-        parts.append(f"[error]\n{cell.error}")
-    if not parts:
-        return "Code executed successfully (no output)."
-    return "\n".join(parts)
-
-
 async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
     """Dispatch a scratchpad tool call by action."""
     action = tc_input.get("action", "")
diff --git a/anton/core/utils/__init__.py b/anton/core/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/anton/core/utils/scratchpad.py b/anton/core/utils/scratchpad.py
new file mode 100644
index 00000000..a94dc6c4
--- /dev/null
+++ b/anton/core/utils/scratchpad.py
@@ -0,0 +1,65 @@
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from anton.core.session import ChatSession
+
+
+async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict):
+    """Validate and prepare a scratchpad exec call.
+
+    Returns (pad, code, description, estimated_time, estimated_seconds) or
+    a str error message if validation fails.
+    """
+    name = tc_input.get("name", "")
+    code = tc_input.get("code", "")
+    if not code or not code.strip():
+        return "No code provided."
+
+    pad = await session._scratchpads.get_or_create(name)
+
+    # Auto-install packages before running the cell
+    packages = tc_input.get("packages", [])
+    if packages:
+        install_result = await pad.install_packages(packages)
+        if "Install failed" in install_result or "timed out" in install_result:
+            return install_result
+
+    description = tc_input.get("one_line_description", "")
+    estimated_seconds = tc_input.get("estimated_execution_time_seconds", 0)
+    if isinstance(estimated_seconds, str):
+        try:
+            estimated_seconds = int(estimated_seconds)
+        except ValueError:
+            estimated_seconds = 0
+
+    estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else ""
+    return pad, code, description, estimated_time, estimated_seconds
+
+
+def format_cell_result(cell) -> str:
+    """Format a Cell into a tool result string.
+
+    Every section is labeled so the LLM can tell what came from where:
+    [output] — print() / stdout from the cell code
+    [logs]   — library logging (httpx, urllib3, etc.) captured at INFO+
+    [stderr] — warnings and stderr writes
+    [error]  — Python traceback if the cell raised an exception
+    """
+    parts: list[str] = []
+    if cell.stdout:
+        stdout = cell.stdout
+        if len(stdout) > 10_000:
+            stdout = stdout[:10_000] + f"\n\n... (truncated, {len(stdout)} chars total)"
+        parts.append(f"[output]\n{stdout}")
+    if cell.logs if hasattr(cell, "logs") else False:
+        logs = cell.logs.strip()
+        if len(logs) > 3_000:
+            logs = logs[:3_000] + "\n... (logs truncated)"
+        parts.append(f"[logs]\n{logs}")
+    if cell.stderr:
+        parts.append(f"[stderr]\n{cell.stderr}")
+    if cell.error:
+        parts.append(f"[error]\n{cell.error}")
+    if not parts:
+        return "Code executed successfully (no output)."
+    return "\n".join(parts)
\ No newline at end of file

From bdf79cd756248a5c550f2172e489ce3dd1a283ec Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 18:40:45 -0700
Subject: [PATCH 014/134] fixed another broken import

---
 anton/chat_session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/chat_session.py b/anton/chat_session.py
index 3e76878d..a042ab52 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -13,7 +13,7 @@
 if TYPE_CHECKING:
     from anton.chat import ChatSession
     from anton.memory.cortex import Cortex
-    from anton.memory.episodic import EpisodicMemory
+    from anton.memory.episodes import EpisodicMemory
     from anton.memory.history_store import HistoryStore
     from anton.workspace import Workspace
 

From 95ee00f4b9402a0c1356fa89ccb540caa77c4e18 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 18:48:43 -0700
Subject: [PATCH 015/134] fixed more imports

---
 anton/core/tools/registry.py      | 1 +
 anton/core/tools/tool_handlers.py | 1 +
 anton/core/utils/scratchpad.py    | 1 +
 anton/tools.py                    | 1 +
 4 files changed, 4 insertions(+)

diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index a73b6679..4e1109a5 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index 16420f87..82adefd6 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from typing import TYPE_CHECKING
 
 from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
diff --git a/anton/core/utils/scratchpad.py b/anton/core/utils/scratchpad.py
index a94dc6c4..18d7efd9 100644
--- a/anton/core/utils/scratchpad.py
+++ b/anton/core/utils/scratchpad.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
diff --git a/anton/tools.py b/anton/tools.py
index 3ce6d92c..11e0fa54 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -1,5 +1,6 @@
 """Extra tools for the open source terminal agent."""
 
+from __future__ import annotations
 from typing import TYPE_CHECKING
 
 from anton.core.tools.tool_defs import ToolDef

From f835fdfe7556562b945af6ac9036f837efb9499d Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 19:00:27 -0700
Subject: [PATCH 016/134] fixed tool dump

---
 anton/core/tools/registry.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 4e1109a5..81b6a640 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+from dataclasses import asdict
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -32,4 +33,10 @@ def dump(self) -> list[dict]:
         """
         Dump the registry as a list of tool definitions.
         """
-        return [tool.model_dump() for tool in self._tools]
+        tool_defs = []
+        for tool_def in self._tools:
+            # Remove the handler from the tool definition.
+            tool_def = asdict(tool_def)
+            tool_def.pop("handler")
+            tool_defs.append(tool_def)
+        return tool_defs

From 69113530d063fcc3b334fd7be123fd963a0db604 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 7 Apr 2026 19:04:00 -0700
Subject: [PATCH 017/134] fixed duplicate tool registration

---
 anton/core/session.py        | 7 ++++---
 anton/core/tools/registry.py | 7 +++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index ac7266d6..4c64e836 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -267,9 +267,10 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
     }
 
     def _build_tools(self) -> list[dict]:
-        self._build_core_tools()
-        for tool in self._extra_tools:
-            self.tool_registry.register_tool(tool)
+        if not self.tool_registry:
+            self._build_core_tools()
+            for tool in self._extra_tools:
+                self.tool_registry.register_tool(tool)
         return self.tool_registry.dump()
 
     def _build_core_tools(self) -> None:
diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 81b6a640..c3f5f69c 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -14,6 +14,12 @@ def __init__(self) -> None:
         # Register core tools.
         self._tools = []
 
+    def __bool__(self) -> bool:
+        """
+        Return True if there are any tools registered.
+        """
+        return bool(self._tools)
+
     def register_tool(self, tool_def: ToolDef) -> None:
         """
         Register a new (extra to core) tool.
@@ -32,6 +38,7 @@ async def dispatch_tool(self, tool_name: str, tc_input: dict) -> str:
     def dump(self) -> list[dict]:
         """
         Dump the registry as a list of tool definitions.
+        This is used to build the tools list for the LLM. As a result, the handler is not needed.
         """
         tool_defs = []
         for tool_def in self._tools:

From 45da437e3581eb8119ad783423a495fafd23ad06 Mon Sep 17 00:00:00 2001
From: Konstantin Sivakov <konstantin.sivakov@gmail.com>
Date: Wed, 8 Apr 2026 15:50:43 +0200
Subject: [PATCH 018/134] Make the choices for cresting a new connection

---
 anton/commands/datasource.py | 155 ++++++++++++++++++++++++-----------
 1 file changed, 107 insertions(+), 48 deletions(-)

diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index 9652367b..1e69a5b4 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -515,6 +515,40 @@ async def handle_add_custom_datasource(
     return engine_def, credentials
 
 
+async def _reconnect_to_saved(
+    console: Console,
+    session: "ChatSession",
+    vault: "DataVault",
+    registry: "DatasourceRegistry",
+    slug: str,
+    conn: dict,
+) -> "ChatSession":
+    """Inject env for a saved connection and mark it as the active datasource."""
+    restore_namespaced_env(vault)
+    session._active_datasource = slug
+    recon_engine_def = registry.get(conn["engine"])
+    if recon_engine_def:
+        register_secret_vars(recon_engine_def, engine=conn["engine"], name=conn["name"])
+        engine_label = recon_engine_def.display_name
+    else:
+        engine_label = conn["engine"]
+    console.print()
+    console.print(
+        f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
+    )
+    console.print()
+    session._history.append(
+        {
+            "role": "assistant",
+            "content": (
+                f'I\'ve reconnected to the {engine_label} connection "{slug}" '
+                f"in the Local Vault. I can now query this data source when needed."
+            ),
+        }
+    )
+    return session
+
+
 async def handle_connect_datasource(
     console: Console,
     scratchpads: ScratchpadManager,
@@ -654,17 +688,19 @@ async def handle_connect_datasource(
     display_engines = popular_engines + other_engines + custom_engines
 
     saved_connections = vault.list_connections()
-    # Build deduplicated list of saved connection display entries
-    saved_entries: list[tuple[str, str]] = []  # (slug, display_name)
+    # Build deduplicated list of engine types from saved connections (one per engine)
+    seen_engines: set[str] = set()
+    recent_engine_entries: list[tuple[str, str]] = []  # (engine_slug, display_name)
     for c in saved_connections:
-        slug = f"{c['engine']}-{c['name']}"
-        engine = registry.get(c["engine"])
-        label = engine.display_name if engine else c["engine"]
-        saved_entries.append((slug, label))
+        if c["engine"] not in seen_engines:
+            seen_engines.add(c["engine"])
+            engine_obj = registry.get(c["engine"])
+            label = engine_obj.display_name if engine_obj else c["engine"]
+            recent_engine_entries.append((c["engine"], label))
 
     def print_sections() -> None:
         console.print(
-            "[anton.cyan](anton)[/] Choose a data source:\n"
+            "[anton.cyan](anton)[/] Select a data source to create a new connection:\n"
         )
         console.print("       [bold]  Primary")
         console.print(
@@ -676,10 +712,10 @@ def print_sections() -> None:
             for i, e in enumerate(popular_engines, 1):
                 console.print(f"          [bold]{i:>2}.[/bold] {e.display_name}")
             console.print()
-        if saved_entries:
+        if recent_engine_entries:
             start = len(popular_engines) + 1
-            console.print("       [bold]  Recent connections")
-            for i, (slug, label) in enumerate(saved_entries, start):
+            console.print("       [bold]  Recently used data sources")
+            for i, (_, label) in enumerate(recent_engine_entries, start):
                 console.print(f"          [bold]{i:>2}.[/bold] {label}")
             console.print()
 
@@ -697,28 +733,72 @@ def print_all() -> None:
             console.print(f"          [bold]{i:>2}.[/bold] {e.display_name}{star}")
         console.print()
 
-    if prefill:
-        answer = prefill
-    else:
+    async def get_create_new_answer() -> str | None:
         print_sections()
         console.print(
             "       [anton.muted]Don't see yours? Type a datasource name (e.g., GitHub, Gmail, Jira, ...)\n"
             "       It can be virtually any datasource — we'll figure out the details together.[/]"
         )
         console.print()
-        answer = await prompt_or_cancel(
+        ans = await prompt_or_cancel(
             "(anton) Enter a number or type a datasource name",
         )
-        if answer is None:
-            return session
-        if answer.strip().lower() == "all":
+        if ans is None:
+            return None
+        if ans.strip().lower() == "all":
             console.print()
             print_all()
-            answer = await prompt_or_cancel(
+            ans = await prompt_or_cancel(
                 "(anton) Enter a number or type a name",
             )
-            if answer is None:
+        return ans
+
+    if prefill:
+        answer = prefill
+    elif saved_connections:
+        console.print()
+        console.print("[anton.cyan](anton)[/] What would you like to do?\n")
+        console.print("          [bold]  1.[/bold] Use an existing connection")
+        console.print("          [bold]  2.[/bold] Create a new connection")
+        console.print()
+        top_choice = await prompt_or_cancel(
+            "(anton) Enter a number", choices=["1", "2"]
+        )
+        if top_choice is None:
+            return session
+
+        if top_choice == "1":
+            console.print()
+            console.print("[anton.cyan](anton)[/] Your saved connections:\n")
+            for i, c in enumerate(saved_connections, 1):
+                conn_slug = f"{c['engine']}-{c['name']}"
+                engine_obj = registry.get(c["engine"])
+                engine_label = engine_obj.display_name if engine_obj else c["engine"]
+                console.print(
+                    f"          [bold]{i:>2}.[/bold] {conn_slug}"
+                    f" [dim]— {engine_label}[/]"
+                )
+            console.print()
+            pick = await prompt_or_cancel(
+                "(anton) Enter a number",
+                choices=[str(i) for i in range(1, len(saved_connections) + 1)],
+            )
+            if pick is None:
                 return session
+            picked_conn = saved_connections[int(pick) - 1]
+            picked_slug = f"{picked_conn['engine']}-{picked_conn['name']}"
+            return await _reconnect_to_saved(
+                console, session, vault, registry, picked_slug, picked_conn
+            )
+
+        # top_choice == "2": create new connection
+        answer = await get_create_new_answer()
+        if answer is None:
+            return session
+    else:
+        answer = await get_create_new_answer()
+        if answer is None:
+            return session
 
     stripped_answer = answer.strip()
     known_slugs = {
@@ -726,36 +806,16 @@ def print_all() -> None:
     }
     if stripped_answer in known_slugs:
         conn = known_slugs[stripped_answer]
-        restore_namespaced_env(vault)
-        session._active_datasource = stripped_answer
-        recon_engine_def = registry.get(conn["engine"])
-        if recon_engine_def:
-            register_secret_vars(recon_engine_def, engine=conn["engine"], name=conn["name"])
-            engine_label = recon_engine_def.display_name
-        else:
-            engine_label = conn["engine"]
-        console.print()
-        console.print(
-            f'[anton.success]        ✓ Reconnected to [bold]"{stripped_answer}"[/bold].[/]'
+        return await _reconnect_to_saved(
+            console, session, vault, registry, stripped_answer, conn
         )
-        console.print()
-        session._history.append(
-            {
-                "role": "assistant",
-                "content": (
-                    f'I\'ve reconnected to the {engine_label} connection "{stripped_answer}" '
-                    f"in the Local Vault. I can now query this data source when needed."
-                ),
-            }
-        )
-        return session
 
     engine_def: DatasourceEngine | None = None
     custom_source = False
     llm_recognised = False
-    # Saved connections are numbered after popular engines
+    # Recently used data sources are numbered after popular engines
     saved_start = len(popular_engines) + 1
-    max_num = len(popular_engines) + len(saved_entries)
+    max_num = len(popular_engines) + len(recent_engine_entries)
 
     if stripped_answer.isdigit() or (stripped_answer.lstrip("-").isdigit()):
         pick_num = int(stripped_answer)
@@ -763,11 +823,10 @@ def print_all() -> None:
             custom_source = True
         elif 1 <= pick_num <= len(popular_engines):
             engine_def = popular_engines[pick_num - 1]
-        elif saved_entries and saved_start <= pick_num <= max_num:
-            # User picked a recent connection type — start a new connection of that engine
-            picked_slug, picked_label = saved_entries[pick_num - saved_start]
-            picked_engine = picked_slug.split("-", 1)[0]
-            engine_def = registry.get(picked_engine)
+        elif recent_engine_entries and saved_start <= pick_num <= max_num:
+            # User picked a recently used data source — start a new connection of that engine
+            picked_engine_slug, _ = recent_engine_entries[pick_num - saved_start]
+            engine_def = registry.get(picked_engine_slug)
             if engine_def is None:
                 custom_source = True
         else:

From bcaca1046244c5fff82650dd9a680904509435a6 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 08:57:44 -0700
Subject: [PATCH 019/134] fixed merge drift

---
 anton/chat.py         | 7 +------
 anton/core/session.py | 4 ++++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index c88473d7..a4945f5e 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -19,7 +19,6 @@
 )
 from anton.core.session import ChatSession
 from anton.llm.provider import (
-    ContextOverflowError,
     TokenLimitExceeded,
     StreamComplete,
     StreamContextCompacted,
@@ -75,17 +74,13 @@
 from prompt_toolkit import PromptSession
 from prompt_toolkit.formatted_text import HTML
 from prompt_toolkit.styles import Style as PTStyle
-from rich.prompt import Confirm, Prompt
+from rich.prompt import Prompt
 
 if TYPE_CHECKING:
     from rich.console import Console
 
     from anton.config.settings import AntonSettings
-    from anton.context.self_awareness import SelfAwarenessContext
-    from anton.llm.client import LLMClient
-    from anton.memory.cortex import Cortex
     from anton.memory.episodes import EpisodicMemory
-    from anton.memory.history_store import HistoryStore
     from anton.workspace import Workspace
 
 
diff --git a/anton/core/session.py b/anton/core/session.py
index 4c64e836..f5020418 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -13,6 +13,7 @@
     StreamTaskProgress,
     StreamTextDelta,
     StreamToolResult,
+    TokenLimitExceeded
 )
 from anton.scratchpad import ScratchpadManager
 from anton.core.tools.registry import ToolRegistry
@@ -571,6 +572,9 @@ async def turn_stream(
                     yield event
                 break  # completed successfully
             except Exception as _agent_exc:
+                # Token/billing limit — don't retry, let the chat loop handle it
+                if isinstance(_agent_exc, TokenLimitExceeded):
+                    raise
                 _retry_count += 1
                 if _retry_count <= _max_auto_retries:
                     # Inject the error into history and let the LLM try to recover

From 154989db1bd92c7f3d72fb1b2579e593fc95935e Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 09:02:58 -0700
Subject: [PATCH 020/134] bumped version for release

---
 anton/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/__init__.py b/anton/__init__.py
index c72e3798..8c0d5d5b 100644
--- a/anton/__init__.py
+++ b/anton/__init__.py
@@ -1 +1 @@
-__version__ = "1.1.4"
+__version__ = "2.0.0"

From ca73b7d9d24f69cfac04a133b541ee417e6a9645 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 09:27:47 -0700
Subject: [PATCH 021/134] fixed broken tool dispatch

---
 anton/core/session.py        | 14 ++++++++++----
 anton/core/tools/registry.py |  7 +++++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index f5020418..b6b9bf51 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -112,7 +112,7 @@ def __init__(
         self._episodic = episodic
         self._runtime_context = runtime_context
         self._proactive_dashboards = proactive_dashboards
-        self._extra_tools = tools
+        self._extra_tools = tools or []
         self._workspace = workspace
         self._console = console
         self._history: list[dict] = list(initial_history) if initial_history else []
@@ -493,7 +493,9 @@ async def turn(self, user_input: str | list[dict]) -> str:
             tool_results: list[dict] = []
             for tc in response.tool_calls:
                 try:
-                    result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input)
+                    result_text = await self.tool_registry.dispatch_tool(
+                        self, tc.name, tc.input
+                    )
                 except Exception as exc:
                     result_text = f"Tool '{tc.name}' failed: {exc}"
 
@@ -875,7 +877,9 @@ async def _stream_and_handle_tools(
                             )
                             if self._escape_watcher:
                                 self._escape_watcher.pause()
-                            result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input)
+                            result_text = await self.tool_registry.dispatch_tool(
+                                self, tc.name, tc.input
+                            )
                             if self._escape_watcher:
                                 self._escape_watcher.resume()
                             yield StreamTaskProgress(
@@ -883,7 +887,9 @@ async def _stream_and_handle_tools(
                                 message="Analyzing results...",
                             )
                         else:
-                            result_text = await self.tool_registry.dispatch_tool(tc.name, tc.input)
+                            result_text = await self.tool_registry.dispatch_tool(
+                                self, tc.name, tc.input
+                            )
                             if (
                                 tc.name == "scratchpad"
                                 and tc.input.get("action") == "dump"
diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index c3f5f69c..2579b5ff 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
+    from anton.core.session import ChatSession
     from anton.core.tools.tool_defs import ToolDef
 
 
@@ -26,14 +27,16 @@ def register_tool(self, tool_def: ToolDef) -> None:
         """
         self._tools.append(tool_def)
 
-    async def dispatch_tool(self, tool_name: str, tc_input: dict) -> str:
+    async def dispatch_tool(
+        self, session: "ChatSession", tool_name: str, tc_input: dict
+    ) -> str:
         """
         Dispatch a tool call by name. Returns result text.
         """
         tool_def = next((tool for tool in self._tools if tool.name == tool_name), None)
         if tool_def is None:
             raise ValueError(f"Tool {tool_name} not found")
-        return await tool_def.handler(tc_input)
+        return await tool_def.handler(session, tc_input)
 
     def dump(self) -> list[dict]:
         """

From 62f4aec0d1cd6771eed8d2dbfbe562b06755c316 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 09:29:21 -0700
Subject: [PATCH 022/134] fixed missing imports

---
 anton/chat.py         | 2 +-
 anton/core/session.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/anton/chat.py b/anton/chat.py
index a4945f5e..000aa3c1 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -17,7 +17,7 @@
     parse_dropped_paths as _parse_dropped_paths,
     save_clipboard_image,
 )
-from anton.core.session import ChatSession
+from anton.core.session import ChatSession, TOKEN_STATUS_CACHE_TTL
 from anton.llm.provider import (
     TokenLimitExceeded,
     StreamComplete,
diff --git a/anton/core/session.py b/anton/core/session.py
index b6b9bf51..13938158 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -36,6 +36,7 @@
     from anton.workspace import Workspace
 
 
+# TODO: Move to settings?
 _MAX_TOOL_ROUNDS = 25  # Hard limit on consecutive tool-call rounds per turn
 _MAX_CONTINUATIONS = 3  # Max times the verification loop can restart the tool loop
 _CONTEXT_PRESSURE_THRESHOLD = 0.7  # Trigger compaction when context is 70% full

From e04c2be7ffab49f348aca47ccbd5eba2bb0f27de Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 09:31:01 -0700
Subject: [PATCH 023/134] fixed unit tests

---
 tests/test_chat_context.py    |  6 ++--
 tests/test_chat_scratchpad.py | 59 +++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py
index e06f4276..ddf70b82 100644
--- a/tests/test_chat_context.py
+++ b/tests/test_chat_context.py
@@ -11,7 +11,7 @@
 from anton.chat import ChatSession, _handle_connect
 from anton.minds_client import describe_minds_connection_error
 from anton.config.settings import AntonSettings
-from anton.tools import MEMORIZE_TOOL
+from anton.core.tools.tool_defs import MEMORIZE_TOOL
 from anton.context.self_awareness import SelfAwarenessContext
 from anton.llm.provider import LLMResponse, ToolCall, Usage
 from anton.workspace import Workspace
@@ -81,8 +81,8 @@ def cortex(memory_dirs):
 
 class TestMemorizeTool:
     def test_tool_definition_structure(self):
-        assert MEMORIZE_TOOL["name"] == "memorize"
-        props = MEMORIZE_TOOL["input_schema"]["properties"]
+        assert MEMORIZE_TOOL.name == "memorize"
+        props = MEMORIZE_TOOL.input_schema["properties"]
         assert "entries" in props
 
     async def test_memorize_creates_rule(self, cortex, memory_dirs):
diff --git a/tests/test_chat_scratchpad.py b/tests/test_chat_scratchpad.py
index 45926e83..e69356da 100644
--- a/tests/test_chat_scratchpad.py
+++ b/tests/test_chat_scratchpad.py
@@ -1,14 +1,25 @@
 from __future__ import annotations
 
+from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from anton.chat import SCRATCHPAD_TOOL, ChatSession
+from anton.core.session import ChatSession
+from anton.core.tools.tool_defs import SCRATCHPAD_TOOL
 from anton.commands.session import handle_resume
 from anton.llm.provider import LLMResponse, StreamComplete, StreamToolResult, ToolCall, Usage
 
 
+@pytest.fixture()
+def workspace():
+    # Keep scratchpad venvs inside the repo workspace (pytest runs sandboxed and
+    # can't write to the real home directory).
+    base = Path(__file__).resolve().parents[1] / ".pytest-workspace"
+    base.mkdir(parents=True, exist_ok=True)
+    return MagicMock(base=base)
+
+
 def _text_response(text: str) -> LLMResponse:
     return LLMResponse(
         content=text,
@@ -39,29 +50,29 @@ def _scratchpad_response(
 
 class TestScratchpadToolDefinition:
     def test_tool_definition_structure(self):
-        assert SCRATCHPAD_TOOL["name"] == "scratchpad"
-        props = SCRATCHPAD_TOOL["input_schema"]["properties"]
+        assert SCRATCHPAD_TOOL.name == "scratchpad"
+        props = SCRATCHPAD_TOOL.input_schema["properties"]
         assert "action" in props
         assert "name" in props
         assert "code" in props
         assert "packages" in props
-        assert SCRATCHPAD_TOOL["input_schema"]["required"] == ["action", "name"]
+        assert SCRATCHPAD_TOOL.input_schema["required"] == ["action", "name"]
 
     def test_tool_has_install_action(self):
-        actions = SCRATCHPAD_TOOL["input_schema"]["properties"]["action"]["enum"]
+        actions = SCRATCHPAD_TOOL.input_schema["properties"]["action"]["enum"]
         assert "install" in actions
 
     def test_packages_property_is_array_of_strings(self):
-        packages_prop = SCRATCHPAD_TOOL["input_schema"]["properties"]["packages"]
+        packages_prop = SCRATCHPAD_TOOL.input_schema["properties"]["packages"]
         assert packages_prop["type"] == "array"
         assert packages_prop["items"] == {"type": "string"}
 
-    async def test_scratchpad_tool_in_tools(self):
+    async def test_scratchpad_tool_in_tools(self, workspace):
         """scratchpad should always be in _build_tools() output."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             await session.turn("hello")
 
@@ -74,7 +85,7 @@ async def test_scratchpad_tool_in_tools(self):
 
 
 class TestScratchpadExecViaChat:
-    async def test_scratchpad_exec_via_chat(self):
+    async def test_scratchpad_exec_via_chat(self, workspace):
         """exec action flows through and returns output."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(
@@ -84,7 +95,7 @@ async def test_scratchpad_exec_via_chat(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             reply = await session.turn("what is 7 * 6?")
 
@@ -100,7 +111,7 @@ async def test_scratchpad_exec_via_chat(self):
 
 
 class TestScratchpadViewViaChat:
-    async def test_scratchpad_view_via_chat(self):
+    async def test_scratchpad_view_via_chat(self, workspace):
         """view action returns cell history."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(
@@ -111,7 +122,7 @@ async def test_scratchpad_view_via_chat(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             await session.turn("run and show")
 
@@ -129,7 +140,7 @@ async def test_scratchpad_view_via_chat(self):
 
 
 class TestScratchpadRemoveViaChat:
-    async def test_scratchpad_remove_via_chat(self):
+    async def test_scratchpad_remove_via_chat(self, workspace):
         """remove action cleans up the scratchpad."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(
@@ -140,7 +151,7 @@ async def test_scratchpad_remove_via_chat(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             await session.turn("create and remove")
 
@@ -155,7 +166,7 @@ async def test_scratchpad_remove_via_chat(self):
 
 
 class TestScratchpadDumpViaChat:
-    async def test_scratchpad_dump_via_chat(self):
+    async def test_scratchpad_dump_via_chat(self, workspace):
         """dump action flows through chat, returns markdown with code fences."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(
@@ -169,7 +180,7 @@ async def test_scratchpad_dump_via_chat(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             await session.turn("show me my work")
 
@@ -202,7 +213,7 @@ async def __anext__(self):
 
 
 class TestScratchpadDumpStreaming:
-    async def test_scratchpad_dump_streams_tool_result(self):
+    async def test_scratchpad_dump_streams_tool_result(self, workspace):
         """dump action yields a StreamToolResult for display, but sends a short
         summary back to the LLM to avoid it parroting the full notebook."""
         mock_llm = AsyncMock()
@@ -231,7 +242,7 @@ def fake_plan_stream(**kwargs):
 
         mock_llm.plan_stream = fake_plan_stream
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             events = []
             async for event in session.turn_stream("show work"):
@@ -255,7 +266,7 @@ def fake_plan_stream(**kwargs):
 
 
 class TestScratchpadStreaming:
-    async def test_scratchpad_in_streaming_path(self):
+    async def test_scratchpad_in_streaming_path(self, workspace):
         """scratchpad exec should work in turn_stream() too."""
         tool_response = _scratchpad_response("Computing.", "exec", "s", "print(99)")
         final_response = _text_response("Got 99.")
@@ -274,7 +285,7 @@ def fake_plan_stream(**kwargs):
 
         mock_llm.plan_stream = fake_plan_stream
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             events = []
             async for event in session.turn_stream("compute 99"):
@@ -294,7 +305,7 @@ def fake_plan_stream(**kwargs):
 
 
 class TestScratchpadInstallViaChat:
-    async def test_install_action_dispatch(self):
+    async def test_install_action_dispatch(self, workspace):
         """install action flows through chat and returns pip output."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(
@@ -306,7 +317,7 @@ async def test_install_action_dispatch(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             reply = await session.turn("install cowsay")
 
@@ -320,7 +331,7 @@ async def test_install_action_dispatch(self):
         finally:
             await session.close()
 
-    async def test_install_empty_packages_via_chat(self):
+    async def test_install_empty_packages_via_chat(self, workspace):
         """install with no packages returns a message without crashing."""
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(
@@ -330,7 +341,7 @@ async def test_install_empty_packages_via_chat(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(mock_llm, workspace=workspace)
         try:
             await session.turn("install nothing")
 

From fd5b05254024aacc4de676d2fb6398c68411fac2 Mon Sep 17 00:00:00 2001
From: Konstantin Sivakov <konstantin.sivakov@gmail.com>
Date: Wed, 8 Apr 2026 18:47:19 +0200
Subject: [PATCH 024/134] Mark stub-only the failing test

---
 tests/e2e/scenarios/test_error_handling.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/e2e/scenarios/test_error_handling.py b/tests/e2e/scenarios/test_error_handling.py
index 922406ae..731d19f5 100644
--- a/tests/e2e/scenarios/test_error_handling.py
+++ b/tests/e2e/scenarios/test_error_handling.py
@@ -78,6 +78,7 @@ def log_message(self, *_): pass
     assert_not_output(result, "Traceback (most recent call last)")
 
 
+@pytest.mark.stub_only
 def test_large_input_no_crash(cfg, stub, tmp_path):
     stub.queue_text("Got your big message.")
     stub.queue_verification_ok()

From 2b4d2318f477f57d7f7d08d3ca1c933998bbc8b5 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 8 Apr 2026 11:04:29 -0700
Subject: [PATCH 025/134] calling publish tool

---
 anton/tools.py | 33 +++++++--------------------------
 1 file changed, 7 insertions(+), 26 deletions(-)

diff --git a/anton/tools.py b/anton/tools.py
index 11e0fa54..94cb828d 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -130,39 +130,20 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str
     # Publish flow
     from anton.config.settings import AntonSettings
     from anton.publisher import publish
-    from anton.utils.prompt import prompt_or_cancel
 
     settings = AntonSettings()
 
     if not settings.minds_api_key:
+        console.print()
         console.print("  [anton.muted]To publish you need a free Minds account.[/]")
+        console.print("  [anton.muted]Run [bold]/publish[/bold] to set up your API key and publish.[/]")
         console.print()
-        has_key = await prompt_or_cancel(
-            "  (anton) Do you have an mdb.ai API key?",
-            choices=["y", "n"],
-            choices_display="y/n",
-            default="y",
+        return (
+            "STOP: No Minds API key configured. Do NOT call this tool again. "
+            "Tell the user to run the /publish command to set up their mdb.ai API key "
+            "and publish their dashboard. The /publish command handles the interactive "
+            "API key setup flow."
         )
-        if has_key is None:
-            console.print()
-            return "User cancelled publish."
-        if has_key == "n":
-            webbrowser.open(
-                "https://mdb.ai/auth/realms/mindsdb/protocol/openid-connect/registrations"
-                "?client_id=public-client&response_type=code&scope=openid"
-                "&redirect_uri=https%3A%2F%2Fmdb.ai"
-            )
-            console.print()
-
-        api_key = await prompt_or_cancel("  (anton) API key", password=True)
-        if api_key is None or not api_key.strip():
-            console.print()
-            return "User cancelled publish."
-        api_key = api_key.strip()
-        settings.minds_api_key = api_key
-        if session._workspace:
-            session._workspace.set_secret("ANTON_MINDS_API_KEY", api_key)
-        console.print()
 
     from rich.live import Live
     from rich.spinner import Spinner

From 9ffbc4b126526222757cf2964b5068dcb0a5da9e Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 11:57:35 -0700
Subject: [PATCH 026/134] copied llm to core

---
 anton/core/llm/anthropic.py | 196 +++++++++++++++++
 anton/core/llm/client.py    | 112 ++++++++++
 anton/core/llm/openai.py    | 394 ++++++++++++++++++++++++++++++++++
 anton/core/llm/prompts.py   | 415 ++++++++++++++++++++++++++++++++++++
 anton/core/llm/provider.py  | 165 ++++++++++++++
 5 files changed, 1282 insertions(+)
 create mode 100644 anton/core/llm/anthropic.py
 create mode 100644 anton/core/llm/client.py
 create mode 100644 anton/core/llm/openai.py
 create mode 100644 anton/core/llm/prompts.py
 create mode 100644 anton/core/llm/provider.py

diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
new file mode 100644
index 00000000..c940b4e5
--- /dev/null
+++ b/anton/core/llm/anthropic.py
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+import json
+from collections.abc import AsyncIterator
+
+import anthropic
+
+from .provider import (
+    ContextOverflowError,
+    LLMProvider,
+    LLMResponse,
+    StreamComplete,
+    StreamEvent,
+    StreamTextDelta,
+    StreamToolUseDelta,
+    StreamToolUseEnd,
+    StreamToolUseStart,
+    ToolCall,
+    Usage,
+    compute_context_pressure,
+)
+
+
+class AnthropicProvider(LLMProvider):
+    def __init__(self, api_key: str | None = None) -> None:
+        kwargs = {}
+        if api_key:
+            kwargs["api_key"] = api_key
+        self._client = anthropic.AsyncAnthropic(**kwargs)
+
+    async def complete(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        tool_choice: dict | None = None,
+        max_tokens: int = 4096,
+    ) -> LLMResponse:
+        kwargs: dict = {
+            "model": model,
+            "max_tokens": max_tokens,
+            "system": system,
+            "messages": messages,
+        }
+        if tools:
+            kwargs["tools"] = tools
+        if tool_choice:
+            kwargs["tool_choice"] = tool_choice
+
+        try:
+            response = await self._client.messages.create(**kwargs)
+        except anthropic.BadRequestError as exc:
+            msg = str(exc).lower()
+            if "prompt is too long" in msg or "context limit" in msg:
+                raise ContextOverflowError(str(exc)) from exc
+            raise
+        except anthropic.APIStatusError as exc:
+            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+                msg = f"Server returned 429 — {exc.body['detail']}"
+                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                from anton.llm.provider import TokenLimitExceeded
+                raise TokenLimitExceeded(msg) from exc
+            else:
+                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
+            raise ConnectionError(msg) from exc
+        except anthropic.APIConnectionError as exc:
+            raise ConnectionError(
+                "Could not reach the LLM server — check your connection or try again in a moment."
+            ) from exc
+
+        content_text = ""
+        tool_calls: list[ToolCall] = []
+
+        for block in response.content:
+            if block.type == "text":
+                content_text += block.text
+            elif block.type == "tool_use":
+                tool_calls.append(
+                    ToolCall(id=block.id, name=block.name, input=block.input)
+                )
+
+        input_tokens = response.usage.input_tokens
+        return LLMResponse(
+            content=content_text,
+            tool_calls=tool_calls,
+            usage=Usage(
+                input_tokens=input_tokens,
+                output_tokens=response.usage.output_tokens,
+                context_pressure=compute_context_pressure(model, input_tokens),
+            ),
+            stop_reason=response.stop_reason,
+        )
+
+    async def stream(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[StreamEvent]:
+        kwargs: dict = {
+            "model": model,
+            "max_tokens": max_tokens,
+            "system": system,
+            "messages": messages,
+        }
+        if tools:
+            kwargs["tools"] = tools
+
+        content_text = ""
+        tool_calls: list[ToolCall] = []
+        input_tokens = 0
+        output_tokens = 0
+        stop_reason: str | None = None
+
+        # Track content blocks by index for tool correlation
+        blocks: dict[int, dict] = {}
+
+        try:
+            async with self._client.messages.stream(**kwargs) as stream:
+                async for event in stream:
+                    if event.type == "message_start":
+                        usage = event.message.usage
+                        input_tokens = usage.input_tokens
+                        output_tokens = getattr(usage, "output_tokens", 0)
+
+                    elif event.type == "content_block_start":
+                        idx = event.index
+                        block = event.content_block
+                        if block.type == "tool_use":
+                            blocks[idx] = {"type": "tool_use", "id": block.id, "name": block.name, "json_parts": []}
+                            yield StreamToolUseStart(id=block.id, name=block.name)
+                        else:
+                            blocks[idx] = {"type": "text"}
+
+                    elif event.type == "content_block_delta":
+                        idx = event.index
+                        delta = event.delta
+                        if delta.type == "text_delta":
+                            content_text += delta.text
+                            yield StreamTextDelta(text=delta.text)
+                        elif delta.type == "input_json_delta":
+                            info = blocks.get(idx, {})
+                            if info.get("type") == "tool_use":
+                                info["json_parts"].append(delta.partial_json)
+                                yield StreamToolUseDelta(id=info["id"], json_delta=delta.partial_json)
+
+                    elif event.type == "content_block_stop":
+                        idx = event.index
+                        info = blocks.get(idx, {})
+                        if info.get("type") == "tool_use":
+                            raw_json = "".join(info["json_parts"])
+                            parsed_input = json.loads(raw_json) if raw_json else {}
+                            tool_calls.append(
+                                ToolCall(id=info["id"], name=info["name"], input=parsed_input)
+                            )
+                            yield StreamToolUseEnd(id=info["id"])
+
+                    elif event.type == "message_delta":
+                        stop_reason = event.delta.stop_reason
+                        output_tokens = event.usage.output_tokens
+        except anthropic.BadRequestError as exc:
+            msg = str(exc).lower()
+            if "prompt is too long" in msg or "context limit" in msg:
+                raise ContextOverflowError(str(exc)) from exc
+            raise
+        except anthropic.APIStatusError as exc:
+            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+                msg = f"Server returned 429 — {exc.body['detail']}"
+                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                from anton.llm.provider import TokenLimitExceeded
+                raise TokenLimitExceeded(msg) from exc
+            else:
+                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
+            raise ConnectionError(msg) from exc
+        except anthropic.APIConnectionError as exc:
+            raise ConnectionError(
+                "Could not reach the LLM server — check your connection or try again in a moment."
+            ) from exc
+
+        yield StreamComplete(
+            response=LLMResponse(
+                content=content_text,
+                tool_calls=tool_calls,
+                usage=Usage(
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    context_pressure=compute_context_pressure(model, input_tokens),
+                ),
+                stop_reason=stop_reason,
+            )
+        )
diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py
new file mode 100644
index 00000000..8a773608
--- /dev/null
+++ b/anton/core/llm/client.py
@@ -0,0 +1,112 @@
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from typing import TYPE_CHECKING
+
+from .provider import LLMProvider, LLMResponse, StreamEvent
+
+if TYPE_CHECKING:
+    from anton.config.settings import AntonSettings
+
+
+class LLMClient:
+    def __init__(
+        self,
+        *,
+        planning_provider: LLMProvider,
+        planning_model: str,
+        coding_provider: LLMProvider,
+        coding_model: str,
+        max_tokens: int = 8192,
+    ) -> None:
+        self._planning_provider = planning_provider
+        self._planning_model = planning_model
+        self._coding_provider = coding_provider
+        self._coding_model = coding_model
+        self._max_tokens = max_tokens
+
+    async def plan(
+        self,
+        *,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        max_tokens: int | None = None,
+    ) -> LLMResponse:
+        return await self._planning_provider.complete(
+            model=self._planning_model,
+            system=system,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens or self._max_tokens,
+        )
+
+    async def plan_stream(
+        self,
+        *,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        max_tokens: int | None = None,
+    ) -> AsyncIterator[StreamEvent]:
+        async for event in self._planning_provider.stream(
+            model=self._planning_model,
+            system=system,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens or self._max_tokens,
+        ):
+            yield event
+
+    @property
+    def coding_provider(self) -> LLMProvider:
+        """The LLM provider used for coding/skill execution."""
+        return self._coding_provider
+
+    @property
+    def coding_model(self) -> str:
+        """The model name used for coding/skill execution."""
+        return self._coding_model
+
+    async def code(
+        self,
+        *,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        max_tokens: int | None = None,
+    ) -> LLMResponse:
+        return await self._coding_provider.complete(
+            model=self._coding_model,
+            system=system,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens or self._max_tokens,
+        )
+
+    @classmethod
+    def from_settings(cls, settings: AntonSettings) -> LLMClient:
+        from anton.llm.anthropic import AnthropicProvider
+        from anton.llm.openai import OpenAIProvider
+
+        providers = {
+            "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key),
+            "openai": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify),
+            "openai-compatible": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify),
+        }
+
+        planning_factory = providers.get(settings.planning_provider)
+        coding_factory = providers.get(settings.coding_provider)
+
+        if planning_factory is None:
+            raise ValueError(f"Unknown planning provider: {settings.planning_provider}")
+        if coding_factory is None:
+            raise ValueError(f"Unknown coding provider: {settings.coding_provider}")
+
+        return cls(
+            planning_provider=planning_factory(),
+            planning_model=settings.planning_model,
+            coding_provider=coding_factory(),
+            coding_model=settings.coding_model,
+            max_tokens=getattr(settings, "max_tokens", 8192),
+        )
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
new file mode 100644
index 00000000..26236faf
--- /dev/null
+++ b/anton/core/llm/openai.py
@@ -0,0 +1,394 @@
+from __future__ import annotations
+
+import json
+from collections.abc import AsyncIterator
+
+import openai
+
+from .provider import (
+    ContextOverflowError,
+    LLMProvider,
+    LLMResponse,
+    StreamComplete,
+    StreamEvent,
+    StreamTextDelta,
+    StreamToolUseDelta,
+    StreamToolUseEnd,
+    StreamToolUseStart,
+    ToolCall,
+    Usage,
+    compute_context_pressure,
+)
+
+
+def _translate_tools(tools: list[dict]) -> list[dict]:
+    """Anthropic tool format -> OpenAI function-calling format."""
+    result = []
+    for tool in tools:
+        result.append({
+            "type": "function",
+            "function": {
+                "name": tool["name"],
+                "description": tool.get("description", ""),
+                "parameters": tool.get("input_schema", {}),
+            },
+        })
+    return result
+
+
+def _translate_tool_choice(tool_choice: dict) -> dict | str:
+    """Anthropic tool_choice -> OpenAI tool_choice."""
+    tc_type = tool_choice.get("type")
+    if tc_type == "tool":
+        return {"type": "function", "function": {"name": tool_choice["name"]}}
+    if tc_type == "any":
+        return "required"
+    if tc_type == "auto":
+        return "auto"
+    return "auto"
+
+
+def _translate_messages(system: str, messages: list[dict]) -> list[dict]:
+    """Convert Anthropic-style messages to OpenAI chat format.
+
+    Handles:
+    - system prompt -> {"role": "system", ...}
+    - plain text messages pass through
+    - assistant messages with tool_use content blocks -> tool_calls array
+    - user messages with tool_result content blocks -> role:tool messages
+    """
+    result: list[dict] = []
+    if system:
+        result.append({"role": "system", "content": system})
+
+    for msg in messages:
+        role = msg["role"]
+        content = msg.get("content")
+
+        # Plain string content — pass through
+        if isinstance(content, str):
+            result.append({"role": role, "content": content})
+            continue
+
+        # Content is a list of blocks (Anthropic format)
+        if isinstance(content, list):
+            if role == "assistant":
+                result.extend(_translate_assistant_blocks(content))
+            elif role == "user":
+                result.extend(_translate_user_blocks(content))
+            else:
+                # Fallback: join text blocks
+                text = " ".join(
+                    b.get("text", "") for b in content if b.get("type") == "text"
+                )
+                result.append({"role": role, "content": text or ""})
+            continue
+
+        # Fallback
+        result.append({"role": role, "content": str(content) if content else ""})
+
+    return result
+
+
+def _translate_assistant_blocks(blocks: list[dict]) -> list[dict]:
+    """Convert assistant content blocks to OpenAI message(s)."""
+    text_parts: list[str] = []
+    tool_calls: list[dict] = []
+
+    for block in blocks:
+        if block.get("type") == "text":
+            text_parts.append(block["text"])
+        elif block.get("type") == "tool_use":
+            tool_calls.append({
+                "id": block["id"],
+                "type": "function",
+                "function": {
+                    "name": block["name"],
+                    "arguments": json.dumps(block.get("input", {})),
+                },
+            })
+
+    msg: dict = {"role": "assistant"}
+    content = "\n".join(text_parts) if text_parts else None
+    msg["content"] = content
+    if tool_calls:
+        msg["tool_calls"] = tool_calls
+    return [msg]
+
+
+def _translate_user_blocks(blocks: list[dict]) -> list[dict]:
+    """Convert user content blocks (including tool_result and image) to OpenAI messages."""
+    result: list[dict] = []
+    content_parts: list[dict] = []  # Accumulates text + image_url blocks
+
+    for block in blocks:
+        if block.get("type") == "tool_result":
+            # Flush any accumulated content parts first
+            if content_parts:
+                result.append({"role": "user", "content": content_parts})
+                content_parts = []
+            # tool_result -> role:tool message
+            content = block.get("content", "")
+            if isinstance(content, list):
+                content = "\n".join(
+                    b.get("text", "") for b in content if b.get("type") == "text"
+                )
+            result.append({
+                "role": "tool",
+                "tool_call_id": block["tool_use_id"],
+                "content": str(content),
+            })
+        elif block.get("type") == "text":
+            content_parts.append({"type": "text", "text": block.get("text", "")})
+        elif block.get("type") == "image":
+            # Anthropic image block -> OpenAI image_url block
+            source = block.get("source", {})
+            if source.get("type") == "base64":
+                media_type = source.get("media_type", "image/png")
+                data = source.get("data", "")
+                content_parts.append({
+                    "type": "image_url",
+                    "image_url": {"url": f"data:{media_type};base64,{data}"},
+                })
+
+    if content_parts:
+        # If only text parts, flatten to a simple string for compatibility
+        if all(p.get("type") == "text" for p in content_parts):
+            result.append({
+                "role": "user",
+                "content": "\n".join(p["text"] for p in content_parts),
+            })
+        else:
+            result.append({"role": "user", "content": content_parts})
+
+    return result
+
+
+def build_chat_completion_kwargs(
+    *,
+    model: str,
+    messages: list[dict],
+    max_tokens: int,
+    stream: bool = False,
+) -> dict:
+    """Build chat.completions kwargs using modern OpenAI parameter names."""
+    kwargs: dict = {
+        "model": model,
+        "messages": messages,
+        "max_completion_tokens": max_tokens,
+    }
+    if stream:
+        kwargs["stream"] = True
+        kwargs["stream_options"] = {"include_usage": True}
+    return kwargs
+
+
+class OpenAIProvider(LLMProvider):
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        ssl_verify: bool = True,
+    ) -> None:
+        import httpx
+
+        kwargs = {}
+        if api_key:
+            kwargs["api_key"] = api_key
+        if base_url:
+            kwargs["base_url"] = base_url
+        if not ssl_verify:
+            kwargs["http_client"] = httpx.AsyncClient(verify=False)
+        self._client = openai.AsyncOpenAI(**kwargs)
+
+    async def complete(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        tool_choice: dict | None = None,
+        max_tokens: int = 4096,
+    ) -> LLMResponse:
+        oai_messages = _translate_messages(system, messages)
+
+        kwargs = build_chat_completion_kwargs(
+            model=model,
+            messages=oai_messages,
+            max_tokens=max_tokens,
+        )
+        if tools:
+            kwargs["tools"] = _translate_tools(tools)
+        if tool_choice:
+            kwargs["tool_choice"] = _translate_tool_choice(tool_choice)
+
+        try:
+            response = await self._client.chat.completions.create(**kwargs)
+        except openai.BadRequestError as exc:
+            msg = str(exc).lower()
+            if "context_length_exceeded" in msg or "maximum context length" in msg:
+                raise ContextOverflowError(str(exc)) from exc
+            raise
+        except openai.APIStatusError as exc:
+            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+                msg = f"Server returned 429 — {exc.body['detail']}"
+                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                from anton.llm.provider import TokenLimitExceeded
+                raise TokenLimitExceeded(msg) from exc
+            else:
+                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
+            raise ConnectionError(msg) from exc
+        except openai.APIConnectionError as exc:
+            raise ConnectionError(
+                "Could not reach the LLM server — check your connection or try again in a moment."
+            ) from exc
+
+        choice = response.choices[0]
+        message = choice.message
+
+        content_text = message.content or ""
+        tool_calls: list[ToolCall] = []
+
+        if message.tool_calls:
+            for tc in message.tool_calls:
+                tool_calls.append(
+                    ToolCall(
+                        id=tc.id,
+                        name=tc.function.name,
+                        input=json.loads(tc.function.arguments) if tc.function.arguments else {},
+                    )
+                )
+
+        usage_obj = response.usage
+        input_tokens = usage_obj.prompt_tokens if usage_obj else 0
+        return LLMResponse(
+            content=content_text,
+            tool_calls=tool_calls,
+            usage=Usage(
+                input_tokens=input_tokens,
+                output_tokens=usage_obj.completion_tokens if usage_obj else 0,
+                context_pressure=compute_context_pressure(model, input_tokens),
+            ),
+            stop_reason=choice.finish_reason,
+        )
+
+    async def stream(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[StreamEvent]:
+        oai_messages = _translate_messages(system, messages)
+
+        kwargs = build_chat_completion_kwargs(
+            model=model,
+            messages=oai_messages,
+            max_tokens=max_tokens,
+            stream=True,
+        )
+        if tools:
+            kwargs["tools"] = _translate_tools(tools)
+
+        content_text = ""
+        tool_calls: list[ToolCall] = []
+        input_tokens = 0
+        output_tokens = 0
+        stop_reason: str | None = None
+
+        # Track tool call deltas by index
+        tc_state: dict[int, dict] = {}
+
+        try:
+            stream = await self._client.chat.completions.create(**kwargs)
+            async for chunk in stream:
+                if chunk.usage:
+                    input_tokens = chunk.usage.prompt_tokens
+                    output_tokens = chunk.usage.completion_tokens
+
+                if not chunk.choices:
+                    continue
+
+                delta = chunk.choices[0].delta
+                finish = chunk.choices[0].finish_reason
+
+                if finish:
+                    stop_reason = finish
+
+                # Text content
+                if delta.content:
+                    content_text += delta.content
+                    yield StreamTextDelta(text=delta.content)
+
+                # Tool call deltas
+                if delta.tool_calls:
+                    for tc_delta in delta.tool_calls:
+                        idx = tc_delta.index
+                        if idx not in tc_state:
+                            # New tool call
+                            tc_state[idx] = {
+                                "id": tc_delta.id or "",
+                                "name": tc_delta.function.name if tc_delta.function and tc_delta.function.name else "",
+                                "args_parts": [],
+                            }
+                            if tc_state[idx]["id"] and tc_state[idx]["name"]:
+                                yield StreamToolUseStart(
+                                    id=tc_state[idx]["id"],
+                                    name=tc_state[idx]["name"],
+                                )
+                        else:
+                            # Update id/name if provided in later chunks
+                            if tc_delta.id:
+                                tc_state[idx]["id"] = tc_delta.id
+                            if tc_delta.function and tc_delta.function.name:
+                                tc_state[idx]["name"] = tc_delta.function.name
+
+                        # Accumulate argument fragments
+                        if tc_delta.function and tc_delta.function.arguments:
+                            tc_state[idx]["args_parts"].append(tc_delta.function.arguments)
+                            yield StreamToolUseDelta(
+                                id=tc_state[idx]["id"],
+                                json_delta=tc_delta.function.arguments,
+                            )
+        except openai.BadRequestError as exc:
+            msg = str(exc).lower()
+            if "context_length_exceeded" in msg or "maximum context length" in msg:
+                raise ContextOverflowError(str(exc)) from exc
+            raise
+        except openai.APIStatusError as exc:
+            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+                msg = f"Server returned 429 — {exc.body['detail']}"
+                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
+                from anton.llm.provider import TokenLimitExceeded
+                raise TokenLimitExceeded(msg) from exc
+            else:
+                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
+            raise ConnectionError(msg) from exc
+        except openai.APIConnectionError as exc:
+            raise ConnectionError(
+                "Could not reach the LLM server — check your connection or try again in a moment."
+            ) from exc
+
+        # Finalize tool calls
+        for idx in sorted(tc_state):
+            info = tc_state[idx]
+            raw_json = "".join(info["args_parts"])
+            parsed = json.loads(raw_json) if raw_json else {}
+            tool_calls.append(ToolCall(id=info["id"], name=info["name"], input=parsed))
+            yield StreamToolUseEnd(id=info["id"])
+
+        yield StreamComplete(
+            response=LLMResponse(
+                content=content_text,
+                tool_calls=tool_calls,
+                usage=Usage(
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    context_pressure=compute_context_pressure(model, input_tokens),
+                ),
+                stop_reason=stop_reason,
+            )
+        )
diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
new file mode 100644
index 00000000..cce3ea9f
--- /dev/null
+++ b/anton/core/llm/prompts.py
@@ -0,0 +1,415 @@
+LEARNING_EXTRACT_PROMPT = """\
+Analyze this task execution and extract reusable learnings.
+For each learning, provide:
+- topic: short snake_case category name
+- content: the learning detail (1-3 sentences)
+- summary: one-line summary for indexing
+
+Return a JSON array. If no meaningful learnings, return [].
+
+Example output:
+[{"topic": "file_operations", "content": "Always check if a file exists before reading.", "summary": "Check file existence before reads"}]
+"""
+
+CHAT_SYSTEM_PROMPT = """\
+You are Anton — a self-evolving autonomous system that collaborates with people to \
+solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \
+computer, and you use that computer to get things done.
+
+Current date and time: {current_datetime}
+
+WHO YOU ARE:
+- You solve problems — not just write code. If someone needs emails classified, data \
+analyzed, a server monitored, or a workflow automated, you figure out how.
+- You learn and evolve. Every task teaches you something. You remember what worked, \
+what didn't, and get better over time. Your memory is local to this workspace.
+- You collaborate. You think alongside the user, ask smart questions, and work through \
+problems together — not just take orders.
+
+YOUR CAPABILITIES:
+- **Internet access**: You DO have access to the internet via the scratchpad. You can \
+fetch data from APIs, scrape websites, download files, and pull live data. Always use \
+the scratchpad for any internet access — requests, urllib, yfinance, etc.
+- **Scratchpad execution**: Give you a problem, you break it down and execute it \
+step by step — reading files, running commands, writing code, searching codebases. \
+The scratchpad is your primary execution engine — it has its own isolated environment \
+and can install packages on the fly.
+- **Persistent memory**: You have a brain-inspired memory system with rules (always/never/when), \
+lessons (facts), and identity (profile). Memories persist across sessions at both global \
+(~/.anton/memory/) and project (<workspace>/.anton/memory/) scopes.
+- **Self-awareness**: You can learn and persist facts about the project, the user's \
+preferences, and conventions via the memorize tool — so you don't start from \
+scratch every session.
+- **Episodic memory**: Searchable archive of past conversations. \
+Use the recall tool only when the user explicitly references a previous session \
+or conversation (e.g. "what did we discuss last time?"). For questions about \
+code, files, or data in the workspace, use the scratchpad instead.
+
+INTERNET & LIVE INFORMATION:
+- You have FULL internet access via the scratchpad. When the user asks about \
+current events, news, speeches, live data, or anything that requires up-to-date \
+information — USE THE SCRATCHPAD to fetch it. Do NOT say you can't access the \
+internet or live information.
+- For news and current events: use the scratchpad to fetch from news sites \
+(Reuters, AP News, CNN, BBC, etc.), search APIs, or scrape relevant pages. \
+Use requests + BeautifulSoup, or any other approach that works.
+- For financial data: use yfinance, requests to financial APIs, etc.
+- For any URL the user provides: fetch it directly with requests.
+- Think about WHICH sites are likely to have the information. You have vast \
+knowledge about what websites contain what kind of data — use that knowledge \
+to pick the right source, then fetch and parse it in the scratchpad.
+- If the first source doesn't work, try alternatives. Don't give up after one \
+attempt — try 2-3 different approaches before telling the user it's unavailable.
+
+PUBLIC DATA AND WORLD EVENTS (use these by default — no API keys required):
+Start with free, open sources. Only ask the user to connect paid services or personal \
+accounts if they request it or if free sources are insufficient.
+
+News & current events (via RSS — use feedparser):
+- Google News RSS: `https://news.google.com/rss/search?q={{query}}&hl={{lang}}&gl={{country}}` \
+— any topic, any country. Use country/language codes (gl=US&hl=en, gl=MX&hl=es, gl=BR&hl=pt-BR, \
+gl=JP&hl=ja, etc.). This is your primary news source.
+- Reuters: `https://www.rss.reuters.com/news/` (world, business, tech sections)
+- AP News: `https://rsshub.app/apnews/topics/{{topic}}` (top-news, politics, business, technology, science, entertainment)
+- BBC World: `http://feeds.bbci.co.uk/news/rss.xml` (also /world, /business, /technology)
+- NPR: `https://feeds.npr.org/1001/rss.xml` (news), `1006/rss.xml` (business)
+- For country-specific news, use Google News RSS with the country code — it aggregates \
+local sources automatically.
+- Parse feeds with `feedparser`: title, link, published date, summary. \
+Store as a list of dicts for dashboard integration.
+
+Financial & market data:
+- yfinance: stocks, ETFs, indices, crypto, forex — historical and real-time. \
+Use tickers like ^GSPC (S&P 500), ^DJI (Dow), ^IXIC (Nasdaq), BTC-USD, etc.
+- FRED (Federal Reserve): `https://fred.stlouisfed.org/` — macro indicators \
+(GDP, CPI, unemployment, interest rates, money supply). Use fredapi package \
+with free API key, or fetch CSV directly: \
+`https://fred.stlouisfed.org/graph/fredgraph.csv?id={{series_id}}` (no key needed for CSV).
+- CoinGecko: `https://api.coingecko.com/api/v3/` — crypto prices, market cap, \
+volume, trending coins. Free, no key.
+
+Economic & global data:
+- World Bank: `https://api.worldbank.org/v2/country/{{code}}/indicator/{{indicator}}?format=json` \
+— GDP, population, poverty, education, health by country. Free, no key.
+- OECD: `https://sdmx.oecd.org/public/rest/data/` — economic indicators for OECD countries.
+- Open Exchange Rates: `https://open.er-api.com/v6/latest/{{base}}` — free forex rates.
+
+Social & sentiment:
+- Reddit JSON: `https://www.reddit.com/r/{{subreddit}}/.json` — add .json to any \
+Reddit URL for structured data. Good for sentiment on specific topics.
+- HackerNews: `https://hacker-news.firebaseio.com/v0/` — tech news, top/new/best stories.
+
+When building "state of affairs" or country dashboards, ALWAYS layer multiple sources: \
+quantitative data (markets, economic indicators) + news context (RSS headlines) + \
+narrative synthesis. A chart without news context is just numbers; headlines without \
+data are just opinions. Combine them.
+
+PROACTIVE FOLLOW-UP SUGGESTIONS:
+After completing analysis on public datasets, think about whether the user's own data \
+could complement the analysis. If there's a natural personal data extension, offer it \
+in ONE sentence at the end of your response. Examples:
+- After stock/market analysis → "If you'd like, I can analyze your portfolio against \
+these benchmarks."
+- After economic/industry analysis → "I can also pull in your company's data to see \
+how you compare."
+- After email or communication analysis → "Want me to cross-reference this with your \
+calendar or contacts?"
+- After crypto analysis → "I can connect to your exchange if you want to see your \
+holdings in this context."
+Keep it brief, helpful, not pushy. Don't repeat the offer if the user ignores it. \
+Don't suggest personal data analysis if the user's question is purely informational \
+with no personal angle.
+
+CONTENT SHARING POLICY:
+- Publishing dashboards or reports to the web is done ONLY via the `publish_or_preview` tool. \
+Do NOT upload, post, or share generated files (HTML, data, images) to external hosting \
+services (paste sites, gists, CDNs, file hosts) via scratchpad code — unless the user \
+explicitly names the service and confirms. Reading from public APIs and writing to the \
+user's connected datasources (databases, CRMs, etc.) is fine — this rule only applies to \
+sharing generated output with the public internet.
+
+SCRATCHPAD:
+- Use the scratchpad for computation, data analysis, web scraping, plotting, file I/O, \
+shell commands, and anything that needs precise execution.
+- Each scratchpad has its own isolated environment — use the install action to add \
+libraries on the fly.
+- When you need to count characters, do math, parse data, or transform text — use the \
+scratchpad tool instead of guessing or doing it in your head.
+- Variables, imports, and data persist across cells — like a notebook you drive \
+programmatically. Use this for both quick one-off calculations and multi-step analysis.
+- get_llm() returns a pre-configured LLM client — use llm.complete(system=..., messages=[...]) \
+for AI-powered computation within scratchpad code. The call is synchronous.
+- llm.generate_object(MyModel, system=..., messages=[...]) extracts structured data into \
+Pydantic models. Define a class with BaseModel, and the LLM fills it. Supports list[Model] too.
+- agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) runs an LLM \
+tool-call loop inside scratchpad code. The LLM reasons and calls your tools iteratively. \
+handle_tool(name, inputs) is a plain sync function returning a string result. Use this for \
+multi-step AI workflows like classification, extraction, or analysis with structured outputs.
+- All .anton/.env variables are available as environment variables (os.environ).
+- Connected data source credentials are injected as namespaced environment \
+variables in the form DS_<ENGINE_NAME>__<FIELD> \
+(e.g. DS_POSTGRES_PROD_DB__HOST, DS_POSTGRES_PROD_DB__PASSWORD, \
+DS_HUBSPOT_MAIN__ACCESS_TOKEN). Use those variables directly in scratchpad \
+code and never read ~/.anton/data_vault/ files directly.
+- Flat variables like DS_HOST or DS_PASSWORD are used only temporarily \
+during internal connection test snippets. Do not assume they exist during \
+normal chat/runtime execution.
+- When the user asks how you solved something or wants to see your work, use the scratchpad \
+dump action — it shows a clean notebook-style summary without wasting tokens on reformatting.
+- Always use print() to produce output — scratchpad captures stdout.
+- IMPORTANT: The scratchpad starts with a clean namespace — nothing is pre-imported. \
+Always include all necessary imports at the top of each cell that uses them. \
+Re-importing is a no-op in Python so there is zero cost, and it guarantees the cell \
+works even if earlier cells failed or state was lost.
+- IMPORTANT: Each cell has a hard timeout of 120 seconds. If exceeded, the process is \
+killed and ALL state (variables, imports, data) is lost. For every exec call, provide \
+one_line_description and estimated_execution_time_seconds (integer). If your estimate \
+exceeds 90 seconds, you MUST break the work into smaller cells. Prefer vectorized \
+operations, batch I/O, and focused cells that do one thing well.
+- Host Python packages are available by default. Use the scratchpad install action to \
+add more — installed packages persist across resets.
+
+FILE ATTACHMENTS:
+- Users can drag files or paste clipboard images. These appear as <file path="..."> tags.
+- For binary files (images, PDFs), use the scratchpad to read and process them.
+- Clipboard images are saved to .anton/uploads/ — open with Pillow, OpenCV, etc.
+
+{visualizations_section}
+
+CONVERSATION DISCIPLINE (critical):
+- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
+and then act in the same turn — that skips the user's answer.
+- Only act when you have ALL the information you need. If you're unsure \
+about anything, ask first, then act in a LATER turn after receiving the answer.
+- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
+it in context of what you just asked. Do not ask them to repeat themselves.
+- Gather requirements incrementally through conversation. Do not front-load every \
+possible question at once — ask 1-3 at a time, then follow up.
+
+RUNTIME IDENTITY:
+{runtime_context}
+- You know what LLM provider and model you are running on. NEVER ask the user which \
+LLM or API they want — you already know. When building tools or code that needs an LLM, \
+use YOUR OWN provider and SDK (the one from the runtime info above).
+
+PROBLEM-SOLVING RESILIENCE:
+- When something fails (HTTP 403, import error, timeout, blocked request, etc.), pause \
+before asking the user for help. Ask yourself: "Can I solve this differently without \
+user input?"
+- Try creative workarounds first: different HTTP headers or user-agents, a public API \
+instead of scraping, archive.org/Wayback Machine snapshots, alternate libraries, \
+different data sources for the same information, caching/retrying with backoff, etc.
+- Exhaust at least 2-3 genuinely different approaches before involving the user. Each \
+attempt should be a meaningfully different strategy — not just retrying the same thing.
+- Only ask the user for things that truly require them: credentials they haven't shared, \
+ambiguous requirements you can't infer, access to private/internal systems, or a choice \
+between equally valid options.
+- When you do ask for help, briefly explain what you already tried and why it didn't work \
+so the user has full context and doesn't suggest things you've already done.
+
+GENERAL RULES:
+- Be conversational, concise, and direct. No filler. No bullet-point dumps unless asked.
+- Respond naturally to greetings, small talk, and follow-up questions.
+- When describing yourself, focus on problem-solving and collaboration — not listing \
+features. Be brief: a few sentences, not an essay.
+- After completing work, always end with what the user might want next: follow-up \
+questions, related actions, or deeper dives. If the answer involved computation or \
+data work, offer to show how you got there ("want me to dump the scratchpad so you \
+can see the steps?"). If the result could be extended, suggest it ("I can also break \
+this down by category if that helps"). Always leave a door open — never dead-end.
+- Never show raw code, diffs, or tool output unprompted — summarize in plain language. \
+But always let the user know the detail is available if they want it.
+- When you discover important information, use the memorize tool to encode it. \
+Use "always"/"never"/"when" for behavioral rules. Use "lesson" for facts. \
+Use "profile" for things about the user. Choose "global" for universal knowledge, \
+"project" for workspace-specific knowledge. \
+Only encode genuinely reusable knowledge — not transient conversation details.
+"""
+
+# ---------------------------------------------------------------------------
+# Visualization prompt variants — selected by ANTON_PROACTIVE_DASHBOARDS flag
+# ---------------------------------------------------------------------------
+
+_VISUALIZATIONS_PROACTIVE = """\
+VISUALIZATIONS (charts, plots, maps, dashboards, reports):
+
+Insights-first workflow — ALWAYS follow this order for dashboards and multi-chart requests:
+1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \
+structured results (numbers, percentages, rankings) — not HTML yet.
+2. STREAM INSIGHTS IMMEDIATELY: Before building any visualization, narrate your findings \
+to the user in the chat. They should get value within seconds, not after waiting for HTML. \
+Structure insights as:
+  - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \
+(use markdown tables). This gives the user the raw data immediately — positions, values, \
+returns, key metrics — before you interpret them.
+  - HEADLINE: One sentence, the single most important finding. Lead with impact, not description.
+  - CONTEXT: Compare against a benchmark, historical average, or expectation. Raw numbers \
+without comparison are meaningless.
+  - THE NON-OBVIOUS: What would an expert analyst notice? Disproportionate impacts, hidden \
+correlations, concentration risks, counterintuitive patterns. Don't restate what the user \
+can read in a table — tell them what the table doesn't show.
+  - ASSUMPTIONS: Be explicit. What data source? What time range? Closing vs adjusted prices? \
+Timezone? Real-time or delayed? Don't hide these — state them clearly.
+  - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \
+thresholds that matter, scenarios worth considering.
+3. WRITE A DASHBOARD BRIEF: Before coding the HTML, plan the dashboard out loud:
+  - What story does each chart tell? (not "a bar chart of X" but "this shows how Y \
+is driving Z, annotated at the inflection point")
+  - What is the visual hierarchy? Hero KPIs at top, main narrative chart first, \
+supporting charts below.
+  - What should be annotated? Key dates, threshold crossings, outliers.
+  - What color scheme ties it together? Consistent meaning (green=positive, red=negative) \
+across all charts.
+4. BUILD THE DASHBOARD — use multiple scratchpad cells, but produce ONE single self-contained HTML file:
+
+  CRITICAL: The final dashboard MUST be a single .html file with ALL data, CSS, and JS inlined. \
+Do NOT reference external local files (like data.js) — browsers block local file:// cross-references \
+for security reasons and the dashboard will silently fail to load data.
+
+  SECURITY (critical): Dashboards may be published to the web. NEVER embed API keys, tokens, \
+passwords, connection strings, or any credentials in the HTML, JS, or inline data. Fetch data \
+in scratchpad cells using credentials from environment variables, then serialize only the \
+resulting data into the dashboard. If the user explicitly asks to embed a credential \
+(e.g. for a live-updating dashboard), warn them that publishing will expose it and get \
+confirmation before proceeding.
+
+  Build the parts in separate cells, then assemble at the end:
+
+  CELL 1 — Serialize data to a JS string variable (programmatic, no HTML):
+  Serialize all computed data (dataframes, metrics, KPIs) into a Python string. Build a \
+Python dict with keys like "kpis", "tables", "charts" — each containing the relevant data. \
+Convert DataFrames with df.to_dict(orient='records'). Use json.dumps(data, default=str) to \
+handle dates, Decimal, numpy types. Store as a Python variable: \
+`data_js = 'const D = ' + json_string + ';'` — do NOT write to a separate file.
+
+  CELL 2 — Build CSS + HTML structure as a Python string variable:
+  Write the HTML head (styles, CDN script tags) and body structure (header, KPIs, chart divs, \
+tabs, tables) as a Python string variable `html_body`. This cell builds the template.
+
+  CELL 3+ — Build JS chart rendering logic as Python string variables:
+  Write the JavaScript that initializes charts, populates tables, handles tabs, etc. \
+Split across multiple cells if needed to avoid token limits. Store as `js_charts` etc.
+
+  FINAL CELL — Assemble and write the HTML file:
+  Combine: `html = html_body.replace('</body>', f'<script>{{data_js}}{{js_charts}}</script></body>')` \
+or similar. Write to `.anton/output/name.html` and open in browser.
+
+  SELF-CONTAINED OUTPUT (critical):
+  Prefer inlining everything — CSS in `<style>`, JS in `<script>`, data as JS variables. \
+A single .html file is the most portable and publishable format. \
+If the dataset is very large (>100KB of JSON), you may write it to a separate .js file \
+in the SAME directory (e.g. `.anton/output/dashboard_data.js`) and reference it with a \
+relative `<script src="dashboard_data.js">` tag. The publisher will auto-bundle sibling \
+files referenced in the HTML. Never reference files outside the output directory.
+
+  WHY: (1) Browsers block local file:// cross-references across directories. \
+(2) Splitting the build across cells catches JS/CSS errors early — if a cell has a syntax issue \
+in a string, you'll see it before the final assembly. (3) Large datasets in single cells timeout. \
+(4) Self-contained files can be published to the web via /publish without missing assets.
+
+  PYTHON → JS STRING SAFETY (critical):
+  When building JS code inside Python strings, escape sequences get resolved by Python BEFORE \
+writing to the file. This means '\\n' in Python becomes a literal newline in the output, which \
+breaks JavaScript string literals. Rules:
+  - Use '\\\\n' in Python if you need a literal \\n in the JS output
+  - Use raw strings (r"...") for JS code blocks when possible
+  - NEVER use '\\n', '\\t', or '\\\"' inside JS strings within Python — double-escape them
+  - After writing the file, sanity-check that no string literals span multiple lines
+
+Output format:
+- Unless the user explicitly asks for a different format, always output visualizations \
+as polished, single-file HTML pages — never raw PNGs or bare image files.
+- Save output to `.anton/output/` (create it if needed). Use descriptive filenames like \
+`cpi_portfolio.html`, not `output.html`.
+- Do NOT auto-open the file in the browser from scratchpad code. Instead, after writing \
+the HTML file, call the `publish_or_preview` tool with the file path and a short title. \
+This tool will interactively ask the user if they want to preview locally, publish to the \
+web, or skip. Let the tool handle the browser opening and publishing flow.
+
+Visual design:
+- Make it look good by default. Use a dark theme (#0d1117 background, #e6edf3 text), \
+clean typography (system sans-serif stack), generous padding, and responsive layout.
+- ALWAYS use Apache ECharts for interactive charts. Load it via CDN: \
+`<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>`. \
+No Python dependencies needed — just write the HTML with inline JS. Use ECharts' built-in \
+dark theme: `echarts.init(dom, 'dark')`, then customize colors to match #0d1117 background.
+- NEVER use Plotly, matplotlib, or other charting libraries unless the user explicitly asks.
+
+Line smoothing (critical — smooth: true misrepresents volatile data):
+- DEFAULT: `smooth: false` on ALL line series. Straight segments between data points are \
+the honest representation — they show actual volatility, drawdowns, and inflection points.
+- EXCEPTION: Use `smooth: true` ONLY for cumulative/monotonic series (cumulative returns, \
+running totals, growth curves) where the trend matters more than point-to-point moves.
+- Decision heuristic: Does the line ever reverse direction meaningfully? If yes → smooth: false. \
+Is it a running sum, cumulative metric, or long-horizon trend? → smooth: true is acceptable.
+- Line widths: 2.5 for hero/primary lines, 1.5 for multi-line comparisons, 1 for secondary/reference lines.
+
+Chart readability (critical — labels must NEVER overlap):
+- Use `axisLabel: {{ rotate: -45 }}` or `{{ rotate: 45 }}` on crowded axes. \
+Set `grid: {{ containLabel: true }}` so labels never clip. Use `legend: {{ type: 'scroll', \
+bottom: 0 }}` to place scrollable legends below the chart. For pie/donut charts use \
+`label: {{ show: true, position: 'outside' }}` with `labelLayout: {{ hideOverlap: true }}`. \
+For bar charts with many categories, use horizontal bars (`yAxis` as category) or \
+abbreviate labels with `axisLabel: {{ formatter }}`. Always configure rich `tooltip` with \
+`formatter` functions for precise value display on hover. Use `dataZoom` for time series \
+so users can zoom into ranges.
+
+Layout and composition:
+- For non-chart visualizations (tables, reports, dashboards), write clean HTML/CSS directly. \
+Use CSS grid or flexbox. Add subtle styling: rounded corners, soft shadows, hover effects.
+- When showing multiple related visuals, combine them into a single page with sections, \
+not separate files. Ensure each chart has enough height (min 400px) and breathing room \
+between them so nothing feels cramped.
+- Hero KPI cards at the top (large numbers, color-coded positive/negative, with delta arrows).
+- Main narrative chart immediately below the KPIs — this is the chart that tells the story.
+- Supporting charts below, each with a clear subtitle explaining what it reveals.
+- Annotations on charts: use ECharts `markLine` for thresholds, `markPoint` for outliers, \
+and `markArea` for highlighted regions. A chart without annotations is a missed opportunity.
+- The goal: every visualization should look like a polished product page, not a homework \
+assignment. Think dark-mode dashboard, not Jupyter default.\
+"""
+
+_VISUALIZATIONS_CLI_ONLY = """\
+VISUALIZATIONS AND ANALYSIS OUTPUT:
+
+Do NOT proactively create HTML dashboards, charts, or browser-based visualizations. \
+All analysis output should be formatted for the CLI terminal.
+
+Insights-first workflow — ALWAYS follow this order for analysis and reports:
+1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \
+structured results (numbers, percentages, rankings).
+2. STREAM INSIGHTS IMMEDIATELY: Narrate your findings to the user in the chat. They should \
+get value within seconds. Structure insights as:
+  - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \
+(use markdown tables). This gives the user the raw data immediately — positions, values, \
+returns, key metrics — before you interpret them.
+  - HEADLINE: One sentence, the single most important finding. Lead with impact, not description.
+  - CONTEXT: Compare against a benchmark, historical average, or expectation. Raw numbers \
+without comparison are meaningless.
+  - THE NON-OBVIOUS: What would an expert analyst notice? Disproportionate impacts, hidden \
+correlations, concentration risks, counterintuitive patterns. Don't restate what the user \
+can read in a table — tell them what the table doesn't show.
+  - ASSUMPTIONS: Be explicit. What data source? What time range? Closing vs adjusted prices? \
+Timezone? Real-time or delayed? Don't hide these — state them clearly.
+  - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \
+thresholds that matter, scenarios worth considering.
+
+CLI output format:
+- Present all results as well-formatted markdown: tables, bullet points, headers, and \
+inline numbers. The terminal is the primary display — make it look great there.
+- Use markdown tables for tabular data. Keep columns aligned and readable.
+- Use bold/headers for section structure. Use bullet points for lists.
+- For large datasets, summarize the top N and offer to show more.
+- When the user EXPLICITLY asks for a chart, dashboard, plot, or HTML visualization, \
+THEN build it as a self-contained HTML file with inlined CSS, JS, and data. \
+Save to .anton/output/. Do NOT auto-open the file from scratchpad code — instead call the \
+`publish_or_preview` tool with the file path and title after writing it. \
+Use Apache ECharts (CDN), dark theme (#0d1117), and follow standard dashboard best practices. \
+If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \
+Never split CSS or chart logic into separate files — only large data payloads.\
+"""
+
+
+def build_visualizations_prompt(proactive: bool = False) -> str:
+    """Return the visualization section for the system prompt."""
+    return _VISUALIZATIONS_PROACTIVE if proactive else _VISUALIZATIONS_CLI_ONLY
diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py
new file mode 100644
index 00000000..ceae40bf
--- /dev/null
+++ b/anton/core/llm/provider.py
@@ -0,0 +1,165 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class ToolCall:
+    id: str
+    name: str
+    input: dict[str, Any]
+
+
+@dataclass
+class Usage:
+    input_tokens: int = 0
+    output_tokens: int = 0
+    context_pressure: float = 0.0
+
+
+@dataclass
+class LLMResponse:
+    content: str
+    tool_calls: list[ToolCall] = field(default_factory=list)
+    usage: Usage = field(default_factory=Usage)
+    stop_reason: str | None = None
+
+@dataclass
+class StreamTextDelta:
+    text: str
+
+
+@dataclass
+class StreamToolUseStart:
+    id: str
+    name: str
+
+
+@dataclass
+class StreamToolUseDelta:
+    id: str
+    json_delta: str
+
+
+@dataclass
+class StreamToolUseEnd:
+    id: str
+
+
+@dataclass
+class StreamComplete:
+    response: LLMResponse
+
+
+@dataclass
+class StreamTaskProgress:
+    """Progress event from agent task execution (planning, building, executing)."""
+    phase: str
+    message: str
+    eta_seconds: float | None = None
+
+
+@dataclass
+class StreamToolResult:
+    """Tool result that should be displayed to the user (e.g. scratchpad dump)."""
+    content: str
+
+
+@dataclass
+class StreamContextCompacted:
+    """Notification that context was compacted to free up space."""
+    message: str
+
+
+StreamEvent = (
+    StreamTextDelta
+    | StreamToolUseStart
+    | StreamToolUseDelta
+    | StreamToolUseEnd
+    | StreamComplete
+    | StreamTaskProgress
+    | StreamToolResult
+    | StreamContextCompacted
+)
+
+
+_CONTEXT_WINDOWS: list[tuple[str, int]] = [
+    # Anton defaults (exact model IDs first)
+    ("claude-sonnet-4-6", 200_000),
+    ("claude-haiku-4-5-20251001", 200_000),
+    # Claude families
+    ("claude-opus-4", 200_000),
+    ("claude-sonnet-4", 200_000),
+    ("claude-haiku-4", 200_000),
+    ("claude-3", 200_000),
+    ("claude-", 200_000),
+    # OpenAI families
+    ("gpt-5", 400_000),
+    ("gpt-4.1", 1_000_000),
+    ("gpt-4o", 128_000),
+    ("gpt-4", 128_000),
+    ("o3", 200_000),
+    ("o1", 200_000),
+]
+_DEFAULT_CONTEXT_WINDOW = 128_000
+
+
+def compute_context_pressure(model: str, input_tokens: int) -> float:
+    """Return input_tokens / context_window as a 0.0–1.0 float."""
+    window = _DEFAULT_CONTEXT_WINDOW
+    for prefix, size in _CONTEXT_WINDOWS:
+        if model.startswith(prefix):
+            window = size
+            break
+    return min(input_tokens / window, 1.0)
+
+
+class ContextOverflowError(Exception):
+    """Raised when the LLM rejects a request due to context length exceeded."""
+
+    def __init__(self, message: str, input_tokens: int = 0, limit: int = 0):
+        super().__init__(message)
+        self.input_tokens = input_tokens
+        self.limit = limit
+
+
+class TokenLimitExceeded(Exception):
+    """Raised when the LLM returns 429 due to billing/token limits."""
+
+
+class LLMProvider(ABC):
+    @abstractmethod
+    async def complete(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        tool_choice: dict | None = None,
+        max_tokens: int = 4096,
+    ) -> LLMResponse: ...
+
+    async def stream(
+        self,
+        *,
+        model: str,
+        system: str,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        max_tokens: int = 4096,
+    ) -> AsyncIterator[StreamEvent]:
+        """Stream LLM responses. Default falls back to complete()."""
+        response = await self.complete(
+            model=model,
+            system=system,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens,
+        )
+        if response.content:
+            yield StreamTextDelta(text=response.content)
+        yield StreamComplete(response=response)

From b14d8f0d8ac0796215adfb196e3bd3da6327d58b Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 12:24:22 -0700
Subject: [PATCH 027/134] introduced the prompt field for ToolDef

---
 anton/core/tools/tool_defs.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py
index ce221e3b..fb3ba4c9 100644
--- a/anton/core/tools/tool_defs.py
+++ b/anton/core/tools/tool_defs.py
@@ -1,7 +1,7 @@
 from anton.core.tools.tool_handlers import handle_scratchpad, handle_memorize, handle_recall
 
 from dataclasses import dataclass
-from typing import Callable
+from typing import Callable, Optional
 
 
 @dataclass
@@ -10,6 +10,7 @@ class ToolDef:
     description: str
     input_schema: dict
     handler: Callable  # async (session, tc_input) -> str
+    prompt: Optional[str] = None  # Optional prompt relevant to the tool to be injected into the system prompt.
 
 
 SCRATCHPAD_TOOL = ToolDef(

From ddec2adc35f0ef210bc8691458ed9125aa841fd8 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 12:30:36 -0700
Subject: [PATCH 028/134] moved publish_or_preview tool prompt to ToolDef

---
 anton/tools.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/anton/tools.py b/anton/tools.py
index 94cb828d..04ea780a 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -202,4 +202,13 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str
         "required": ["file_path"],
     },
     handler = handle_publish_or_preview,
+    prompt = (
+        "CONTENT SHARING POLICY:\n"
+        "- Publishing dashboards or reports to the web is done ONLY via the `publish_or_preview` tool. \n"
+        "- Do NOT upload, post, or share generated files (HTML, data, images) to external hosting \n"
+        "- services (paste sites, gists, CDNs, file hosts) via scratchpad code — unless the user \n"
+        "- explicitly names the service and confirms. Reading from public APIs and writing to the \n"
+        "- user's connected datasources (databases, CRMs, etc.) is fine — this rule only applies to \n"
+        "- sharing generated output with the public internet."
+    ),
 )

From d404071c706eba00bf58aa7d94bbd5bc7cb02b83 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 12:30:43 -0700
Subject: [PATCH 029/134] updated imports in session

---
 anton/core/session.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index 13938158..57d5467e 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -4,8 +4,8 @@
 from collections.abc import AsyncIterator
 from typing import TYPE_CHECKING
 
-from anton.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt
-from anton.llm.provider import (
+from anton.core.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt
+from anton.core.llm.provider import (
     ContextOverflowError,
     StreamComplete,
     StreamContextCompacted,
@@ -29,7 +29,7 @@
     from rich.console import Console
     from anton.context.self_awareness import SelfAwarenessContext
     from anton.chat_ui import EscapeWatcher
-    from anton.llm.client import LLMClient
+    from anton.core.llm.client import LLMClient
     from anton.memory.cortex import Cortex
     from anton.memory.episodes import EpisodicMemory
     from anton.memory.history_store import HistoryStore

From a995766a6c240c14eae2beee6049cb6daf9467d6 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 12:35:44 -0700
Subject: [PATCH 030/134] updated imports to point to core.llm

---
 anton/chat.py                | 4 ++--
 anton/chat_session.py        | 2 +-
 anton/cli.py                 | 4 ++--
 anton/core/llm/anthropic.py  | 4 ++--
 anton/core/llm/client.py     | 4 ++--
 anton/core/llm/openai.py     | 4 ++--
 anton/memory/consolidator.py | 2 +-
 anton/memory/cortex.py       | 2 +-
 anton/minds_client.py        | 2 +-
 anton/scratchpad_boot.py     | 4 ++--
 10 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 000aa3c1..39dcfc14 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -18,7 +18,7 @@
     save_clipboard_image,
 )
 from anton.core.session import ChatSession, TOKEN_STATUS_CACHE_TTL
-from anton.llm.provider import (
+from anton.core.llm.provider import (
     TokenLimitExceeded,
     StreamComplete,
     StreamContextCompacted,
@@ -911,7 +911,7 @@ async def _chat_loop(
     console: Console, settings: AntonSettings, *, resume: bool = False, first_run: bool = False, desktop_first_run: bool = False
 ) -> None:
     from anton.context.self_awareness import SelfAwarenessContext
-    from anton.llm.client import LLMClient
+    from anton.core.llm.client import LLMClient
     from anton.memory.cortex import Cortex
     from anton.workspace import Workspace
 
diff --git a/anton/chat_session.py b/anton/chat_session.py
index a042ab52..a10d7dad 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -65,7 +65,7 @@ def rebuild_session(
     session_id: str | None = None,
 ) -> "ChatSession":
     """Rebuild LLMClient + ChatSession after settings change."""
-    from anton.llm.client import LLMClient
+    from anton.core.llm.client import LLMClient
     from anton.chat import ChatSession
 
     state["llm_client"] = LLMClient.from_settings(settings)
diff --git a/anton/cli.py b/anton/cli.py
index 7a26df6a..3871a059 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -21,10 +21,10 @@
 from anton import __version__
 
 from anton.utils.prompt import prompt_or_cancel
-from anton.llm.openai import build_chat_completion_kwargs
+from anton.core.llm.openai import build_chat_completion_kwargs
 
 from anton.chat import ChatSession
-from anton.llm.client import LLMClient
+from anton.core.llm.client import LLMClient
 from anton.scratchpad import ScratchpadManager
 
 from anton.commands.datasource import (
diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
index c940b4e5..7d61f132 100644
--- a/anton/core/llm/anthropic.py
+++ b/anton/core/llm/anthropic.py
@@ -60,7 +60,7 @@ async def complete(
             if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
+                from .provider import TokenLimitExceeded
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
@@ -172,7 +172,7 @@ async def stream(
             if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
+                from .provider import TokenLimitExceeded
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py
index 8a773608..a96c8ae7 100644
--- a/anton/core/llm/client.py
+++ b/anton/core/llm/client.py
@@ -86,8 +86,8 @@ async def code(
 
     @classmethod
     def from_settings(cls, settings: AntonSettings) -> LLMClient:
-        from anton.llm.anthropic import AnthropicProvider
-        from anton.llm.openai import OpenAIProvider
+        from .anthropic import AnthropicProvider
+        from .openai import OpenAIProvider
 
         providers = {
             "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key),
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
index 26236faf..95440393 100644
--- a/anton/core/llm/openai.py
+++ b/anton/core/llm/openai.py
@@ -234,7 +234,7 @@ async def complete(
             if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
+                from .provider import TokenLimitExceeded
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
@@ -362,7 +362,7 @@ async def stream(
             if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
+                from .provider import TokenLimitExceeded
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
diff --git a/anton/memory/consolidator.py b/anton/memory/consolidator.py
index d5ccecda..621aa10e 100644
--- a/anton/memory/consolidator.py
+++ b/anton/memory/consolidator.py
@@ -28,7 +28,7 @@
 from anton.memory.hippocampus import Engram
 
 if TYPE_CHECKING:
-    from anton.llm.client import LLMClient
+    from anton.core.llm.client import LLMClient
     from anton.scratchpad import Cell
 
 
diff --git a/anton/memory/cortex.py b/anton/memory/cortex.py
index 4a3d185c..bb3344bd 100644
--- a/anton/memory/cortex.py
+++ b/anton/memory/cortex.py
@@ -26,7 +26,7 @@
 from anton.memory.hippocampus import Engram, Hippocampus
 
 if TYPE_CHECKING:
-    from anton.llm.client import LLMClient
+    from anton.core.llm.client import LLMClient
 
 
 _IDENTITY_EXTRACT_PROMPT = """\
diff --git a/anton/minds_client.py b/anton/minds_client.py
index 3e1a0046..a0df7372 100644
--- a/anton/minds_client.py
+++ b/anton/minds_client.py
@@ -16,7 +16,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
-from anton.llm.openai import build_chat_completion_kwargs
+from anton.core.llm.openai import build_chat_completion_kwargs
 
 if TYPE_CHECKING:
     from anton.settings import AntonSettings
diff --git a/anton/scratchpad_boot.py b/anton/scratchpad_boot.py
index f3267548..7f205ea5 100644
--- a/anton/scratchpad_boot.py
+++ b/anton/scratchpad_boot.py
@@ -19,9 +19,9 @@
 
         _scratchpad_provider_name = os.environ.get("ANTON_SCRATCHPAD_PROVIDER", "anthropic")
         if _scratchpad_provider_name in ("openai", "openai-compatible"):
-            from anton.llm.openai import OpenAIProvider as _ProviderClass
+            from anton.core.llm.openai import OpenAIProvider as _ProviderClass
         else:
-            from anton.llm.anthropic import AnthropicProvider as _ProviderClass
+            from anton.core.llm.anthropic import AnthropicProvider as _ProviderClass
 
         _llm_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
         if _scratchpad_provider_name in ("openai", "openai-compatible"):

From 0888679165a9430622efddc50f340827e4246135 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 12:36:17 -0700
Subject: [PATCH 031/134] removed old llm components

---
 anton/llm/__init__.py  |   0
 anton/llm/anthropic.py | 196 -------------------
 anton/llm/client.py    | 112 -----------
 anton/llm/openai.py    | 394 --------------------------------------
 anton/llm/prompts.py   | 415 -----------------------------------------
 anton/llm/provider.py  | 165 ----------------
 6 files changed, 1282 deletions(-)
 delete mode 100644 anton/llm/__init__.py
 delete mode 100644 anton/llm/anthropic.py
 delete mode 100644 anton/llm/client.py
 delete mode 100644 anton/llm/openai.py
 delete mode 100644 anton/llm/prompts.py
 delete mode 100644 anton/llm/provider.py

diff --git a/anton/llm/__init__.py b/anton/llm/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/anton/llm/anthropic.py b/anton/llm/anthropic.py
deleted file mode 100644
index a88176d5..00000000
--- a/anton/llm/anthropic.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from __future__ import annotations
-
-import json
-from collections.abc import AsyncIterator
-
-import anthropic
-
-from anton.llm.provider import (
-    ContextOverflowError,
-    LLMProvider,
-    LLMResponse,
-    StreamComplete,
-    StreamEvent,
-    StreamTextDelta,
-    StreamToolUseDelta,
-    StreamToolUseEnd,
-    StreamToolUseStart,
-    ToolCall,
-    Usage,
-    compute_context_pressure,
-)
-
-
-class AnthropicProvider(LLMProvider):
-    def __init__(self, api_key: str | None = None) -> None:
-        kwargs = {}
-        if api_key:
-            kwargs["api_key"] = api_key
-        self._client = anthropic.AsyncAnthropic(**kwargs)
-
-    async def complete(
-        self,
-        *,
-        model: str,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        tool_choice: dict | None = None,
-        max_tokens: int = 4096,
-    ) -> LLMResponse:
-        kwargs: dict = {
-            "model": model,
-            "max_tokens": max_tokens,
-            "system": system,
-            "messages": messages,
-        }
-        if tools:
-            kwargs["tools"] = tools
-        if tool_choice:
-            kwargs["tool_choice"] = tool_choice
-
-        try:
-            response = await self._client.messages.create(**kwargs)
-        except anthropic.BadRequestError as exc:
-            msg = str(exc).lower()
-            if "prompt is too long" in msg or "context limit" in msg:
-                raise ContextOverflowError(str(exc)) from exc
-            raise
-        except anthropic.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
-                msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
-                raise TokenLimitExceeded(msg) from exc
-            else:
-                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
-            raise ConnectionError(msg) from exc
-        except anthropic.APIConnectionError as exc:
-            raise ConnectionError(
-                "Could not reach the LLM server — check your connection or try again in a moment."
-            ) from exc
-
-        content_text = ""
-        tool_calls: list[ToolCall] = []
-
-        for block in response.content:
-            if block.type == "text":
-                content_text += block.text
-            elif block.type == "tool_use":
-                tool_calls.append(
-                    ToolCall(id=block.id, name=block.name, input=block.input)
-                )
-
-        input_tokens = response.usage.input_tokens
-        return LLMResponse(
-            content=content_text,
-            tool_calls=tool_calls,
-            usage=Usage(
-                input_tokens=input_tokens,
-                output_tokens=response.usage.output_tokens,
-                context_pressure=compute_context_pressure(model, input_tokens),
-            ),
-            stop_reason=response.stop_reason,
-        )
-
-    async def stream(
-        self,
-        *,
-        model: str,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        max_tokens: int = 4096,
-    ) -> AsyncIterator[StreamEvent]:
-        kwargs: dict = {
-            "model": model,
-            "max_tokens": max_tokens,
-            "system": system,
-            "messages": messages,
-        }
-        if tools:
-            kwargs["tools"] = tools
-
-        content_text = ""
-        tool_calls: list[ToolCall] = []
-        input_tokens = 0
-        output_tokens = 0
-        stop_reason: str | None = None
-
-        # Track content blocks by index for tool correlation
-        blocks: dict[int, dict] = {}
-
-        try:
-            async with self._client.messages.stream(**kwargs) as stream:
-                async for event in stream:
-                    if event.type == "message_start":
-                        usage = event.message.usage
-                        input_tokens = usage.input_tokens
-                        output_tokens = getattr(usage, "output_tokens", 0)
-
-                    elif event.type == "content_block_start":
-                        idx = event.index
-                        block = event.content_block
-                        if block.type == "tool_use":
-                            blocks[idx] = {"type": "tool_use", "id": block.id, "name": block.name, "json_parts": []}
-                            yield StreamToolUseStart(id=block.id, name=block.name)
-                        else:
-                            blocks[idx] = {"type": "text"}
-
-                    elif event.type == "content_block_delta":
-                        idx = event.index
-                        delta = event.delta
-                        if delta.type == "text_delta":
-                            content_text += delta.text
-                            yield StreamTextDelta(text=delta.text)
-                        elif delta.type == "input_json_delta":
-                            info = blocks.get(idx, {})
-                            if info.get("type") == "tool_use":
-                                info["json_parts"].append(delta.partial_json)
-                                yield StreamToolUseDelta(id=info["id"], json_delta=delta.partial_json)
-
-                    elif event.type == "content_block_stop":
-                        idx = event.index
-                        info = blocks.get(idx, {})
-                        if info.get("type") == "tool_use":
-                            raw_json = "".join(info["json_parts"])
-                            parsed_input = json.loads(raw_json) if raw_json else {}
-                            tool_calls.append(
-                                ToolCall(id=info["id"], name=info["name"], input=parsed_input)
-                            )
-                            yield StreamToolUseEnd(id=info["id"])
-
-                    elif event.type == "message_delta":
-                        stop_reason = event.delta.stop_reason
-                        output_tokens = event.usage.output_tokens
-        except anthropic.BadRequestError as exc:
-            msg = str(exc).lower()
-            if "prompt is too long" in msg or "context limit" in msg:
-                raise ContextOverflowError(str(exc)) from exc
-            raise
-        except anthropic.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
-                msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
-                raise TokenLimitExceeded(msg) from exc
-            else:
-                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
-            raise ConnectionError(msg) from exc
-        except anthropic.APIConnectionError as exc:
-            raise ConnectionError(
-                "Could not reach the LLM server — check your connection or try again in a moment."
-            ) from exc
-
-        yield StreamComplete(
-            response=LLMResponse(
-                content=content_text,
-                tool_calls=tool_calls,
-                usage=Usage(
-                    input_tokens=input_tokens,
-                    output_tokens=output_tokens,
-                    context_pressure=compute_context_pressure(model, input_tokens),
-                ),
-                stop_reason=stop_reason,
-            )
-        )
diff --git a/anton/llm/client.py b/anton/llm/client.py
deleted file mode 100644
index a58c2170..00000000
--- a/anton/llm/client.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import AsyncIterator
-from typing import TYPE_CHECKING
-
-from anton.llm.provider import LLMProvider, LLMResponse, StreamEvent
-
-if TYPE_CHECKING:
-    from anton.config.settings import AntonSettings
-
-
-class LLMClient:
-    def __init__(
-        self,
-        *,
-        planning_provider: LLMProvider,
-        planning_model: str,
-        coding_provider: LLMProvider,
-        coding_model: str,
-        max_tokens: int = 8192,
-    ) -> None:
-        self._planning_provider = planning_provider
-        self._planning_model = planning_model
-        self._coding_provider = coding_provider
-        self._coding_model = coding_model
-        self._max_tokens = max_tokens
-
-    async def plan(
-        self,
-        *,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        max_tokens: int | None = None,
-    ) -> LLMResponse:
-        return await self._planning_provider.complete(
-            model=self._planning_model,
-            system=system,
-            messages=messages,
-            tools=tools,
-            max_tokens=max_tokens or self._max_tokens,
-        )
-
-    async def plan_stream(
-        self,
-        *,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        max_tokens: int | None = None,
-    ) -> AsyncIterator[StreamEvent]:
-        async for event in self._planning_provider.stream(
-            model=self._planning_model,
-            system=system,
-            messages=messages,
-            tools=tools,
-            max_tokens=max_tokens or self._max_tokens,
-        ):
-            yield event
-
-    @property
-    def coding_provider(self) -> LLMProvider:
-        """The LLM provider used for coding/skill execution."""
-        return self._coding_provider
-
-    @property
-    def coding_model(self) -> str:
-        """The model name used for coding/skill execution."""
-        return self._coding_model
-
-    async def code(
-        self,
-        *,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        max_tokens: int | None = None,
-    ) -> LLMResponse:
-        return await self._coding_provider.complete(
-            model=self._coding_model,
-            system=system,
-            messages=messages,
-            tools=tools,
-            max_tokens=max_tokens or self._max_tokens,
-        )
-
-    @classmethod
-    def from_settings(cls, settings: AntonSettings) -> LLMClient:
-        from anton.llm.anthropic import AnthropicProvider
-        from anton.llm.openai import OpenAIProvider
-
-        providers = {
-            "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key),
-            "openai": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify),
-            "openai-compatible": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify),
-        }
-
-        planning_factory = providers.get(settings.planning_provider)
-        coding_factory = providers.get(settings.coding_provider)
-
-        if planning_factory is None:
-            raise ValueError(f"Unknown planning provider: {settings.planning_provider}")
-        if coding_factory is None:
-            raise ValueError(f"Unknown coding provider: {settings.coding_provider}")
-
-        return cls(
-            planning_provider=planning_factory(),
-            planning_model=settings.planning_model,
-            coding_provider=coding_factory(),
-            coding_model=settings.coding_model,
-            max_tokens=getattr(settings, "max_tokens", 8192),
-        )
diff --git a/anton/llm/openai.py b/anton/llm/openai.py
deleted file mode 100644
index d57a709d..00000000
--- a/anton/llm/openai.py
+++ /dev/null
@@ -1,394 +0,0 @@
-from __future__ import annotations
-
-import json
-from collections.abc import AsyncIterator
-
-import openai
-
-from anton.llm.provider import (
-    ContextOverflowError,
-    LLMProvider,
-    LLMResponse,
-    StreamComplete,
-    StreamEvent,
-    StreamTextDelta,
-    StreamToolUseDelta,
-    StreamToolUseEnd,
-    StreamToolUseStart,
-    ToolCall,
-    Usage,
-    compute_context_pressure,
-)
-
-
-def _translate_tools(tools: list[dict]) -> list[dict]:
-    """Anthropic tool format -> OpenAI function-calling format."""
-    result = []
-    for tool in tools:
-        result.append({
-            "type": "function",
-            "function": {
-                "name": tool["name"],
-                "description": tool.get("description", ""),
-                "parameters": tool.get("input_schema", {}),
-            },
-        })
-    return result
-
-
-def _translate_tool_choice(tool_choice: dict) -> dict | str:
-    """Anthropic tool_choice -> OpenAI tool_choice."""
-    tc_type = tool_choice.get("type")
-    if tc_type == "tool":
-        return {"type": "function", "function": {"name": tool_choice["name"]}}
-    if tc_type == "any":
-        return "required"
-    if tc_type == "auto":
-        return "auto"
-    return "auto"
-
-
-def _translate_messages(system: str, messages: list[dict]) -> list[dict]:
-    """Convert Anthropic-style messages to OpenAI chat format.
-
-    Handles:
-    - system prompt -> {"role": "system", ...}
-    - plain text messages pass through
-    - assistant messages with tool_use content blocks -> tool_calls array
-    - user messages with tool_result content blocks -> role:tool messages
-    """
-    result: list[dict] = []
-    if system:
-        result.append({"role": "system", "content": system})
-
-    for msg in messages:
-        role = msg["role"]
-        content = msg.get("content")
-
-        # Plain string content — pass through
-        if isinstance(content, str):
-            result.append({"role": role, "content": content})
-            continue
-
-        # Content is a list of blocks (Anthropic format)
-        if isinstance(content, list):
-            if role == "assistant":
-                result.extend(_translate_assistant_blocks(content))
-            elif role == "user":
-                result.extend(_translate_user_blocks(content))
-            else:
-                # Fallback: join text blocks
-                text = " ".join(
-                    b.get("text", "") for b in content if b.get("type") == "text"
-                )
-                result.append({"role": role, "content": text or ""})
-            continue
-
-        # Fallback
-        result.append({"role": role, "content": str(content) if content else ""})
-
-    return result
-
-
-def _translate_assistant_blocks(blocks: list[dict]) -> list[dict]:
-    """Convert assistant content blocks to OpenAI message(s)."""
-    text_parts: list[str] = []
-    tool_calls: list[dict] = []
-
-    for block in blocks:
-        if block.get("type") == "text":
-            text_parts.append(block["text"])
-        elif block.get("type") == "tool_use":
-            tool_calls.append({
-                "id": block["id"],
-                "type": "function",
-                "function": {
-                    "name": block["name"],
-                    "arguments": json.dumps(block.get("input", {})),
-                },
-            })
-
-    msg: dict = {"role": "assistant"}
-    content = "\n".join(text_parts) if text_parts else None
-    msg["content"] = content
-    if tool_calls:
-        msg["tool_calls"] = tool_calls
-    return [msg]
-
-
-def _translate_user_blocks(blocks: list[dict]) -> list[dict]:
-    """Convert user content blocks (including tool_result and image) to OpenAI messages."""
-    result: list[dict] = []
-    content_parts: list[dict] = []  # Accumulates text + image_url blocks
-
-    for block in blocks:
-        if block.get("type") == "tool_result":
-            # Flush any accumulated content parts first
-            if content_parts:
-                result.append({"role": "user", "content": content_parts})
-                content_parts = []
-            # tool_result -> role:tool message
-            content = block.get("content", "")
-            if isinstance(content, list):
-                content = "\n".join(
-                    b.get("text", "") for b in content if b.get("type") == "text"
-                )
-            result.append({
-                "role": "tool",
-                "tool_call_id": block["tool_use_id"],
-                "content": str(content),
-            })
-        elif block.get("type") == "text":
-            content_parts.append({"type": "text", "text": block.get("text", "")})
-        elif block.get("type") == "image":
-            # Anthropic image block -> OpenAI image_url block
-            source = block.get("source", {})
-            if source.get("type") == "base64":
-                media_type = source.get("media_type", "image/png")
-                data = source.get("data", "")
-                content_parts.append({
-                    "type": "image_url",
-                    "image_url": {"url": f"data:{media_type};base64,{data}"},
-                })
-
-    if content_parts:
-        # If only text parts, flatten to a simple string for compatibility
-        if all(p.get("type") == "text" for p in content_parts):
-            result.append({
-                "role": "user",
-                "content": "\n".join(p["text"] for p in content_parts),
-            })
-        else:
-            result.append({"role": "user", "content": content_parts})
-
-    return result
-
-
-def build_chat_completion_kwargs(
-    *,
-    model: str,
-    messages: list[dict],
-    max_tokens: int,
-    stream: bool = False,
-) -> dict:
-    """Build chat.completions kwargs using modern OpenAI parameter names."""
-    kwargs: dict = {
-        "model": model,
-        "messages": messages,
-        "max_completion_tokens": max_tokens,
-    }
-    if stream:
-        kwargs["stream"] = True
-        kwargs["stream_options"] = {"include_usage": True}
-    return kwargs
-
-
-class OpenAIProvider(LLMProvider):
-    def __init__(
-        self,
-        api_key: str | None = None,
-        base_url: str | None = None,
-        ssl_verify: bool = True,
-    ) -> None:
-        import httpx
-
-        kwargs = {}
-        if api_key:
-            kwargs["api_key"] = api_key
-        if base_url:
-            kwargs["base_url"] = base_url
-        if not ssl_verify:
-            kwargs["http_client"] = httpx.AsyncClient(verify=False)
-        self._client = openai.AsyncOpenAI(**kwargs)
-
-    async def complete(
-        self,
-        *,
-        model: str,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        tool_choice: dict | None = None,
-        max_tokens: int = 4096,
-    ) -> LLMResponse:
-        oai_messages = _translate_messages(system, messages)
-
-        kwargs = build_chat_completion_kwargs(
-            model=model,
-            messages=oai_messages,
-            max_tokens=max_tokens,
-        )
-        if tools:
-            kwargs["tools"] = _translate_tools(tools)
-        if tool_choice:
-            kwargs["tool_choice"] = _translate_tool_choice(tool_choice)
-
-        try:
-            response = await self._client.chat.completions.create(**kwargs)
-        except openai.BadRequestError as exc:
-            msg = str(exc).lower()
-            if "context_length_exceeded" in msg or "maximum context length" in msg:
-                raise ContextOverflowError(str(exc)) from exc
-            raise
-        except openai.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
-                msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
-                raise TokenLimitExceeded(msg) from exc
-            else:
-                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
-            raise ConnectionError(msg) from exc
-        except openai.APIConnectionError as exc:
-            raise ConnectionError(
-                "Could not reach the LLM server — check your connection or try again in a moment."
-            ) from exc
-
-        choice = response.choices[0]
-        message = choice.message
-
-        content_text = message.content or ""
-        tool_calls: list[ToolCall] = []
-
-        if message.tool_calls:
-            for tc in message.tool_calls:
-                tool_calls.append(
-                    ToolCall(
-                        id=tc.id,
-                        name=tc.function.name,
-                        input=json.loads(tc.function.arguments) if tc.function.arguments else {},
-                    )
-                )
-
-        usage_obj = response.usage
-        input_tokens = usage_obj.prompt_tokens if usage_obj else 0
-        return LLMResponse(
-            content=content_text,
-            tool_calls=tool_calls,
-            usage=Usage(
-                input_tokens=input_tokens,
-                output_tokens=usage_obj.completion_tokens if usage_obj else 0,
-                context_pressure=compute_context_pressure(model, input_tokens),
-            ),
-            stop_reason=choice.finish_reason,
-        )
-
-    async def stream(
-        self,
-        *,
-        model: str,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        max_tokens: int = 4096,
-    ) -> AsyncIterator[StreamEvent]:
-        oai_messages = _translate_messages(system, messages)
-
-        kwargs = build_chat_completion_kwargs(
-            model=model,
-            messages=oai_messages,
-            max_tokens=max_tokens,
-            stream=True,
-        )
-        if tools:
-            kwargs["tools"] = _translate_tools(tools)
-
-        content_text = ""
-        tool_calls: list[ToolCall] = []
-        input_tokens = 0
-        output_tokens = 0
-        stop_reason: str | None = None
-
-        # Track tool call deltas by index
-        tc_state: dict[int, dict] = {}
-
-        try:
-            stream = await self._client.chat.completions.create(**kwargs)
-            async for chunk in stream:
-                if chunk.usage:
-                    input_tokens = chunk.usage.prompt_tokens
-                    output_tokens = chunk.usage.completion_tokens
-
-                if not chunk.choices:
-                    continue
-
-                delta = chunk.choices[0].delta
-                finish = chunk.choices[0].finish_reason
-
-                if finish:
-                    stop_reason = finish
-
-                # Text content
-                if delta.content:
-                    content_text += delta.content
-                    yield StreamTextDelta(text=delta.content)
-
-                # Tool call deltas
-                if delta.tool_calls:
-                    for tc_delta in delta.tool_calls:
-                        idx = tc_delta.index
-                        if idx not in tc_state:
-                            # New tool call
-                            tc_state[idx] = {
-                                "id": tc_delta.id or "",
-                                "name": tc_delta.function.name if tc_delta.function and tc_delta.function.name else "",
-                                "args_parts": [],
-                            }
-                            if tc_state[idx]["id"] and tc_state[idx]["name"]:
-                                yield StreamToolUseStart(
-                                    id=tc_state[idx]["id"],
-                                    name=tc_state[idx]["name"],
-                                )
-                        else:
-                            # Update id/name if provided in later chunks
-                            if tc_delta.id:
-                                tc_state[idx]["id"] = tc_delta.id
-                            if tc_delta.function and tc_delta.function.name:
-                                tc_state[idx]["name"] = tc_delta.function.name
-
-                        # Accumulate argument fragments
-                        if tc_delta.function and tc_delta.function.arguments:
-                            tc_state[idx]["args_parts"].append(tc_delta.function.arguments)
-                            yield StreamToolUseDelta(
-                                id=tc_state[idx]["id"],
-                                json_delta=tc_delta.function.arguments,
-                            )
-        except openai.BadRequestError as exc:
-            msg = str(exc).lower()
-            if "context_length_exceeded" in msg or "maximum context length" in msg:
-                raise ContextOverflowError(str(exc)) from exc
-            raise
-        except openai.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
-                msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
-                from anton.llm.provider import TokenLimitExceeded
-                raise TokenLimitExceeded(msg) from exc
-            else:
-                msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
-            raise ConnectionError(msg) from exc
-        except openai.APIConnectionError as exc:
-            raise ConnectionError(
-                "Could not reach the LLM server — check your connection or try again in a moment."
-            ) from exc
-
-        # Finalize tool calls
-        for idx in sorted(tc_state):
-            info = tc_state[idx]
-            raw_json = "".join(info["args_parts"])
-            parsed = json.loads(raw_json) if raw_json else {}
-            tool_calls.append(ToolCall(id=info["id"], name=info["name"], input=parsed))
-            yield StreamToolUseEnd(id=info["id"])
-
-        yield StreamComplete(
-            response=LLMResponse(
-                content=content_text,
-                tool_calls=tool_calls,
-                usage=Usage(
-                    input_tokens=input_tokens,
-                    output_tokens=output_tokens,
-                    context_pressure=compute_context_pressure(model, input_tokens),
-                ),
-                stop_reason=stop_reason,
-            )
-        )
diff --git a/anton/llm/prompts.py b/anton/llm/prompts.py
deleted file mode 100644
index cce3ea9f..00000000
--- a/anton/llm/prompts.py
+++ /dev/null
@@ -1,415 +0,0 @@
-LEARNING_EXTRACT_PROMPT = """\
-Analyze this task execution and extract reusable learnings.
-For each learning, provide:
-- topic: short snake_case category name
-- content: the learning detail (1-3 sentences)
-- summary: one-line summary for indexing
-
-Return a JSON array. If no meaningful learnings, return [].
-
-Example output:
-[{"topic": "file_operations", "content": "Always check if a file exists before reading.", "summary": "Check file existence before reads"}]
-"""
-
-CHAT_SYSTEM_PROMPT = """\
-You are Anton — a self-evolving autonomous system that collaborates with people to \
-solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \
-computer, and you use that computer to get things done.
-
-Current date and time: {current_datetime}
-
-WHO YOU ARE:
-- You solve problems — not just write code. If someone needs emails classified, data \
-analyzed, a server monitored, or a workflow automated, you figure out how.
-- You learn and evolve. Every task teaches you something. You remember what worked, \
-what didn't, and get better over time. Your memory is local to this workspace.
-- You collaborate. You think alongside the user, ask smart questions, and work through \
-problems together — not just take orders.
-
-YOUR CAPABILITIES:
-- **Internet access**: You DO have access to the internet via the scratchpad. You can \
-fetch data from APIs, scrape websites, download files, and pull live data. Always use \
-the scratchpad for any internet access — requests, urllib, yfinance, etc.
-- **Scratchpad execution**: Give you a problem, you break it down and execute it \
-step by step — reading files, running commands, writing code, searching codebases. \
-The scratchpad is your primary execution engine — it has its own isolated environment \
-and can install packages on the fly.
-- **Persistent memory**: You have a brain-inspired memory system with rules (always/never/when), \
-lessons (facts), and identity (profile). Memories persist across sessions at both global \
-(~/.anton/memory/) and project (<workspace>/.anton/memory/) scopes.
-- **Self-awareness**: You can learn and persist facts about the project, the user's \
-preferences, and conventions via the memorize tool — so you don't start from \
-scratch every session.
-- **Episodic memory**: Searchable archive of past conversations. \
-Use the recall tool only when the user explicitly references a previous session \
-or conversation (e.g. "what did we discuss last time?"). For questions about \
-code, files, or data in the workspace, use the scratchpad instead.
-
-INTERNET & LIVE INFORMATION:
-- You have FULL internet access via the scratchpad. When the user asks about \
-current events, news, speeches, live data, or anything that requires up-to-date \
-information — USE THE SCRATCHPAD to fetch it. Do NOT say you can't access the \
-internet or live information.
-- For news and current events: use the scratchpad to fetch from news sites \
-(Reuters, AP News, CNN, BBC, etc.), search APIs, or scrape relevant pages. \
-Use requests + BeautifulSoup, or any other approach that works.
-- For financial data: use yfinance, requests to financial APIs, etc.
-- For any URL the user provides: fetch it directly with requests.
-- Think about WHICH sites are likely to have the information. You have vast \
-knowledge about what websites contain what kind of data — use that knowledge \
-to pick the right source, then fetch and parse it in the scratchpad.
-- If the first source doesn't work, try alternatives. Don't give up after one \
-attempt — try 2-3 different approaches before telling the user it's unavailable.
-
-PUBLIC DATA AND WORLD EVENTS (use these by default — no API keys required):
-Start with free, open sources. Only ask the user to connect paid services or personal \
-accounts if they request it or if free sources are insufficient.
-
-News & current events (via RSS — use feedparser):
-- Google News RSS: `https://news.google.com/rss/search?q={{query}}&hl={{lang}}&gl={{country}}` \
-— any topic, any country. Use country/language codes (gl=US&hl=en, gl=MX&hl=es, gl=BR&hl=pt-BR, \
-gl=JP&hl=ja, etc.). This is your primary news source.
-- Reuters: `https://www.rss.reuters.com/news/` (world, business, tech sections)
-- AP News: `https://rsshub.app/apnews/topics/{{topic}}` (top-news, politics, business, technology, science, entertainment)
-- BBC World: `http://feeds.bbci.co.uk/news/rss.xml` (also /world, /business, /technology)
-- NPR: `https://feeds.npr.org/1001/rss.xml` (news), `1006/rss.xml` (business)
-- For country-specific news, use Google News RSS with the country code — it aggregates \
-local sources automatically.
-- Parse feeds with `feedparser`: title, link, published date, summary. \
-Store as a list of dicts for dashboard integration.
-
-Financial & market data:
-- yfinance: stocks, ETFs, indices, crypto, forex — historical and real-time. \
-Use tickers like ^GSPC (S&P 500), ^DJI (Dow), ^IXIC (Nasdaq), BTC-USD, etc.
-- FRED (Federal Reserve): `https://fred.stlouisfed.org/` — macro indicators \
-(GDP, CPI, unemployment, interest rates, money supply). Use fredapi package \
-with free API key, or fetch CSV directly: \
-`https://fred.stlouisfed.org/graph/fredgraph.csv?id={{series_id}}` (no key needed for CSV).
-- CoinGecko: `https://api.coingecko.com/api/v3/` — crypto prices, market cap, \
-volume, trending coins. Free, no key.
-
-Economic & global data:
-- World Bank: `https://api.worldbank.org/v2/country/{{code}}/indicator/{{indicator}}?format=json` \
-— GDP, population, poverty, education, health by country. Free, no key.
-- OECD: `https://sdmx.oecd.org/public/rest/data/` — economic indicators for OECD countries.
-- Open Exchange Rates: `https://open.er-api.com/v6/latest/{{base}}` — free forex rates.
-
-Social & sentiment:
-- Reddit JSON: `https://www.reddit.com/r/{{subreddit}}/.json` — add .json to any \
-Reddit URL for structured data. Good for sentiment on specific topics.
-- HackerNews: `https://hacker-news.firebaseio.com/v0/` — tech news, top/new/best stories.
-
-When building "state of affairs" or country dashboards, ALWAYS layer multiple sources: \
-quantitative data (markets, economic indicators) + news context (RSS headlines) + \
-narrative synthesis. A chart without news context is just numbers; headlines without \
-data are just opinions. Combine them.
-
-PROACTIVE FOLLOW-UP SUGGESTIONS:
-After completing analysis on public datasets, think about whether the user's own data \
-could complement the analysis. If there's a natural personal data extension, offer it \
-in ONE sentence at the end of your response. Examples:
-- After stock/market analysis → "If you'd like, I can analyze your portfolio against \
-these benchmarks."
-- After economic/industry analysis → "I can also pull in your company's data to see \
-how you compare."
-- After email or communication analysis → "Want me to cross-reference this with your \
-calendar or contacts?"
-- After crypto analysis → "I can connect to your exchange if you want to see your \
-holdings in this context."
-Keep it brief, helpful, not pushy. Don't repeat the offer if the user ignores it. \
-Don't suggest personal data analysis if the user's question is purely informational \
-with no personal angle.
-
-CONTENT SHARING POLICY:
-- Publishing dashboards or reports to the web is done ONLY via the `publish_or_preview` tool. \
-Do NOT upload, post, or share generated files (HTML, data, images) to external hosting \
-services (paste sites, gists, CDNs, file hosts) via scratchpad code — unless the user \
-explicitly names the service and confirms. Reading from public APIs and writing to the \
-user's connected datasources (databases, CRMs, etc.) is fine — this rule only applies to \
-sharing generated output with the public internet.
-
-SCRATCHPAD:
-- Use the scratchpad for computation, data analysis, web scraping, plotting, file I/O, \
-shell commands, and anything that needs precise execution.
-- Each scratchpad has its own isolated environment — use the install action to add \
-libraries on the fly.
-- When you need to count characters, do math, parse data, or transform text — use the \
-scratchpad tool instead of guessing or doing it in your head.
-- Variables, imports, and data persist across cells — like a notebook you drive \
-programmatically. Use this for both quick one-off calculations and multi-step analysis.
-- get_llm() returns a pre-configured LLM client — use llm.complete(system=..., messages=[...]) \
-for AI-powered computation within scratchpad code. The call is synchronous.
-- llm.generate_object(MyModel, system=..., messages=[...]) extracts structured data into \
-Pydantic models. Define a class with BaseModel, and the LLM fills it. Supports list[Model] too.
-- agentic_loop(system=..., user_message=..., tools=[...], handle_tool=fn) runs an LLM \
-tool-call loop inside scratchpad code. The LLM reasons and calls your tools iteratively. \
-handle_tool(name, inputs) is a plain sync function returning a string result. Use this for \
-multi-step AI workflows like classification, extraction, or analysis with structured outputs.
-- All .anton/.env variables are available as environment variables (os.environ).
-- Connected data source credentials are injected as namespaced environment \
-variables in the form DS_<ENGINE_NAME>__<FIELD> \
-(e.g. DS_POSTGRES_PROD_DB__HOST, DS_POSTGRES_PROD_DB__PASSWORD, \
-DS_HUBSPOT_MAIN__ACCESS_TOKEN). Use those variables directly in scratchpad \
-code and never read ~/.anton/data_vault/ files directly.
-- Flat variables like DS_HOST or DS_PASSWORD are used only temporarily \
-during internal connection test snippets. Do not assume they exist during \
-normal chat/runtime execution.
-- When the user asks how you solved something or wants to see your work, use the scratchpad \
-dump action — it shows a clean notebook-style summary without wasting tokens on reformatting.
-- Always use print() to produce output — scratchpad captures stdout.
-- IMPORTANT: The scratchpad starts with a clean namespace — nothing is pre-imported. \
-Always include all necessary imports at the top of each cell that uses them. \
-Re-importing is a no-op in Python so there is zero cost, and it guarantees the cell \
-works even if earlier cells failed or state was lost.
-- IMPORTANT: Each cell has a hard timeout of 120 seconds. If exceeded, the process is \
-killed and ALL state (variables, imports, data) is lost. For every exec call, provide \
-one_line_description and estimated_execution_time_seconds (integer). If your estimate \
-exceeds 90 seconds, you MUST break the work into smaller cells. Prefer vectorized \
-operations, batch I/O, and focused cells that do one thing well.
-- Host Python packages are available by default. Use the scratchpad install action to \
-add more — installed packages persist across resets.
-
-FILE ATTACHMENTS:
-- Users can drag files or paste clipboard images. These appear as <file path="..."> tags.
-- For binary files (images, PDFs), use the scratchpad to read and process them.
-- Clipboard images are saved to .anton/uploads/ — open with Pillow, OpenCV, etc.
-
-{visualizations_section}
-
-CONVERSATION DISCIPLINE (critical):
-- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
-and then act in the same turn — that skips the user's answer.
-- Only act when you have ALL the information you need. If you're unsure \
-about anything, ask first, then act in a LATER turn after receiving the answer.
-- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
-it in context of what you just asked. Do not ask them to repeat themselves.
-- Gather requirements incrementally through conversation. Do not front-load every \
-possible question at once — ask 1-3 at a time, then follow up.
-
-RUNTIME IDENTITY:
-{runtime_context}
-- You know what LLM provider and model you are running on. NEVER ask the user which \
-LLM or API they want — you already know. When building tools or code that needs an LLM, \
-use YOUR OWN provider and SDK (the one from the runtime info above).
-
-PROBLEM-SOLVING RESILIENCE:
-- When something fails (HTTP 403, import error, timeout, blocked request, etc.), pause \
-before asking the user for help. Ask yourself: "Can I solve this differently without \
-user input?"
-- Try creative workarounds first: different HTTP headers or user-agents, a public API \
-instead of scraping, archive.org/Wayback Machine snapshots, alternate libraries, \
-different data sources for the same information, caching/retrying with backoff, etc.
-- Exhaust at least 2-3 genuinely different approaches before involving the user. Each \
-attempt should be a meaningfully different strategy — not just retrying the same thing.
-- Only ask the user for things that truly require them: credentials they haven't shared, \
-ambiguous requirements you can't infer, access to private/internal systems, or a choice \
-between equally valid options.
-- When you do ask for help, briefly explain what you already tried and why it didn't work \
-so the user has full context and doesn't suggest things you've already done.
-
-GENERAL RULES:
-- Be conversational, concise, and direct. No filler. No bullet-point dumps unless asked.
-- Respond naturally to greetings, small talk, and follow-up questions.
-- When describing yourself, focus on problem-solving and collaboration — not listing \
-features. Be brief: a few sentences, not an essay.
-- After completing work, always end with what the user might want next: follow-up \
-questions, related actions, or deeper dives. If the answer involved computation or \
-data work, offer to show how you got there ("want me to dump the scratchpad so you \
-can see the steps?"). If the result could be extended, suggest it ("I can also break \
-this down by category if that helps"). Always leave a door open — never dead-end.
-- Never show raw code, diffs, or tool output unprompted — summarize in plain language. \
-But always let the user know the detail is available if they want it.
-- When you discover important information, use the memorize tool to encode it. \
-Use "always"/"never"/"when" for behavioral rules. Use "lesson" for facts. \
-Use "profile" for things about the user. Choose "global" for universal knowledge, \
-"project" for workspace-specific knowledge. \
-Only encode genuinely reusable knowledge — not transient conversation details.
-"""
-
-# ---------------------------------------------------------------------------
-# Visualization prompt variants — selected by ANTON_PROACTIVE_DASHBOARDS flag
-# ---------------------------------------------------------------------------
-
-_VISUALIZATIONS_PROACTIVE = """\
-VISUALIZATIONS (charts, plots, maps, dashboards, reports):
-
-Insights-first workflow — ALWAYS follow this order for dashboards and multi-chart requests:
-1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \
-structured results (numbers, percentages, rankings) — not HTML yet.
-2. STREAM INSIGHTS IMMEDIATELY: Before building any visualization, narrate your findings \
-to the user in the chat. They should get value within seconds, not after waiting for HTML. \
-Structure insights as:
-  - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \
-(use markdown tables). This gives the user the raw data immediately — positions, values, \
-returns, key metrics — before you interpret them.
-  - HEADLINE: One sentence, the single most important finding. Lead with impact, not description.
-  - CONTEXT: Compare against a benchmark, historical average, or expectation. Raw numbers \
-without comparison are meaningless.
-  - THE NON-OBVIOUS: What would an expert analyst notice? Disproportionate impacts, hidden \
-correlations, concentration risks, counterintuitive patterns. Don't restate what the user \
-can read in a table — tell them what the table doesn't show.
-  - ASSUMPTIONS: Be explicit. What data source? What time range? Closing vs adjusted prices? \
-Timezone? Real-time or delayed? Don't hide these — state them clearly.
-  - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \
-thresholds that matter, scenarios worth considering.
-3. WRITE A DASHBOARD BRIEF: Before coding the HTML, plan the dashboard out loud:
-  - What story does each chart tell? (not "a bar chart of X" but "this shows how Y \
-is driving Z, annotated at the inflection point")
-  - What is the visual hierarchy? Hero KPIs at top, main narrative chart first, \
-supporting charts below.
-  - What should be annotated? Key dates, threshold crossings, outliers.
-  - What color scheme ties it together? Consistent meaning (green=positive, red=negative) \
-across all charts.
-4. BUILD THE DASHBOARD — use multiple scratchpad cells, but produce ONE single self-contained HTML file:
-
-  CRITICAL: The final dashboard MUST be a single .html file with ALL data, CSS, and JS inlined. \
-Do NOT reference external local files (like data.js) — browsers block local file:// cross-references \
-for security reasons and the dashboard will silently fail to load data.
-
-  SECURITY (critical): Dashboards may be published to the web. NEVER embed API keys, tokens, \
-passwords, connection strings, or any credentials in the HTML, JS, or inline data. Fetch data \
-in scratchpad cells using credentials from environment variables, then serialize only the \
-resulting data into the dashboard. If the user explicitly asks to embed a credential \
-(e.g. for a live-updating dashboard), warn them that publishing will expose it and get \
-confirmation before proceeding.
-
-  Build the parts in separate cells, then assemble at the end:
-
-  CELL 1 — Serialize data to a JS string variable (programmatic, no HTML):
-  Serialize all computed data (dataframes, metrics, KPIs) into a Python string. Build a \
-Python dict with keys like "kpis", "tables", "charts" — each containing the relevant data. \
-Convert DataFrames with df.to_dict(orient='records'). Use json.dumps(data, default=str) to \
-handle dates, Decimal, numpy types. Store as a Python variable: \
-`data_js = 'const D = ' + json_string + ';'` — do NOT write to a separate file.
-
-  CELL 2 — Build CSS + HTML structure as a Python string variable:
-  Write the HTML head (styles, CDN script tags) and body structure (header, KPIs, chart divs, \
-tabs, tables) as a Python string variable `html_body`. This cell builds the template.
-
-  CELL 3+ — Build JS chart rendering logic as Python string variables:
-  Write the JavaScript that initializes charts, populates tables, handles tabs, etc. \
-Split across multiple cells if needed to avoid token limits. Store as `js_charts` etc.
-
-  FINAL CELL — Assemble and write the HTML file:
-  Combine: `html = html_body.replace('</body>', f'<script>{{data_js}}{{js_charts}}</script></body>')` \
-or similar. Write to `.anton/output/name.html` and open in browser.
-
-  SELF-CONTAINED OUTPUT (critical):
-  Prefer inlining everything — CSS in `<style>`, JS in `<script>`, data as JS variables. \
-A single .html file is the most portable and publishable format. \
-If the dataset is very large (>100KB of JSON), you may write it to a separate .js file \
-in the SAME directory (e.g. `.anton/output/dashboard_data.js`) and reference it with a \
-relative `<script src="dashboard_data.js">` tag. The publisher will auto-bundle sibling \
-files referenced in the HTML. Never reference files outside the output directory.
-
-  WHY: (1) Browsers block local file:// cross-references across directories. \
-(2) Splitting the build across cells catches JS/CSS errors early — if a cell has a syntax issue \
-in a string, you'll see it before the final assembly. (3) Large datasets in single cells timeout. \
-(4) Self-contained files can be published to the web via /publish without missing assets.
-
-  PYTHON → JS STRING SAFETY (critical):
-  When building JS code inside Python strings, escape sequences get resolved by Python BEFORE \
-writing to the file. This means '\\n' in Python becomes a literal newline in the output, which \
-breaks JavaScript string literals. Rules:
-  - Use '\\\\n' in Python if you need a literal \\n in the JS output
-  - Use raw strings (r"...") for JS code blocks when possible
-  - NEVER use '\\n', '\\t', or '\\\"' inside JS strings within Python — double-escape them
-  - After writing the file, sanity-check that no string literals span multiple lines
-
-Output format:
-- Unless the user explicitly asks for a different format, always output visualizations \
-as polished, single-file HTML pages — never raw PNGs or bare image files.
-- Save output to `.anton/output/` (create it if needed). Use descriptive filenames like \
-`cpi_portfolio.html`, not `output.html`.
-- Do NOT auto-open the file in the browser from scratchpad code. Instead, after writing \
-the HTML file, call the `publish_or_preview` tool with the file path and a short title. \
-This tool will interactively ask the user if they want to preview locally, publish to the \
-web, or skip. Let the tool handle the browser opening and publishing flow.
-
-Visual design:
-- Make it look good by default. Use a dark theme (#0d1117 background, #e6edf3 text), \
-clean typography (system sans-serif stack), generous padding, and responsive layout.
-- ALWAYS use Apache ECharts for interactive charts. Load it via CDN: \
-`<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>`. \
-No Python dependencies needed — just write the HTML with inline JS. Use ECharts' built-in \
-dark theme: `echarts.init(dom, 'dark')`, then customize colors to match #0d1117 background.
-- NEVER use Plotly, matplotlib, or other charting libraries unless the user explicitly asks.
-
-Line smoothing (critical — smooth: true misrepresents volatile data):
-- DEFAULT: `smooth: false` on ALL line series. Straight segments between data points are \
-the honest representation — they show actual volatility, drawdowns, and inflection points.
-- EXCEPTION: Use `smooth: true` ONLY for cumulative/monotonic series (cumulative returns, \
-running totals, growth curves) where the trend matters more than point-to-point moves.
-- Decision heuristic: Does the line ever reverse direction meaningfully? If yes → smooth: false. \
-Is it a running sum, cumulative metric, or long-horizon trend? → smooth: true is acceptable.
-- Line widths: 2.5 for hero/primary lines, 1.5 for multi-line comparisons, 1 for secondary/reference lines.
-
-Chart readability (critical — labels must NEVER overlap):
-- Use `axisLabel: {{ rotate: -45 }}` or `{{ rotate: 45 }}` on crowded axes. \
-Set `grid: {{ containLabel: true }}` so labels never clip. Use `legend: {{ type: 'scroll', \
-bottom: 0 }}` to place scrollable legends below the chart. For pie/donut charts use \
-`label: {{ show: true, position: 'outside' }}` with `labelLayout: {{ hideOverlap: true }}`. \
-For bar charts with many categories, use horizontal bars (`yAxis` as category) or \
-abbreviate labels with `axisLabel: {{ formatter }}`. Always configure rich `tooltip` with \
-`formatter` functions for precise value display on hover. Use `dataZoom` for time series \
-so users can zoom into ranges.
-
-Layout and composition:
-- For non-chart visualizations (tables, reports, dashboards), write clean HTML/CSS directly. \
-Use CSS grid or flexbox. Add subtle styling: rounded corners, soft shadows, hover effects.
-- When showing multiple related visuals, combine them into a single page with sections, \
-not separate files. Ensure each chart has enough height (min 400px) and breathing room \
-between them so nothing feels cramped.
-- Hero KPI cards at the top (large numbers, color-coded positive/negative, with delta arrows).
-- Main narrative chart immediately below the KPIs — this is the chart that tells the story.
-- Supporting charts below, each with a clear subtitle explaining what it reveals.
-- Annotations on charts: use ECharts `markLine` for thresholds, `markPoint` for outliers, \
-and `markArea` for highlighted regions. A chart without annotations is a missed opportunity.
-- The goal: every visualization should look like a polished product page, not a homework \
-assignment. Think dark-mode dashboard, not Jupyter default.\
-"""
-
-_VISUALIZATIONS_CLI_ONLY = """\
-VISUALIZATIONS AND ANALYSIS OUTPUT:
-
-Do NOT proactively create HTML dashboards, charts, or browser-based visualizations. \
-All analysis output should be formatted for the CLI terminal.
-
-Insights-first workflow — ALWAYS follow this order for analysis and reports:
-1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \
-structured results (numbers, percentages, rankings).
-2. STREAM INSIGHTS IMMEDIATELY: Narrate your findings to the user in the chat. They should \
-get value within seconds. Structure insights as:
-  - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \
-(use markdown tables). This gives the user the raw data immediately — positions, values, \
-returns, key metrics — before you interpret them.
-  - HEADLINE: One sentence, the single most important finding. Lead with impact, not description.
-  - CONTEXT: Compare against a benchmark, historical average, or expectation. Raw numbers \
-without comparison are meaningless.
-  - THE NON-OBVIOUS: What would an expert analyst notice? Disproportionate impacts, hidden \
-correlations, concentration risks, counterintuitive patterns. Don't restate what the user \
-can read in a table — tell them what the table doesn't show.
-  - ASSUMPTIONS: Be explicit. What data source? What time range? Closing vs adjusted prices? \
-Timezone? Real-time or delayed? Don't hide these — state them clearly.
-  - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \
-thresholds that matter, scenarios worth considering.
-
-CLI output format:
-- Present all results as well-formatted markdown: tables, bullet points, headers, and \
-inline numbers. The terminal is the primary display — make it look great there.
-- Use markdown tables for tabular data. Keep columns aligned and readable.
-- Use bold/headers for section structure. Use bullet points for lists.
-- For large datasets, summarize the top N and offer to show more.
-- When the user EXPLICITLY asks for a chart, dashboard, plot, or HTML visualization, \
-THEN build it as a self-contained HTML file with inlined CSS, JS, and data. \
-Save to .anton/output/. Do NOT auto-open the file from scratchpad code — instead call the \
-`publish_or_preview` tool with the file path and title after writing it. \
-Use Apache ECharts (CDN), dark theme (#0d1117), and follow standard dashboard best practices. \
-If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \
-Never split CSS or chart logic into separate files — only large data payloads.\
-"""
-
-
-def build_visualizations_prompt(proactive: bool = False) -> str:
-    """Return the visualization section for the system prompt."""
-    return _VISUALIZATIONS_PROACTIVE if proactive else _VISUALIZATIONS_CLI_ONLY
diff --git a/anton/llm/provider.py b/anton/llm/provider.py
deleted file mode 100644
index ceae40bf..00000000
--- a/anton/llm/provider.py
+++ /dev/null
@@ -1,165 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from collections.abc import AsyncIterator
-from dataclasses import dataclass, field
-from typing import Any
-
-
-@dataclass
-class ToolCall:
-    id: str
-    name: str
-    input: dict[str, Any]
-
-
-@dataclass
-class Usage:
-    input_tokens: int = 0
-    output_tokens: int = 0
-    context_pressure: float = 0.0
-
-
-@dataclass
-class LLMResponse:
-    content: str
-    tool_calls: list[ToolCall] = field(default_factory=list)
-    usage: Usage = field(default_factory=Usage)
-    stop_reason: str | None = None
-
-@dataclass
-class StreamTextDelta:
-    text: str
-
-
-@dataclass
-class StreamToolUseStart:
-    id: str
-    name: str
-
-
-@dataclass
-class StreamToolUseDelta:
-    id: str
-    json_delta: str
-
-
-@dataclass
-class StreamToolUseEnd:
-    id: str
-
-
-@dataclass
-class StreamComplete:
-    response: LLMResponse
-
-
-@dataclass
-class StreamTaskProgress:
-    """Progress event from agent task execution (planning, building, executing)."""
-    phase: str
-    message: str
-    eta_seconds: float | None = None
-
-
-@dataclass
-class StreamToolResult:
-    """Tool result that should be displayed to the user (e.g. scratchpad dump)."""
-    content: str
-
-
-@dataclass
-class StreamContextCompacted:
-    """Notification that context was compacted to free up space."""
-    message: str
-
-
-StreamEvent = (
-    StreamTextDelta
-    | StreamToolUseStart
-    | StreamToolUseDelta
-    | StreamToolUseEnd
-    | StreamComplete
-    | StreamTaskProgress
-    | StreamToolResult
-    | StreamContextCompacted
-)
-
-
-_CONTEXT_WINDOWS: list[tuple[str, int]] = [
-    # Anton defaults (exact model IDs first)
-    ("claude-sonnet-4-6", 200_000),
-    ("claude-haiku-4-5-20251001", 200_000),
-    # Claude families
-    ("claude-opus-4", 200_000),
-    ("claude-sonnet-4", 200_000),
-    ("claude-haiku-4", 200_000),
-    ("claude-3", 200_000),
-    ("claude-", 200_000),
-    # OpenAI families
-    ("gpt-5", 400_000),
-    ("gpt-4.1", 1_000_000),
-    ("gpt-4o", 128_000),
-    ("gpt-4", 128_000),
-    ("o3", 200_000),
-    ("o1", 200_000),
-]
-_DEFAULT_CONTEXT_WINDOW = 128_000
-
-
-def compute_context_pressure(model: str, input_tokens: int) -> float:
-    """Return input_tokens / context_window as a 0.0–1.0 float."""
-    window = _DEFAULT_CONTEXT_WINDOW
-    for prefix, size in _CONTEXT_WINDOWS:
-        if model.startswith(prefix):
-            window = size
-            break
-    return min(input_tokens / window, 1.0)
-
-
-class ContextOverflowError(Exception):
-    """Raised when the LLM rejects a request due to context length exceeded."""
-
-    def __init__(self, message: str, input_tokens: int = 0, limit: int = 0):
-        super().__init__(message)
-        self.input_tokens = input_tokens
-        self.limit = limit
-
-
-class TokenLimitExceeded(Exception):
-    """Raised when the LLM returns 429 due to billing/token limits."""
-
-
-class LLMProvider(ABC):
-    @abstractmethod
-    async def complete(
-        self,
-        *,
-        model: str,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        tool_choice: dict | None = None,
-        max_tokens: int = 4096,
-    ) -> LLMResponse: ...
-
-    async def stream(
-        self,
-        *,
-        model: str,
-        system: str,
-        messages: list[dict],
-        tools: list[dict] | None = None,
-        max_tokens: int = 4096,
-    ) -> AsyncIterator[StreamEvent]:
-        """Stream LLM responses. Default falls back to complete()."""
-        response = await self.complete(
-            model=model,
-            system=system,
-            messages=messages,
-            tools=tools,
-            max_tokens=max_tokens,
-        )
-        if response.content:
-            yield StreamTextDelta(text=response.content)
-        yield StreamComplete(response=response)

From d181240e523562b0a078bf9f76b215cf00342d5c Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 18:39:58 -0700
Subject: [PATCH 032/134] separated core prompts from terminal app prompts

---
 anton/core/llm/prompts.py | 96 ++++++++++-----------------------------
 anton/prompts.py          |  9 ++++
 2 files changed, 34 insertions(+), 71 deletions(-)
 create mode 100644 anton/prompts.py

diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
index cce3ea9f..98a54917 100644
--- a/anton/core/llm/prompts.py
+++ b/anton/core/llm/prompts.py
@@ -1,16 +1,7 @@
-LEARNING_EXTRACT_PROMPT = """\
-Analyze this task execution and extract reusable learnings.
-For each learning, provide:
-- topic: short snake_case category name
-- content: the learning detail (1-3 sentences)
-- summary: one-line summary for indexing
-
-Return a JSON array. If no meaningful learnings, return [].
-
-Example output:
-[{"topic": "file_operations", "content": "Always check if a file exists before reading.", "summary": "Check file existence before reads"}]
-"""
-
+# TODO: Update references to memory directories when new memory abstractions are implemented.
+# (Lines )
+# TODO: Update references to data vault directory? Will it be used this way across our environments?
+# (Lines )
 CHAT_SYSTEM_PROMPT = """\
 You are Anton — a self-evolving autonomous system that collaborates with people to \
 solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \
@@ -120,14 +111,6 @@
 Don't suggest personal data analysis if the user's question is purely informational \
 with no personal angle.
 
-CONTENT SHARING POLICY:
-- Publishing dashboards or reports to the web is done ONLY via the `publish_or_preview` tool. \
-Do NOT upload, post, or share generated files (HTML, data, images) to external hosting \
-services (paste sites, gists, CDNs, file hosts) via scratchpad code — unless the user \
-explicitly names the service and confirms. Reading from public APIs and writing to the \
-user's connected datasources (databases, CRMs, etc.) is fine — this rule only applies to \
-sharing generated output with the public internet.
-
 SCRATCHPAD:
 - Use the scratchpad for computation, data analysis, web scraping, plotting, file I/O, \
 shell commands, and anything that needs precise execution.
@@ -169,11 +152,6 @@
 - Host Python packages are available by default. Use the scratchpad install action to \
 add more — installed packages persist across resets.
 
-FILE ATTACHMENTS:
-- Users can drag files or paste clipboard images. These appear as <file path="..."> tags.
-- For binary files (images, PDFs), use the scratchpad to read and process them.
-- Clipboard images are saved to .anton/uploads/ — open with Pillow, OpenCV, etc.
-
 {visualizations_section}
 
 CONVERSATION DISCIPLINE (critical):
@@ -230,15 +208,14 @@
 # Visualization prompt variants — selected by ANTON_PROACTIVE_DASHBOARDS flag
 # ---------------------------------------------------------------------------
 
-_VISUALIZATIONS_PROACTIVE = """\
+BASE_VISUALIZATIONS_PROMPT = """\
 VISUALIZATIONS (charts, plots, maps, dashboards, reports):
 
-Insights-first workflow — ALWAYS follow this order for dashboards and multi-chart requests:
+Insights-first workflow — ALWAYS follow this order for analysis and reports:
 1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \
-structured results (numbers, percentages, rankings) — not HTML yet.
-2. STREAM INSIGHTS IMMEDIATELY: Before building any visualization, narrate your findings \
-to the user in the chat. They should get value within seconds, not after waiting for HTML. \
-Structure insights as:
+structured results (numbers, percentages, rankings).
+2. STREAM INSIGHTS IMMEDIATELY: Narrate your findings to the user in the chat. They should \
+get value within seconds. Structure insights as:
   - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \
 (use markdown tables). This gives the user the raw data immediately — positions, values, \
 returns, key metrics — before you interpret them.
@@ -252,7 +229,14 @@
 Timezone? Real-time or delayed? Don't hide these — state them clearly.
   - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \
 thresholds that matter, scenarios worth considering.
-3. WRITE A DASHBOARD BRIEF: Before coding the HTML, plan the dashboard out loud:
+
+Output format:
+{output_format}
+"""
+
+
+VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT = """\
+WRITE A DASHBOARD BRIEF: Before coding the HTML, plan the dashboard out loud:
   - What story does each chart tell? (not "a bar chart of X" but "this shows how Y \
 is driving Z, annotated at the inflection point")
   - What is the visual hierarchy? Hero KPIs at top, main narrative chart first, \
@@ -260,7 +244,8 @@
   - What should be annotated? Key dates, threshold crossings, outliers.
   - What color scheme ties it together? Consistent meaning (green=positive, red=negative) \
 across all charts.
-4. BUILD THE DASHBOARD — use multiple scratchpad cells, but produce ONE single self-contained HTML file:
+
+BUILD THE DASHBOARD — use multiple scratchpad cells, but produce ONE single self-contained HTML file:
 
   CRITICAL: The final dashboard MUST be a single .html file with ALL data, CSS, and JS inlined. \
 Do NOT reference external local files (like data.js) — browsers block local file:// cross-references \
@@ -292,13 +277,13 @@
 
   FINAL CELL — Assemble and write the HTML file:
   Combine: `html = html_body.replace('</body>', f'<script>{{data_js}}{{js_charts}}</script></body>')` \
-or similar. Write to `.anton/output/name.html` and open in browser.
+or similar.
 
   SELF-CONTAINED OUTPUT (critical):
   Prefer inlining everything — CSS in `<style>`, JS in `<script>`, data as JS variables. \
 A single .html file is the most portable and publishable format. \
 If the dataset is very large (>100KB of JSON), you may write it to a separate .js file \
-in the SAME directory (e.g. `.anton/output/dashboard_data.js`) and reference it with a \
+in the SAME directory and reference it with a \
 relative `<script src="dashboard_data.js">` tag. The publisher will auto-bundle sibling \
 files referenced in the HTML. Never reference files outside the output directory.
 
@@ -319,12 +304,7 @@
 Output format:
 - Unless the user explicitly asks for a different format, always output visualizations \
 as polished, single-file HTML pages — never raw PNGs or bare image files.
-- Save output to `.anton/output/` (create it if needed). Use descriptive filenames like \
-`cpi_portfolio.html`, not `output.html`.
-- Do NOT auto-open the file in the browser from scratchpad code. Instead, after writing \
-the HTML file, call the `publish_or_preview` tool with the file path and a short title. \
-This tool will interactively ask the user if they want to preview locally, publish to the \
-web, or skip. Let the tool handle the browser opening and publishing flow.
+Save output to `{output_path}` (create it if needed).
 
 Visual design:
 - Make it look good by default. Use a dark theme (#0d1117 background, #e6edf3 text), \
@@ -369,32 +349,12 @@
 assignment. Think dark-mode dashboard, not Jupyter default.\
 """
 
-_VISUALIZATIONS_CLI_ONLY = """\
-VISUALIZATIONS AND ANALYSIS OUTPUT:
 
+# TODO: Should we remove mentions of the terminal here?
+VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT = """\
 Do NOT proactively create HTML dashboards, charts, or browser-based visualizations. \
 All analysis output should be formatted for the CLI terminal.
 
-Insights-first workflow — ALWAYS follow this order for analysis and reports:
-1. FETCH DATA FIRST: Use one scratchpad call to pull data and compute key metrics. Return \
-structured results (numbers, percentages, rankings).
-2. STREAM INSIGHTS IMMEDIATELY: Narrate your findings to the user in the chat. They should \
-get value within seconds. Structure insights as:
-  - DATA HIGHLIGHTS: Start with a compact summary table showing the key numbers at a glance \
-(use markdown tables). This gives the user the raw data immediately — positions, values, \
-returns, key metrics — before you interpret them.
-  - HEADLINE: One sentence, the single most important finding. Lead with impact, not description.
-  - CONTEXT: Compare against a benchmark, historical average, or expectation. Raw numbers \
-without comparison are meaningless.
-  - THE NON-OBVIOUS: What would an expert analyst notice? Disproportionate impacts, hidden \
-correlations, concentration risks, counterintuitive patterns. Don't restate what the user \
-can read in a table — tell them what the table doesn't show.
-  - ASSUMPTIONS: Be explicit. What data source? What time range? Closing vs adjusted prices? \
-Timezone? Real-time or delayed? Don't hide these — state them clearly.
-  - ACTIONABLE EDGE: What could the user do with this information? Risks to watch, \
-thresholds that matter, scenarios worth considering.
-
-CLI output format:
 - Present all results as well-formatted markdown: tables, bullet points, headers, and \
 inline numbers. The terminal is the primary display — make it look great there.
 - Use markdown tables for tabular data. Keep columns aligned and readable.
@@ -402,14 +362,8 @@
 - For large datasets, summarize the top N and offer to show more.
 - When the user EXPLICITLY asks for a chart, dashboard, plot, or HTML visualization, \
 THEN build it as a self-contained HTML file with inlined CSS, JS, and data. \
-Save to .anton/output/. Do NOT auto-open the file from scratchpad code — instead call the \
-`publish_or_preview` tool with the file path and title after writing it. \
+Save to `{output_path}`.
 Use Apache ECharts (CDN), dark theme (#0d1117), and follow standard dashboard best practices. \
 If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \
 Never split CSS or chart logic into separate files — only large data payloads.\
 """
-
-
-def build_visualizations_prompt(proactive: bool = False) -> str:
-    """Return the visualization section for the system prompt."""
-    return _VISUALIZATIONS_PROACTIVE if proactive else _VISUALIZATIONS_CLI_ONLY
diff --git a/anton/prompts.py b/anton/prompts.py
new file mode 100644
index 00000000..1a036dde
--- /dev/null
+++ b/anton/prompts.py
@@ -0,0 +1,9 @@
+"""Extra prompts for the open source terminal agent."""
+
+FILE_ATTACHMENTS_PROMPT = """
+FILE ATTACHMENTS:
+- Users can drag files or paste clipboard images. These appear as <file path="..."> tags.
+- For binary files (images, PDFs), use the scratchpad to read and process them.
+- Clipboard images are saved to .anton/uploads/ — open with Pillow, OpenCV, etc.
+"""
+

From ff6606909b893f03054b4e409ba1970a5818447b Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 18:40:08 -0700
Subject: [PATCH 033/134] introduced output_dir config

---
 anton/config/settings.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/anton/config/settings.py b/anton/config/settings.py
index c12d9d29..1212f85f 100644
--- a/anton/config/settings.py
+++ b/anton/config/settings.py
@@ -40,6 +40,8 @@ class AntonSettings(BaseSettings):
 
     context_dir: str = ".anton/context"
 
+    output_dir: str = ".anton/output"
+
     memory_mode: str = "autopilot"  # autopilot | copilot | off
 
     episodic_memory: bool = True  # episodic memory archive — on by default
@@ -110,3 +112,5 @@ def resolve_workspace(self, folder: str | None = None) -> None:
             self.memory_dir = str(base / self.memory_dir)
         if not Path(self.context_dir).is_absolute():
             self.context_dir = str(base / self.context_dir)
+        if not Path(self.output_dir).is_absolute():
+            self.output_dir = str(base / self.output_dir)

From 264da922e0f0ee50730b1e8ee139fb742b523989 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 18:40:31 -0700
Subject: [PATCH 034/134] removed prompt from dump in registry

---
 anton/core/tools/registry.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 2579b5ff..ba26ca75 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -45,8 +45,9 @@ def dump(self) -> list[dict]:
         """
         tool_defs = []
         for tool_def in self._tools:
-            # Remove the handler from the tool definition.
+            # Remove the handler and prompt from the tool definition.
             tool_def = asdict(tool_def)
             tool_def.pop("handler")
+            tool_def.pop("prompt")
             tool_defs.append(tool_def)
         return tool_defs

From b16e17aefd3ee19683d39546f30f4906774a14fd Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 18:42:09 -0700
Subject: [PATCH 035/134] introduced prompt builder pipeline

---
 anton/core/llm/prompt_builder.py | 100 +++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 anton/core/llm/prompt_builder.py

diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
new file mode 100644
index 00000000..c3d6858a
--- /dev/null
+++ b/anton/core/llm/prompt_builder.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from .prompts import (
+    BASE_VISUALIZATIONS_PROMPT, 
+    CHAT_SYSTEM_PROMPT, 
+    VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT,
+    VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT,
+)
+
+if TYPE_CHECKING:
+    from anton.core.tools.tool_defs import ToolDef
+
+
+class ChatSystemPromptBuilder:
+    """
+    Build Anton's chat system prompt from core components.
+    """
+
+    def _build_tool_prompts_section(self, tool_defs: list["ToolDef"] | None) -> str:
+        """Build an optional system-prompt section from `ToolDef.prompt`."""
+        if not tool_defs:
+            return ""
+
+        chunks: list[str] = []
+        for tool in tool_defs:
+            prompt = getattr(tool, "prompt", None)
+
+            if not prompt:
+                continue
+
+            prompt_text = str(prompt).strip()
+            if not prompt_text:
+                continue
+
+            chunks.append(prompt_text)
+
+        if not chunks:
+            return ""
+
+        return "\n\n".join(chunks)
+
+    def _build_visualizations_section(
+        self,
+        *,
+        proactive_dashboards: bool,
+        output_path: str,
+    ) -> str:
+        visualizations_output_format_prompt = (
+            VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT
+            if proactive_dashboards
+            else VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT
+        )
+        # The output-format prompt can reference `{output_path}`.
+        output_format = visualizations_output_format_prompt.format(output_path=output_path)
+        return BASE_VISUALIZATIONS_PROMPT.format(output_format=output_format)
+
+    def build(
+        self,
+        *,
+        settings,
+        current_datetime: str,
+        runtime_context: str,
+        proactive_dashboards: bool,
+        tool_defs: list["ToolDef"] | None = None,
+        memory_context: str = "",
+        project_context: str = "",
+        datasource_context: str = "",
+    ) -> str:
+        output_path = f"{Path(str(settings.output_dir)).as_posix().rstrip('/')}/"
+
+        visualizations_section = self._build_visualizations_section(
+            proactive_dashboards=proactive_dashboards,
+            output_path=output_path,
+        )
+
+        prompt = CHAT_SYSTEM_PROMPT.format(
+            runtime_context=runtime_context,
+            visualizations_section=visualizations_section,
+            current_datetime=current_datetime,
+        )
+
+        tool_prompts = self._build_tool_prompts_section(tool_defs)
+        if tool_prompts:
+            prompt += tool_prompts
+
+        if memory_context:
+            prompt += memory_context
+        if project_context:
+            prompt += project_context
+        if datasource_context:
+            prompt += datasource_context
+
+        return prompt
+
+
+__all__ = ["ChatSystemPromptBuilder"]
+

From 08bc98f6211d6a2b8188763445058c4853958029 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 18:55:54 -0700
Subject: [PATCH 036/134] incorporated prompt builder to session

---
 anton/core/llm/prompt_builder.py |  3 +++
 anton/core/session.py            | 39 +++++++++++++++++---------------
 anton/core/tools/registry.py     |  6 +++++
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index c3d6858a..563ac296 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -67,6 +67,7 @@ def build(
         tool_defs: list["ToolDef"] | None = None,
         memory_context: str = "",
         project_context: str = "",
+        self_awareness_context: str = "",
         datasource_context: str = "",
     ) -> str:
         output_path = f"{Path(str(settings.output_dir)).as_posix().rstrip('/')}/"
@@ -90,6 +91,8 @@ def build(
             prompt += memory_context
         if project_context:
             prompt += project_context
+        if self_awareness_context:
+            prompt += self_awareness_context
         if datasource_context:
             prompt += datasource_context
 
diff --git a/anton/core/session.py b/anton/core/session.py
index 57d5467e..5bf75184 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -4,7 +4,8 @@
 from collections.abc import AsyncIterator
 from typing import TYPE_CHECKING
 
-from anton.core.llm.prompts import CHAT_SYSTEM_PROMPT, build_visualizations_prompt
+from anton.core.llm.prompts import CHAT_SYSTEM_PROMPT
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
 from anton.core.llm.provider import (
     ContextOverflowError,
     StreamComplete,
@@ -189,32 +190,34 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
         _now = _dt.datetime.now()
         _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p")
 
-        prompt = CHAT_SYSTEM_PROMPT.format(
-            runtime_context=self._runtime_context,
-            visualizations_section=build_visualizations_prompt(
-                self._proactive_dashboards
-            ),
-            current_datetime=_current_datetime,
-        )
         # Inject memory context (replaces old self_awareness)
         if self._cortex is not None:
             memory_section = await self._cortex.build_memory_context(user_message)
-            if memory_section:
-                prompt += memory_section
+
         elif self._self_awareness is not None:
             # Fallback for legacy usage (tests, etc.)
             sa_section = self._self_awareness.build_prompt_section()
-            if sa_section:
-                prompt += sa_section
+
         # Inject anton.md project context (user-written takes priority)
         if self._workspace is not None:
             md_context = self._workspace.build_anton_md_context()
-            if md_context:
-                prompt += md_context
+
         # Inject connected datasource context without credentials
         ds_ctx = build_datasource_context(active_only=self._active_datasource)
-        if ds_ctx:
-            prompt += ds_ctx
+
+        prompt_builder = ChatSystemPromptBuilder()
+        prompt = prompt_builder.build(
+            settings=self._settings,
+            current_datetime=_current_datetime,
+            runtime_context=self._runtime_context,
+            proactive_dashboards=self._proactive_dashboards,
+            tool_defs=self.tool_registry.get_tool_defs(),
+            memory_context=memory_section,
+            project_context=md_context,
+            self_awareness_context=sa_section,
+            datasource_context=ds_ctx,
+        )
+
         return prompt
 
     # Packages the LLM is most likely to care about when writing scratchpad code.
@@ -423,8 +426,8 @@ async def turn(self, user_input: str | list[dict]) -> str:
         self._history.append({"role": "user", "content": user_input})
 
         user_msg_str = user_input if isinstance(user_input, str) else ""
-        system = await self._build_system_prompt(user_msg_str)
         tools = self._build_tools()
+        system = await self._build_system_prompt(user_msg_str)
 
         try:
             response = await self._llm.plan(
@@ -646,8 +649,8 @@ async def _stream_and_handle_tools(
         self, user_message: str = ""
     ) -> AsyncIterator[StreamEvent]:
         """Stream one LLM call, handle tool loops, yield all events."""
-        system = await self._build_system_prompt(user_message)
         tools = self._build_tools()
+        system = await self._build_system_prompt(user_message)
 
         # Guard against summarizing an already-summarized history within the same
         # turn (e.g. ContextOverflowError on first call + pressure > threshold on
diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index ba26ca75..8e58c3a9 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -51,3 +51,9 @@ def dump(self) -> list[dict]:
             tool_def.pop("prompt")
             tool_defs.append(tool_def)
         return tool_defs
+
+    def get_tool_defs(self) -> list["ToolDef"]:
+        """
+        Get the tool definitions.
+        """
+        return self._tools

From 34f5dd1f77c7919ef63174c29311f1aec41d7289 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Wed, 8 Apr 2026 19:05:10 -0700
Subject: [PATCH 037/134] fixed prompt builder composition

---
 anton/chat_session.py            |  1 +
 anton/core/llm/prompt_builder.py |  4 ++--
 anton/core/session.py            | 13 ++++++++++---
 anton/core/tools/registry.py     |  4 ++++
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/anton/chat_session.py b/anton/chat_session.py
index a10d7dad..f8127a67 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -98,4 +98,5 @@ def rebuild_session(
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        output_dir=settings.output_dir,
     )
diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index 563ac296..867e9e23 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -60,17 +60,17 @@ def _build_visualizations_section(
     def build(
         self,
         *,
-        settings,
         current_datetime: str,
         runtime_context: str,
         proactive_dashboards: bool,
+        output_dir: str,
         tool_defs: list["ToolDef"] | None = None,
         memory_context: str = "",
         project_context: str = "",
         self_awareness_context: str = "",
         datasource_context: str = "",
     ) -> str:
-        output_path = f"{Path(str(settings.output_dir)).as_posix().rstrip('/')}/"
+        output_path = f"{Path(str(output_dir)).as_posix().rstrip('/')}/"
 
         visualizations_section = self._build_visualizations_section(
             proactive_dashboards=proactive_dashboards,
diff --git a/anton/core/session.py b/anton/core/session.py
index 5bf75184..478c0aae 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -4,7 +4,6 @@
 from collections.abc import AsyncIterator
 from typing import TYPE_CHECKING
 
-from anton.core.llm.prompts import CHAT_SYSTEM_PROMPT
 from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
 from anton.core.llm.provider import (
     ContextOverflowError,
@@ -106,6 +105,7 @@ def __init__(
         history_store: HistoryStore | None = None,
         session_id: str | None = None,
         proactive_dashboards: bool = False,
+        output_dir: str = "",
         tools: list[ToolDef] | None = None,
     ) -> None:
         self._llm = llm_client
@@ -115,6 +115,7 @@ def __init__(
         self._runtime_context = runtime_context
         self._proactive_dashboards = proactive_dashboards
         self._extra_tools = tools or []
+        self._output_dir = output_dir
         self._workspace = workspace
         self._console = console
         self._history: list[dict] = list(initial_history) if initial_history else []
@@ -191,23 +192,29 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
         _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p")
 
         # Inject memory context (replaces old self_awareness)
+        memory_section = ""
         if self._cortex is not None:
             memory_section = await self._cortex.build_memory_context(user_message)
 
-        elif self._self_awareness is not None:
+        sa_section = ""
+        if self._self_awareness is not None and self._cortex is None:
             # Fallback for legacy usage (tests, etc.)
             sa_section = self._self_awareness.build_prompt_section()
 
         # Inject anton.md project context (user-written takes priority)
+        md_context = ""
         if self._workspace is not None:
             md_context = self._workspace.build_anton_md_context()
 
         # Inject connected datasource context without credentials
         ds_ctx = build_datasource_context(active_only=self._active_datasource)
 
+        # Ensure the registry is populated before we extract tool prompts.
+        self._build_tools()
+
         prompt_builder = ChatSystemPromptBuilder()
         prompt = prompt_builder.build(
-            settings=self._settings,
+            output_dir=self._output_dir,
             current_datetime=_current_datetime,
             runtime_context=self._runtime_context,
             proactive_dashboards=self._proactive_dashboards,
diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 8e58c3a9..5368e053 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -27,6 +27,10 @@ def register_tool(self, tool_def: ToolDef) -> None:
         """
         self._tools.append(tool_def)
 
+    def get_tool_defs(self) -> list["ToolDef"]:
+        """Return the registered `ToolDef` objects (for prompt injection, etc.)."""
+        return list(self._tools)
+
     async def dispatch_tool(
         self, session: "ChatSession", tool_name: str, tc_input: dict
     ) -> str:

From 679d53b7fccc98ae98ce6eebbf7f13413a666f23 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:10:02 +0200
Subject: [PATCH 038/134] Fix registry dump

---
 anton/core/tools/registry.py | 53 ++++++++++++++----------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/anton/core/tools/registry.py b/anton/core/tools/registry.py
index 5368e053..d8b1c605 100644
--- a/anton/core/tools/registry.py
+++ b/anton/core/tools/registry.py
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from dataclasses import asdict
+
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -11,53 +11,42 @@ class ToolRegistry:
     """
     Registry of tools available to the LLM.
     """
+
     def __init__(self) -> None:
-        # Register core tools.
-        self._tools = []
+        self._tools: list[ToolDef] = []
 
     def __bool__(self) -> bool:
-        """
-        Return True if there are any tools registered.
-        """
         return bool(self._tools)
 
-    def register_tool(self, tool_def: ToolDef) -> None:
-        """
-        Register a new (extra to core) tool.
-        """
+    def register_tool(self, tool_def: "ToolDef") -> None:
+        """Register a tool. Skips duplicates by name."""
+        if any(t.name == tool_def.name for t in self._tools):
+            return
         self._tools.append(tool_def)
 
     def get_tool_defs(self) -> list["ToolDef"]:
-        """Return the registered `ToolDef` objects (for prompt injection, etc.)."""
+        """Return registered ToolDef objects (for prompt injection, etc.)."""
         return list(self._tools)
 
     async def dispatch_tool(
         self, session: "ChatSession", tool_name: str, tc_input: dict
     ) -> str:
-        """
-        Dispatch a tool call by name. Returns result text.
-        """
-        tool_def = next((tool for tool in self._tools if tool.name == tool_name), None)
+        """Dispatch a tool call by name. Returns result text."""
+        tool_def = next((t for t in self._tools if t.name == tool_name), None)
         if tool_def is None:
             raise ValueError(f"Tool {tool_name} not found")
         return await tool_def.handler(session, tc_input)
 
     def dump(self) -> list[dict]:
         """
-        Dump the registry as a list of tool definitions.
-        This is used to build the tools list for the LLM. As a result, the handler is not needed.
-        """
-        tool_defs = []
-        for tool_def in self._tools:
-            # Remove the handler and prompt from the tool definition.
-            tool_def = asdict(tool_def)
-            tool_def.pop("handler")
-            tool_def.pop("prompt")
-            tool_defs.append(tool_def)
-        return tool_defs
-
-    def get_tool_defs(self) -> list["ToolDef"]:
-        """
-        Get the tool definitions.
-        """
-        return self._tools
+        Dump the registry as a list of LLM-facing tool schemas.
+        Excludes handler and prompt — those are internal only.
+        """
+        return [
+            {
+                "name": t.name,
+                "description": t.description,
+                "input_schema": t.input_schema,
+            }
+            for t in self._tools
+        ]

From ab4ac16303b13cd172c7c5250dcd50ee34297be9 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:16:23 +0200
Subject: [PATCH 039/134] Move core constants to settings

---
 anton/config/settings.py |  4 ++-
 anton/core/session.py    | 63 ++++++++++++++++++++--------------------
 2 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/anton/config/settings.py b/anton/config/settings.py
index 1212f85f..dfe987a6 100644
--- a/anton/config/settings.py
+++ b/anton/config/settings.py
@@ -5,6 +5,8 @@
 from pydantic import PrivateAttr, field_validator
 from pydantic_settings import BaseSettings
 
+from anton.core.settings import CoreSettings
+
 
 def _build_env_files() -> list[str]:
     """Build .env loading chain: cwd/.env -> .anton/.env -> ~/.anton/.env"""
@@ -21,7 +23,7 @@ def _build_env_files() -> list[str]:
 _ENV_FILES = _build_env_files()
 
 
-class AntonSettings(BaseSettings):
+class AntonSettings(CoreSettings):
     model_config = {"env_prefix": "ANTON_", "env_file": _ENV_FILES, "env_file_encoding": "utf-8", "extra": "ignore"}
 
     planning_provider: str = "anthropic"
diff --git a/anton/core/session.py b/anton/core/session.py
index 478c0aae..3de4c1fe 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -24,6 +24,22 @@
     build_datasource_context,
     scrub_credentials,
 )
+from anton.core.settings import CoreSettings as _CoreSettings
+
+_s = _CoreSettings()
+MAX_TOOL_ROUNDS = _s.max_tool_rounds
+MAX_CONTINUATIONS = _s.max_continuations
+CONTEXT_PRESSURE_THRESHOLD = _s.context_pressure_threshold
+MAX_CONSECUTIVE_ERRORS = _s.max_consecutive_errors
+RESILIENCE_NUDGE_AT = _s.resilience_nudge_at
+TOKEN_STATUS_CACHE_TTL = _s.token_status_cache_ttl
+
+RESILIENCE_NUDGE = (
+    "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or "
+    "asking the user for help, try a creative workaround — different headers/user-agent, "
+    "a public API, archive.org, an alternate library, or a completely different data source. "
+    "Only involve the user if the problem truly requires something only they can provide."
+)
 
 if TYPE_CHECKING:
     from rich.console import Console
@@ -36,23 +52,6 @@
     from anton.workspace import Workspace
 
 
-# TODO: Move to settings?
-_MAX_TOOL_ROUNDS = 25  # Hard limit on consecutive tool-call rounds per turn
-_MAX_CONTINUATIONS = 3  # Max times the verification loop can restart the tool loop
-_CONTEXT_PRESSURE_THRESHOLD = 0.7  # Trigger compaction when context is 70% full
-_MAX_CONSECUTIVE_ERRORS = 5  # Stop if the same tool fails this many times in a row
-_RESILIENCE_NUDGE_AT = 2  # Inject resilience nudge after this many consecutive errors
-_RESILIENCE_NUDGE = (
-    "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or "
-    "asking the user for help, try a creative workaround — different headers/user-agent, "
-    "a public API, archive.org, an alternate library, or a completely different data source. "
-    "Only involve the user if the problem truly requires something only they can provide."
-)
-
-# TODO: Is this enough for now?
-TOKEN_STATUS_CACHE_TTL = 60.0
-
-
 def _apply_error_tracking(
     result_text: str,
     tool_name: str,
@@ -71,13 +70,13 @@ def _apply_error_tracking(
         resilience_nudged.discard(tool_name)
 
     streak = error_streak.get(tool_name, 0)
-    if streak >= _RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged:
-        result_text += _RESILIENCE_NUDGE
+    if streak >= RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged:
+        result_text += RESILIENCE_NUDGE
         resilience_nudged.add(tool_name)
 
-    if streak >= _MAX_CONSECUTIVE_ERRORS:
+    if streak >= MAX_CONSECUTIVE_ERRORS:
         result_text += (
-            f"\n\nSYSTEM: The '{tool_name}' tool has failed {_MAX_CONSECUTIVE_ERRORS} times "
+            f"\n\nSYSTEM: The '{tool_name}' tool has failed {MAX_CONSECUTIVE_ERRORS} times "
             "in a row. Stop retrying this approach. Either try a completely different "
             "strategy or tell the user what's going wrong so they can help."
         )
@@ -452,7 +451,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
             )
 
         # Proactive compaction
-        if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD:
+        if response.usage.context_pressure > CONTEXT_PRESSURE_THRESHOLD:
             await self._summarize_history()
             self._compact_scratchpads()
 
@@ -463,7 +462,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
 
         while response.tool_calls:
             tool_round += 1
-            if tool_round > _MAX_TOOL_ROUNDS:
+            if tool_round > MAX_TOOL_ROUNDS:
                 self._history.append(
                     {"role": "assistant", "content": response.content or ""}
                 )
@@ -471,7 +470,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
                     {
                         "role": "user",
                         "content": (
-                            f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
+                            f"SYSTEM: You have used {MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
                             "Pause here. Summarize what you have accomplished so far and what remains. "
                             "If you believe you are on a good track and can finish the task with more steps, "
                             "tell the user and ask if they'd like you to continue. "
@@ -545,7 +544,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
                 )
 
             # Proactive compaction during tool loop
-            if response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD:
+            if response.usage.context_pressure > CONTEXT_PRESSURE_THRESHOLD:
                 await self._summarize_history()
                 self._compact_scratchpads()
 
@@ -746,7 +745,7 @@ async def _stream_and_handle_tools(
         # Proactive compaction
         if (
             not _compacted_this_turn
-            and llm_response.usage.context_pressure > _CONTEXT_PRESSURE_THRESHOLD
+            and llm_response.usage.context_pressure > CONTEXT_PRESSURE_THRESHOLD
         ):
             await self._summarize_history()
             self._compact_scratchpads()
@@ -768,7 +767,7 @@ async def _stream_and_handle_tools(
 
             while llm_response.tool_calls:
                 tool_round += 1
-                if tool_round > _MAX_TOOL_ROUNDS:
+                if tool_round > MAX_TOOL_ROUNDS:
                     _max_rounds_hit = True
                     self._history.append(
                         {"role": "assistant", "content": llm_response.content or ""}
@@ -777,7 +776,7 @@ async def _stream_and_handle_tools(
                         {
                             "role": "user",
                             "content": (
-                                f"SYSTEM: You have used {_MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
+                                f"SYSTEM: You have used {MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
                                 "Pause here. Summarize what you have accomplished so far and what remains. "
                                 "If you believe you are on a good track and can finish the task with more steps, "
                                 "tell the user and ask if they'd like you to continue. "
@@ -1022,7 +1021,7 @@ async def _stream_and_handle_tools(
                 if (
                     not _compacted_this_turn
                     and llm_response.usage.context_pressure
-                    > _CONTEXT_PRESSURE_THRESHOLD
+                    > CONTEXT_PRESSURE_THRESHOLD
                 ):
                     await self._summarize_history()
                     self._compact_scratchpads()
@@ -1041,7 +1040,7 @@ async def _stream_and_handle_tools(
             reply = llm_response.content or ""
             self._history.append({"role": "assistant", "content": reply})
 
-            if continuation >= _MAX_CONTINUATIONS:
+            if continuation >= MAX_CONTINUATIONS:
                 # Budget exhausted — ask LLM to diagnose and present to user
                 self._history.append(
                     {
@@ -1134,7 +1133,7 @@ async def _stream_and_handle_tools(
                     "role": "user",
                     "content": (
                         f"SYSTEM: Task verification determined this task is not yet complete "
-                        f"(attempt {continuation}/{_MAX_CONTINUATIONS}).\n"
+                        f"(attempt {continuation}/{MAX_CONTINUATIONS}).\n"
                         f"Verifier assessment: {reason}\n\n"
                         "Continue working on the original request. Pick up where you left off "
                         "and finish the remaining work. Do not repeat work already done."
@@ -1143,7 +1142,7 @@ async def _stream_and_handle_tools(
             )
             yield StreamTaskProgress(
                 phase="analyzing",
-                message=f"Task incomplete — continuing ({continuation}/{_MAX_CONTINUATIONS})...",
+                message=f"Task incomplete — continuing ({continuation}/{MAX_CONTINUATIONS})...",
             )
 
             # Re-enter tool loop: get next LLM response with tools available

From 6e0f8d77fe36bbae78acac75530c69684ca650e4 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:16:34 +0200
Subject: [PATCH 040/134] Add core settings

---
 anton/core/settings.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 anton/core/settings.py

diff --git a/anton/core/settings.py b/anton/core/settings.py
new file mode 100644
index 00000000..4cf1b9c6
--- /dev/null
+++ b/anton/core/settings.py
@@ -0,0 +1,13 @@
+from pydantic_settings import BaseSettings
+
+
+class CoreSettings(BaseSettings):
+    model_config = {"env_prefix": "ANTON_", "extra": "ignore"}
+
+    # Session orchestration tuning
+    max_tool_rounds: int = 25
+    max_continuations: int = 3
+    context_pressure_threshold: float = 0.7
+    max_consecutive_errors: int = 5
+    resilience_nudge_at: int = 2
+    token_status_cache_ttl: float = 60.0

From 50d16ec8c9bae5329a4b548e214c8937bfc62779 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:22:27 +0200
Subject: [PATCH 041/134] Initialize constants on creation

---
 anton/core/session.py | 115 ++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 59 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index 3de4c1fe..4f37a5c1 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -5,6 +5,7 @@
 from typing import TYPE_CHECKING
 
 from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompts import RESILIENCE_NUDGE
 from anton.core.llm.provider import (
     ContextOverflowError,
     StreamComplete,
@@ -24,22 +25,8 @@
     build_datasource_context,
     scrub_credentials,
 )
-from anton.core.settings import CoreSettings as _CoreSettings
-
-_s = _CoreSettings()
-MAX_TOOL_ROUNDS = _s.max_tool_rounds
-MAX_CONTINUATIONS = _s.max_continuations
-CONTEXT_PRESSURE_THRESHOLD = _s.context_pressure_threshold
-MAX_CONSECUTIVE_ERRORS = _s.max_consecutive_errors
-RESILIENCE_NUDGE_AT = _s.resilience_nudge_at
-TOKEN_STATUS_CACHE_TTL = _s.token_status_cache_ttl
-
-RESILIENCE_NUDGE = (
-    "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or "
-    "asking the user for help, try a creative workaround — different headers/user-agent, "
-    "a public API, archive.org, an alternate library, or a completely different data source. "
-    "Only involve the user if the problem truly requires something only they can provide."
-)
+from anton.core.settings import CoreSettings
+
 
 if TYPE_CHECKING:
     from rich.console import Console
@@ -52,36 +39,6 @@
     from anton.workspace import Workspace
 
 
-def _apply_error_tracking(
-    result_text: str,
-    tool_name: str,
-    error_streak: dict[str, int],
-    resilience_nudged: set[str],
-) -> str:
-    """Track consecutive errors per tool and append nudge/circuit-breaker messages."""
-    is_error = any(
-        marker in result_text
-        for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:")
-    )
-    if is_error:
-        error_streak[tool_name] = error_streak.get(tool_name, 0) + 1
-    else:
-        error_streak[tool_name] = 0
-        resilience_nudged.discard(tool_name)
-
-    streak = error_streak.get(tool_name, 0)
-    if streak >= RESILIENCE_NUDGE_AT and tool_name not in resilience_nudged:
-        result_text += RESILIENCE_NUDGE
-        resilience_nudged.add(tool_name)
-
-    if streak >= MAX_CONSECUTIVE_ERRORS:
-        result_text += (
-            f"\n\nSYSTEM: The '{tool_name}' tool has failed {MAX_CONSECUTIVE_ERRORS} times "
-            "in a row. Stop retrying this approach. Either try a completely different "
-            "strategy or tell the user what's going wrong so they can help."
-        )
-
-    return result_text
 
 
 class ChatSession:
@@ -91,6 +48,7 @@ def __init__(
         self,
         llm_client: LLMClient,
         *,
+        settings: CoreSettings | None = None,
         self_awareness: SelfAwarenessContext | None = None,
         cortex: Cortex | None = None,
         episodic: EpisodicMemory | None = None,
@@ -107,6 +65,13 @@ def __init__(
         output_dir: str = "",
         tools: list[ToolDef] | None = None,
     ) -> None:
+        s = settings or CoreSettings()
+        self._max_tool_rounds = s.max_tool_rounds
+        self._max_continuations = s.max_continuations
+        self._context_pressure_threshold = s.context_pressure_threshold
+        self._max_consecutive_errors = s.max_consecutive_errors
+        self._resilience_nudge_at = s.resilience_nudge_at
+        self._token_status_cache_ttl = s.token_status_cache_ttl
         self._llm = llm_client
         self._self_awareness = self_awareness
         self._cortex = cortex
@@ -142,6 +107,38 @@ def __init__(
     def history(self) -> list[dict]:
         return self._history
 
+    def _apply_error_tracking(
+        self,
+        result_text: str,
+        tool_name: str,
+        error_streak: dict[str, int],
+        resilience_nudged: set[str],
+    ) -> str:
+        """Track consecutive errors per tool and append nudge/circuit-breaker messages."""
+        is_error = any(
+            marker in result_text
+            for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:")
+        )
+        if is_error:
+            error_streak[tool_name] = error_streak.get(tool_name, 0) + 1
+        else:
+            error_streak[tool_name] = 0
+            resilience_nudged.discard(tool_name)
+
+        streak = error_streak.get(tool_name, 0)
+        if streak >= self._resilience_nudge_at and tool_name not in resilience_nudged:
+            result_text += RESILIENCE_NUDGE
+            resilience_nudged.add(tool_name)
+
+        if streak >= self._max_consecutive_errors:
+            result_text += (
+                f"\n\nSYSTEM: The '{tool_name}' tool has failed {self._max_consecutive_errors} times "
+                "in a row. Stop retrying this approach. Either try a completely different "
+                "strategy or tell the user what's going wrong so they can help."
+            )
+
+        return result_text
+
     def repair_history(self) -> None:
         """Fix dangling tool_use blocks left by mid-stream cancellation.
 
@@ -451,7 +448,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
             )
 
         # Proactive compaction
-        if response.usage.context_pressure > CONTEXT_PRESSURE_THRESHOLD:
+        if response.usage.context_pressure > self._context_pressure_threshold:
             await self._summarize_history()
             self._compact_scratchpads()
 
@@ -462,7 +459,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
 
         while response.tool_calls:
             tool_round += 1
-            if tool_round > MAX_TOOL_ROUNDS:
+            if tool_round > self._max_tool_rounds:
                 self._history.append(
                     {"role": "assistant", "content": response.content or ""}
                 )
@@ -470,7 +467,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
                     {
                         "role": "user",
                         "content": (
-                            f"SYSTEM: You have used {MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
+                            f"SYSTEM: You have used {self._max_tool_rounds} tool-call rounds on this turn. "
                             "Pause here. Summarize what you have accomplished so far and what remains. "
                             "If you believe you are on a good track and can finish the task with more steps, "
                             "tell the user and ask if they'd like you to continue. "
@@ -510,7 +507,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
                     result_text = f"Tool '{tc.name}' failed: {exc}"
 
                 result_text = scrub_credentials(result_text)
-                result_text = _apply_error_tracking(
+                result_text = self._apply_error_tracking(
                     result_text,
                     tc.name,
                     error_streak,
@@ -544,7 +541,7 @@ async def turn(self, user_input: str | list[dict]) -> str:
                 )
 
             # Proactive compaction during tool loop
-            if response.usage.context_pressure > CONTEXT_PRESSURE_THRESHOLD:
+            if response.usage.context_pressure > self._context_pressure_threshold:
                 await self._summarize_history()
                 self._compact_scratchpads()
 
@@ -745,7 +742,7 @@ async def _stream_and_handle_tools(
         # Proactive compaction
         if (
             not _compacted_this_turn
-            and llm_response.usage.context_pressure > CONTEXT_PRESSURE_THRESHOLD
+            and llm_response.usage.context_pressure > self._context_pressure_threshold
         ):
             await self._summarize_history()
             self._compact_scratchpads()
@@ -767,7 +764,7 @@ async def _stream_and_handle_tools(
 
             while llm_response.tool_calls:
                 tool_round += 1
-                if tool_round > MAX_TOOL_ROUNDS:
+                if tool_round > self._max_tool_rounds:
                     _max_rounds_hit = True
                     self._history.append(
                         {"role": "assistant", "content": llm_response.content or ""}
@@ -776,7 +773,7 @@ async def _stream_and_handle_tools(
                         {
                             "role": "user",
                             "content": (
-                                f"SYSTEM: You have used {MAX_TOOL_ROUNDS} tool-call rounds on this turn. "
+                                f"SYSTEM: You have used {self._max_tool_rounds} tool-call rounds on this turn. "
                                 "Pause here. Summarize what you have accomplished so far and what remains. "
                                 "If you believe you are on a good track and can finish the task with more steps, "
                                 "tell the user and ask if they'd like you to continue. "
@@ -921,7 +918,7 @@ async def _stream_and_handle_tools(
                             tool=tc.name,
                         )
                     result_text = scrub_credentials(result_text)
-                    result_text = _apply_error_tracking(
+                    result_text = self._apply_error_tracking(
                         result_text, tc.name, error_streak, resilience_nudged
                     )
                     tool_results.append(
@@ -1021,7 +1018,7 @@ async def _stream_and_handle_tools(
                 if (
                     not _compacted_this_turn
                     and llm_response.usage.context_pressure
-                    > CONTEXT_PRESSURE_THRESHOLD
+                    > self._context_pressure_threshold
                 ):
                     await self._summarize_history()
                     self._compact_scratchpads()
@@ -1040,7 +1037,7 @@ async def _stream_and_handle_tools(
             reply = llm_response.content or ""
             self._history.append({"role": "assistant", "content": reply})
 
-            if continuation >= MAX_CONTINUATIONS:
+            if continuation >= self._max_continuations:
                 # Budget exhausted — ask LLM to diagnose and present to user
                 self._history.append(
                     {
@@ -1133,7 +1130,7 @@ async def _stream_and_handle_tools(
                     "role": "user",
                     "content": (
                         f"SYSTEM: Task verification determined this task is not yet complete "
-                        f"(attempt {continuation}/{MAX_CONTINUATIONS}).\n"
+                        f"(attempt {continuation}/{self._max_continuations}).\n"
                         f"Verifier assessment: {reason}\n\n"
                         "Continue working on the original request. Pick up where you left off "
                         "and finish the remaining work. Do not repeat work already done."
@@ -1142,7 +1139,7 @@ async def _stream_and_handle_tools(
             )
             yield StreamTaskProgress(
                 phase="analyzing",
-                message=f"Task incomplete — continuing ({continuation}/{MAX_CONTINUATIONS})...",
+                message=f"Task incomplete — continuing ({continuation}/{self._max_continuations})...",
             )
 
             # Re-enter tool loop: get next LLM response with tools available

From e8c5f18560ffdb4e043aa90559727e28e558ef9a Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:22:38 +0200
Subject: [PATCH 042/134] Move nudge from const to prompt

---
 anton/core/llm/prompts.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
index 98a54917..d9b65ef8 100644
--- a/anton/core/llm/prompts.py
+++ b/anton/core/llm/prompts.py
@@ -367,3 +367,11 @@
 If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \
 Never split CSS or chart logic into separate files — only large data payloads.\
 """
+
+
+RESILIENCE_NUDGE = (
+    "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or "
+    "asking the user for help, try a creative workaround — different headers/user-agent, "
+    "a public API, archive.org, an alternate library, or a completely different data source. "
+    "Only involve the user if the problem truly requires something only they can provide."
+)

From c98eca18b90a75a17df5eeab129a8e92affd7acd Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:55:28 +0200
Subject: [PATCH 043/134] Improt from settings

---
 anton/chat.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 39dcfc14..0536e4dc 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -17,7 +17,7 @@
     parse_dropped_paths as _parse_dropped_paths,
     save_clipboard_image,
 )
-from anton.core.session import ChatSession, TOKEN_STATUS_CACHE_TTL
+from anton.core.session import ChatSession
 from anton.core.llm.provider import (
     TokenLimitExceeded,
     StreamComplete,
@@ -912,7 +912,8 @@ async def _chat_loop(
 ) -> None:
     from anton.context.self_awareness import SelfAwarenessContext
     from anton.core.llm.client import LLMClient
-    from anton.memory.cortex import Cortex
+    from anton.core.memory.cortex import Cortex
+    from anton.core.memory.hippocampus import Hippocampus
     from anton.workspace import Workspace
 
     # Use a mutable container so closures always see the current client
@@ -940,8 +941,8 @@ async def _chat_loop(
     project_memory_dir = settings.workspace_path / ".anton" / "memory"
 
     cortex = Cortex(
-        global_dir=global_memory_dir,
-        project_dir=project_memory_dir,
+        global_hc=Hippocampus(global_memory_dir),
+        project_hc=Hippocampus(project_memory_dir),
         mode=settings.memory_mode,
         llm_client=state["llm_client"],
     )
@@ -1358,7 +1359,7 @@ def _bottom_toolbar():
                 if settings.minds_api_key and settings.minds_url:
                     #TODO: Lets check if this is best solution
                     now = time.monotonic()
-                    if last_token_status_checked_at is None or (now - last_token_status_checked_at) >= TOKEN_STATUS_CACHE_TTL:
+                    if last_token_status_checked_at is None or (now - last_token_status_checked_at) >= settings.token_status_cache_ttl:
                         last_token_status = check_minds_token_limits(
                             settings.minds_url.rstrip("/"),
                             settings.minds_api_key,

From 407d2bf4b6586b03f09e0c8ff0f9a2c1606af817 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:55:34 +0200
Subject: [PATCH 044/134] new memory

---
 anton/core/memory/__init__.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/anton/core/memory/__init__.py b/anton/core/memory/__init__.py
index e69de29b..ab60a07c 100644
--- a/anton/core/memory/__init__.py
+++ b/anton/core/memory/__init__.py
@@ -0,0 +1,15 @@
+from anton.core.memory.base import HippocampusProtocol
+from anton.core.memory.hippocampus import Engram, Hippocampus
+from anton.core.memory.episodes import Episode, EpisodicMemory
+from anton.core.memory.consolidator import Consolidator
+from anton.core.memory.cortex import Cortex
+
+__all__ = [
+    "HippocampusProtocol",
+    "Engram",
+    "Hippocampus",
+    "Episode",
+    "EpisodicMemory",
+    "Consolidator",
+    "Cortex",
+]

From 66b573add882753945764427a2360454d43adc54 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:56:07 +0200
Subject: [PATCH 045/134] Base interface

---
 anton/core/memory/base.py | 67 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 anton/core/memory/base.py

diff --git a/anton/core/memory/base.py b/anton/core/memory/base.py
new file mode 100644
index 00000000..26b71437
--- /dev/null
+++ b/anton/core/memory/base.py
@@ -0,0 +1,67 @@
+"""HippocampusProtocol — structural interface for memory backend swappability.
+
+The Protocol defines the public contract of a Hippocampus instance so that
+Enterprise adapters can provide alternate backends (e.g. database-backed,
+cloud-synced) without inheriting from the file-based implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class HippocampusProtocol(Protocol):
+    """Structural protocol for a single-scope memory store.
+
+    Implementors handle read/write at one scope (global or project).
+    The concrete ``Hippocampus`` class in ``core/memory/hippocampus.py``
+    satisfies this protocol automatically via structural sub-typing.
+    """
+
+    def recall_identity(self) -> str:
+        """Return the identity snapshot (profile.md equivalent)."""
+        ...
+
+    def recall_rules(self) -> str:
+        """Return behavioral gates (rules.md equivalent)."""
+        ...
+
+    def recall_lessons(self, token_budget: int = 1000) -> str:
+        """Return semantic facts within the given token budget."""
+        ...
+
+    def recall_topic(self, slug: str) -> str:
+        """Return deep domain expertise for a topic slug."""
+        ...
+
+    def recall_scratchpad_wisdom(self) -> str:
+        """Return procedural knowledge relevant to scratchpad execution."""
+        ...
+
+    def encode_rule(
+        self,
+        text: str,
+        kind: Literal["always", "never", "when"],
+        confidence: str = "medium",
+        source: str = "llm",
+    ) -> None:
+        """Write a behavioral gate to storage."""
+        ...
+
+    def encode_lesson(
+        self,
+        text: str,
+        topic: str = "",
+        source: str = "llm",
+    ) -> None:
+        """Write a semantic fact to storage."""
+        ...
+
+    def rewrite_identity(self, entries: list[str]) -> None:
+        """Replace the identity snapshot in full."""
+        ...
+
+    def entry_count(self) -> int:
+        """Count total entries across rules and lessons stores."""
+        ...

From 2bff14cd717ea1f400b1b75fb8dbf2a43c64d520 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:56:22 +0200
Subject: [PATCH 046/134] Move consolidator and cortex

---
 anton/core/memory/consolidator.py | 199 +++++++++++++
 anton/core/memory/cortex.py       | 466 ++++++++++++++++++++++++++++++
 2 files changed, 665 insertions(+)
 create mode 100644 anton/core/memory/consolidator.py
 create mode 100644 anton/core/memory/cortex.py

diff --git a/anton/core/memory/consolidator.py b/anton/core/memory/consolidator.py
new file mode 100644
index 00000000..8a0e8bb6
--- /dev/null
+++ b/anton/core/memory/consolidator.py
@@ -0,0 +1,199 @@
+"""Consolidator — Anton's sleep-like memory consolidation process.
+
+Named for hippocampal-cortical replay during Slow-Wave Sleep (SWS).
+
+During sleep, the hippocampus "replays" recent experiences to the neocortex
+in compressed, accelerated bursts (sharp-wave ripples). This offline process:
+  - Reviews what happened during waking hours
+  - Extracts statistical regularities and important lessons
+  - Transfers knowledge from episodic (hippocampal) to semantic (cortical) storage
+  - Is selective — emotionally tagged and goal-relevant experiences get priority
+
+The Consolidator mirrors this exactly: after a scratchpad session ends, it
+replays the cell history, asks "what would I tell myself to do differently?",
+and encodes the resulting lessons into long-term memory via the Cortex.
+
+Like sleep, consolidation is:
+  - Offline (runs after the task, not during)
+  - Compressed (summarizes cells, doesn't replay in full)
+  - Selective (only triggers when there were errors, long sessions, or cancellations)
+  - Background (doesn't block the user's next interaction)
+"""
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING
+
+from anton.core.memory.hippocampus import Engram
+
+if TYPE_CHECKING:
+    from anton.core.llm.client import LLMClient
+    from anton.scratchpad import Cell
+
+
+_CONSOLIDATION_PROMPT = """\
+You are a memory consolidation system for an AI coding assistant.
+
+Review this scratchpad session (sequence of code cells with their results) and
+extract durable, reusable lessons. Focus on:
+
+1. **Rules** — patterns to always/never follow:
+   - "Always call progress() before long API calls in scratchpad"
+   - "Never use time.sleep() in scratchpad cells"
+   - Conditional rules: "If fetching paginated data → use async + progress()"
+
+2. **Lessons** — factual knowledge discovered:
+   - API behaviors: "CoinGecko free tier rate-limits at ~50 req/min"
+   - Library quirks: "pandas read_csv needs encoding='utf-8-sig' for BOM files"
+   - Data facts: "Bitcoin price data via /coins/bitcoin/market_chart/range"
+
+Return a JSON array of objects:
+[
+  {
+    "text": "the memory to encode",
+    "kind": "always" | "never" | "when" | "lesson",
+    "scope": "global" | "project",
+    "topic": "optional-topic-slug",
+    "confidence": "high" | "medium"
+  }
+]
+
+Rules for scope:
+- "project": DEFAULT — use this for most memories. Anything related to the current
+  codebase, its APIs, file paths, libraries, patterns, conventions, or behaviors
+  observed during this session belongs here.
+- "global": RARE — only for truly universal knowledge that applies to any project
+  (e.g. general language quirks, stdlib gotchas). When in doubt, use "project".
+
+Rules for confidence:
+- "high": clearly correct, verified by the session results
+- "medium": probably correct but worth confirming
+
+If no meaningful lessons exist, return [].
+Do NOT extract trivial observations. Only encode genuinely reusable knowledge.
+"""
+
+
+class Consolidator:
+    """Extracts durable lessons from scratchpad sessions via offline replay.
+
+    Brain analog: hippocampal sharp-wave ripples during SWS that replay
+    compressed versions of waking experiences to the neocortex for
+    long-term storage.
+    """
+
+    def should_replay(self, cells: list[Cell]) -> bool:
+        """Heuristic gate — determines if this session warrants consolidation.
+
+        Like the amygdala tagging experiences for priority replay:
+        emotionally significant events (errors, long sessions, cancellations)
+        are preferentially consolidated. No LLM call needed.
+
+        Triggers when:
+          - Any cell had an error (negative emotional valence)
+          - Session was long (>=5 cells — rich experience to mine)
+          - Any cell was cancelled (interrupted action — what went wrong?)
+        """
+        if len(cells) < 2:
+            return False
+
+        # Long sessions are worth reviewing
+        if len(cells) >= 5:
+            return True
+
+        # Errors are high-signal learning opportunities
+        for cell in cells:
+            if cell.error:
+                return True
+
+        # Check for cancellation markers in stderr
+        for cell in cells:
+            if cell.stderr and ("cancelled" in cell.stderr.lower() or "killed" in cell.stderr.lower()):
+                return True
+
+        return False
+
+    async def replay_and_extract(self, cells: list[Cell], llm_client: LLMClient) -> list[Engram]:
+        """Replay the scratchpad session and extract lessons.
+
+        Like SWS replay: compresses the full session into a compact summary,
+        then runs a fast LLM pass asking:
+          "If you were to do this task again, what would you tell yourself?"
+
+        Returns structured Engram objects ready for encoding via the Cortex.
+        """
+        # Build compact cell summary
+        summary_lines: list[str] = []
+        for i, cell in enumerate(cells, 1):
+            desc = cell.description or "(no description)"
+            status = "error" if cell.error else "ok"
+            output_preview = ""
+            if cell.stdout:
+                first_line = cell.stdout.strip().split("\n")[0][:200]
+                output_preview = f" → {first_line}"
+            elif cell.error:
+                first_line = cell.error.strip().split("\n")[-1][:200]
+                output_preview = f" → ERROR: {first_line}"
+
+            summary_lines.append(f"Cell {i} [{status}]: {desc}{output_preview}")
+
+            # Include code snippet for error cells (helpful context)
+            if cell.error and cell.code:
+                code_preview = cell.code[:300]
+                if len(cell.code) > 300:
+                    code_preview += "..."
+                summary_lines.append(f"  Code: {code_preview}")
+
+        session_summary = "\n".join(summary_lines)
+
+        try:
+            response = await llm_client.code(
+                system=_CONSOLIDATION_PROMPT,
+                messages=[{"role": "user", "content": session_summary}],
+                max_tokens=2048,
+            )
+
+            raw = response.content.strip()
+            # Handle markdown code fences
+            if raw.startswith("```"):
+                raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
+                if raw.endswith("```"):
+                    raw = raw[:-3]
+                raw = raw.strip()
+
+            items = json.loads(raw)
+            if not isinstance(items, list):
+                return []
+
+        except Exception:
+            return []
+
+        engrams: list[Engram] = []
+        for item in items:
+            if not isinstance(item, dict) or "text" not in item:
+                continue
+
+            kind = item.get("kind", "lesson")
+            if kind not in ("always", "never", "when", "lesson"):
+                kind = "lesson"
+
+            scope = item.get("scope", "project")
+            if scope not in ("global", "project"):
+                scope = "project"
+
+            confidence = item.get("confidence", "medium")
+            if confidence not in ("high", "medium", "low"):
+                confidence = "medium"
+
+            engrams.append(Engram(
+                text=item["text"],
+                kind=kind,
+                scope=scope,
+                confidence=confidence,
+                topic=item.get("topic", ""),
+                source="consolidation",
+            ))
+
+        # Cap extraction to prevent memory bloat from single sessions
+        return engrams[:5]
diff --git a/anton/core/memory/cortex.py b/anton/core/memory/cortex.py
new file mode 100644
index 00000000..2ebefb23
--- /dev/null
+++ b/anton/core/memory/cortex.py
@@ -0,0 +1,466 @@
+"""Cortex — Anton's executive memory coordinator.
+
+Named for the Prefrontal Cortex (PFC), the brain's executive center that
+orchestrates memory retrieval by sending top-down signals to the hippocampus
+and other memory systems.
+
+The dorsolateral PFC handles strategic retrieval — selecting which memories
+to pull into working memory. The ventromedial PFC integrates across memory
+systems to provide coherent context. The Cortex class mirrors both:
+
+  - build_memory_context() → dlPFC: strategic retrieval for the system prompt
+  - get_scratchpad_context() → vmPFC: integrating relevant knowledge for tools
+  - encode() → executive decision to encode (directing the hippocampus)
+  - encoding_gate() → encoding gate modulated by the memory mode
+
+The Cortex coordinates two Hippocampus instances (global + project scope),
+like how the PFC coordinates retrieval from multiple brain memory systems.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from anton.core.memory.base import HippocampusProtocol
+from anton.core.memory.hippocampus import Engram, Hippocampus
+
+if TYPE_CHECKING:
+    from anton.core.llm.client import LLMClient
+
+
+_IDENTITY_EXTRACT_PROMPT = """\
+Extract identity facts from this user message. Return a JSON array of strings,
+each a concise fact about the user (name, timezone, expertise, preferences, tools).
+
+If no identity-relevant information is found, return [].
+
+Examples of identity facts:
+- "Name: Jorge"
+- "Timezone: PST"
+- "Prefers dark mode"
+- "Uses uv over pip"
+
+Only extract facts that are clearly about the user's identity, preferences,
+or working style. Ignore transient conversation details.
+"""
+
+_COMPACTION_PROMPT = """\
+You are a memory compaction system. Review these memory entries and:
+1. Remove exact duplicates
+2. Merge entries that say the same thing differently — keep the clearest version
+3. Remove entries that are superseded by newer, more specific entries
+4. Keep all unique, useful entries
+
+Return a JSON object with:
+- "kept": array of entry strings to keep — preserve the trailing `<!-- ... -->` metadata comment on each entry exactly as it appears
+- "merged": array of strings describing what was merged
+- "pruned": array of strings describing what was removed and why
+
+Be conservative — when in doubt, keep the entry.
+"""
+
+
+class Cortex:
+    """Executive coordinator for Anton's memory systems.
+
+    Manages two HippocampusProtocol instances (global + project scope), decides what
+    memories to load into working memory (the context window), and gates
+    encoding based on the current memory mode (the neuromodulatory setting).
+    """
+
+    def __init__(
+        self,
+        global_hc: HippocampusProtocol,
+        project_hc: HippocampusProtocol,
+        mode: str = "autopilot",
+        llm_client: LLMClient | None = None,
+    ) -> None:
+        """Initialize the executive with two hippocampal stores.
+
+        Args:
+            global_hc: Memory store for cross-project memories (global scope)
+            project_hc: Memory store for project-specific memories
+            mode: Memory mode — autopilot|copilot|off (encoding gate)
+            llm_client: For LLM-assisted operations (profile extraction, compaction)
+        """
+        self.global_hc = global_hc
+        self.project_hc = project_hc
+        self.mode = mode
+        self._llm = llm_client
+        self._turn_count = 0
+
+    # ~6000 chars ≈ ~1500 tokens — above this, use LLM to filter rules
+    _RULES_BUDGET_CHARS = 6000
+
+    _RULES_RETRIEVAL_PROMPT = """\
+Given the user's current message, select only the conditional (When/If) rules that are \
+relevant. Return the selected rules exactly as they appear, one per line (keep the "- " prefix).
+If all rules are relevant, return them all. If none are relevant, return "NONE".
+Do NOT add, modify, or summarize rules — return them verbatim.
+"""
+
+    async def build_memory_context(self, user_message: str = "") -> str:
+        """Assemble memories for the system prompt — the 'working memory' load.
+
+        Like the dlPFC performing strategic retrieval: selects what enters
+        the context window based on relevance and budget.
+
+        Args:
+            user_message: Current user message for cue-dependent retrieval.
+                When rules exceed the token budget, only relevant rules are loaded.
+        """
+        sections: list[str] = []
+
+        # 1. Identity (global only — identity is singular)
+        identity = self.global_hc.recall_identity()
+        if identity:
+            sections.append(f"## Your Memory — Identity\n{identity}")
+
+        # 2. Global rules (with smart retrieval)
+        global_rules = self.global_hc.recall_rules()
+        if global_rules:
+            global_rules = await self._retrieve_relevant_rules(global_rules, user_message)
+            if global_rules:
+                sections.append(f"## Your Memory — Global Rules\n{global_rules}")
+
+        # 3. Project rules (with smart retrieval)
+        project_rules = self.project_hc.recall_rules()
+        if project_rules:
+            project_rules = await self._retrieve_relevant_rules(project_rules, user_message)
+            if project_rules:
+                sections.append(f"## Your Memory — Project Rules\n{project_rules}")
+
+        # 4. Global lessons
+        global_lessons = self.global_hc.recall_lessons(token_budget=1000)
+        if global_lessons:
+            sections.append(f"## Your Memory — Global Lessons\n{global_lessons}")
+
+        # 5. Project lessons
+        project_lessons = self.project_hc.recall_lessons(token_budget=1000)
+        if project_lessons:
+            sections.append(f"## Your Memory — Project Lessons\n{project_lessons}")
+
+        # 6. Minds datasource context (auto-loaded if present)
+        minds_topic = self.project_hc.recall_topic("minds-datasource")
+        if minds_topic:
+            sections.append(f"## Minds — Datasource Context\n{minds_topic}")
+
+        if not sections:
+            return ""
+
+        return "\n\n" + "\n\n".join(sections)
+
+    async def _retrieve_relevant_rules(self, all_rules: str, user_message: str) -> str:
+        """Filter rules to only those relevant to the current user message.
+
+        Brain analog: dlPFC cue-dependent recall — the prefrontal cortex
+        selects which memories to activate based on current goals, rather
+        than loading everything into working memory.
+
+        Always/Never rules are behavioral constraints — always loaded in full.
+        Only conditional (When/If) rules are filtered by relevance.
+        If rules are under budget or no LLM is available, returns as-is.
+        """
+        if not user_message or self._llm is None:
+            return all_rules
+        if len(all_rules) <= self._RULES_BUDGET_CHARS:
+            return all_rules
+
+        # Split rules into mandatory (Always/Never) and filterable (When)
+        lines = all_rules.splitlines()
+        mandatory_lines: list[str] = []
+        when_lines: list[str] = []
+        current_section = ""
+
+        for line in lines:
+            stripped = line.strip()
+            if stripped.startswith("## Always"):
+                current_section = "always"
+                mandatory_lines.append(line)
+            elif stripped.startswith("## Never"):
+                current_section = "never"
+                mandatory_lines.append(line)
+            elif stripped.startswith("## When"):
+                current_section = "when"
+                mandatory_lines.append(line)  # keep the header
+            elif stripped.startswith("## ") or stripped.startswith("# "):
+                current_section = ""
+                mandatory_lines.append(line)
+            elif current_section == "when":
+                when_lines.append(line)
+            else:
+                mandatory_lines.append(line)
+
+        # If When section is small, no need to filter
+        when_text = "\n".join(when_lines).strip()
+        if not when_text or len(when_text) < 1000:
+            return all_rules
+
+        # Filter only the When rules
+        try:
+            response = await self._llm.code(
+                system=self._RULES_RETRIEVAL_PROMPT,
+                messages=[{
+                    "role": "user",
+                    "content": f"User message: {user_message}\n\nRules:\n{when_text}",
+                }],
+                max_tokens=4096,
+            )
+            result = response.content.strip()
+            if result == "NONE":
+                filtered_when = ""
+            elif result:
+                filtered_when = result
+            else:
+                filtered_when = when_text
+        except Exception:
+            filtered_when = when_text
+
+        # Reassemble: mandatory sections + filtered When rules
+        output = "\n".join(mandatory_lines)
+        if filtered_when:
+            output += "\n" + filtered_when
+        return output
+
+    def get_scratchpad_context(self) -> str:
+        """Retrieve procedural knowledge for scratchpad tool injection.
+
+        Like the vmPFC integrating memories for action planning — combines
+        global + project scratchpad wisdom into a coherent set of guidelines.
+        """
+        parts: list[str] = []
+
+        global_wisdom = self.global_hc.recall_scratchpad_wisdom()
+        if global_wisdom:
+            parts.append(global_wisdom)
+
+        project_wisdom = self.project_hc.recall_scratchpad_wisdom()
+        if project_wisdom:
+            parts.append(project_wisdom)
+
+        return "\n".join(parts)
+
+    async def encode(self, engrams: list[Engram]) -> list[str]:
+        """Direct the hippocampus to encode new memories.
+
+        Routes each engram to the appropriate hippocampal store based on scope.
+        Returns list of actions taken for logging.
+        """
+        if self.mode == "off":
+            return ["Memory encoding is disabled."]
+
+        actions: list[str] = []
+        for engram in engrams:
+            hc = self.global_hc if engram.scope == "global" else self.project_hc
+
+            if engram.kind == "profile":
+                # Profile entries accumulate, then rewrite
+                existing = hc.recall_identity()
+                entries = []
+                if existing:
+                    for line in existing.splitlines():
+                        stripped = line.strip()
+                        if stripped.startswith("- "):
+                            entries.append(stripped[2:])
+                        elif stripped and not stripped.startswith("#"):
+                            entries.append(stripped)
+                entries.append(engram.text)
+                hc.rewrite_identity(entries)
+                actions.append(f"Updated identity: {engram.text}")
+
+            elif engram.kind in ("always", "never", "when"):
+                hc.encode_rule(
+                    engram.text,
+                    kind=engram.kind,
+                    confidence=engram.confidence,
+                    source=engram.source,
+                )
+                actions.append(f"Encoded {engram.kind} rule: {engram.text}")
+
+            elif engram.kind == "lesson":
+                hc.encode_lesson(
+                    engram.text,
+                    topic=engram.topic,
+                    source=engram.source,
+                )
+                actions.append(f"Encoded lesson: {engram.text}")
+
+        return actions
+
+    def encoding_gate(self, engram: Engram) -> bool:
+        """Whether this engram needs user confirmation before encoding.
+
+        Brain analog: the Locus Coeruleus-NE system modulating encoding gain.
+        - autopilot (high NE): encode everything → never confirm
+        - copilot (moderate NE): auto-encode high-confidence, confirm ambiguous
+        - off (suppressed ACh): never encode (but also never writes)
+
+        Confirmations are always deferred until after the user has received
+        their answer — never shown during scratchpad execution or mid-turn.
+        """
+        if self.mode == "autopilot":
+            return False
+        if self.mode == "off":
+            return False  # Won't reach encoding anyway
+        # copilot: auto-encode high confidence user-sourced, confirm rest
+        return engram.confidence != "high"
+
+    # --- Compaction: Systems Consolidation + Synaptic Homeostasis ---
+
+    _COMPACTION_THRESHOLD = 20  # entries before compaction triggers
+    _VACUUM_INTERVAL = 10  # check compaction every N turns
+
+    def needs_compaction(self) -> bool:
+        """Check if memory files have grown beyond the compaction threshold.
+
+        Brain analog: synaptic saturation — during waking hours, synapses
+        strengthen indiscriminately. When the load exceeds a threshold,
+        consolidation/pruning is triggered.
+        """
+        return (
+            self.global_hc.entry_count() > self._COMPACTION_THRESHOLD
+            or self.project_hc.entry_count() > self._COMPACTION_THRESHOLD
+        )
+
+    async def compact_all(self) -> None:
+        """Run systems consolidation on all memory files.
+
+        Brain analog: the Synaptic Homeostasis Hypothesis (Tononi-Cirelli).
+        Uses the coding model for fast, cheap deduplication.
+        """
+        if self._llm is None:
+            return
+
+        for hc in (self.global_hc, self.project_hc):
+            if not isinstance(hc, Hippocampus):
+                continue  # compaction is file-specific; non-file backends skip
+            if hc.entry_count() > self._COMPACTION_THRESHOLD:
+                await self._compact_file(hc, hc._lessons_path, "lesson")
+                await self._compact_file(hc, hc._rules_path, "rules")
+
+    async def vacuum(self) -> None:
+        """Run compaction unconditionally on all memory files.
+
+        Public entry point for on-demand cleanup (e.g. after /connect).
+        Unlike compact_all(), skips the threshold check — always runs.
+        """
+        if self._llm is None:
+            return
+        for hc in (self.global_hc, self.project_hc):
+            if not isinstance(hc, Hippocampus):
+                continue  # compaction is file-specific; non-file backends skip
+            await self._compact_file(hc, hc._lessons_path, "lesson")
+            await self._compact_file(hc, hc._rules_path, "rules")
+
+    def maybe_vacuum(self) -> None:
+        """Periodic vacuum check — call after each assistant turn.
+
+        Every _VACUUM_INTERVAL turns, checks if compaction is needed and
+        fires it in the background if so.
+        """
+        import asyncio
+
+        self._turn_count += 1
+        if self._turn_count % self._VACUUM_INTERVAL != 0:
+            return
+        if not self.needs_compaction():
+            return
+        asyncio.create_task(self.compact_all())
+
+    async def _compact_file(self, hc: Hippocampus, path: Path, kind: str) -> None:
+        """Compact a single memory file using LLM-assisted deduplication."""
+        if not path.is_file():
+            return
+
+        content = path.read_text(encoding="utf-8")
+        entries = [ln.strip() for ln in content.splitlines() if ln.strip().startswith("- ")]
+
+        if len(entries) < 8:
+            return
+
+        try:
+            response = await self._llm.code(
+                system=_COMPACTION_PROMPT,
+                messages=[{"role": "user", "content": "\n".join(entries)}],
+                max_tokens=4096,
+            )
+            result = json.loads(response.content)
+            kept = result.get("kept", entries)
+        except Exception:
+            return  # Don't corrupt memory on failure
+
+        if not kept:
+            return
+
+        # Rebuild the file
+        if kind == "rules":
+            # Preserve section structure
+            always = [e for e in kept if "always" in e.lower() or not any(
+                k in e.lower() for k in ("never", "when", "if ")
+            )]
+            never = [e for e in kept if "never" in e.lower()]
+            when_rules = [e for e in kept if "when" in e.lower() or "if " in e.lower()]
+
+            lines = ["# Rules\n", "## Always"]
+            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in always)
+            lines.extend(["", "## Never"])
+            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in never)
+            lines.extend(["", "## When"])
+            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in when_rules)
+            new_content = "\n".join(lines) + "\n"
+        else:
+            lines = ["# Lessons"]
+            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in kept)
+            new_content = "\n".join(lines) + "\n"
+
+        hc._encode_with_lock(path, new_content, mode="write")
+
+    async def maybe_update_identity(self, user_message: str) -> None:
+        """Check if conversation reveals identity facts worth profiling.
+
+        Brain analog: the Default Mode Network passively monitoring for
+        self-relevant information. Runs infrequently (every ~5 turns)
+        to avoid overhead. Uses fast coding model for classification.
+        """
+        if self._llm is None or self.mode == "off":
+            return
+
+        try:
+            response = await self._llm.code(
+                system=_IDENTITY_EXTRACT_PROMPT,
+                messages=[{"role": "user", "content": user_message}],
+                max_tokens=512,
+            )
+            facts = json.loads(response.content)
+            if not isinstance(facts, list) or not facts:
+                return
+        except Exception:
+            return
+
+        # Merge with existing identity
+        existing = self.global_hc.recall_identity()
+        existing_entries: list[str] = []
+        if existing:
+            for line in existing.splitlines():
+                stripped = line.strip()
+                if stripped.startswith("- "):
+                    existing_entries.append(stripped[2:])
+                elif stripped and not stripped.startswith("#"):
+                    existing_entries.append(stripped)
+
+        # Add new facts, avoiding duplicates
+        for fact in facts:
+            if isinstance(fact, str) and fact not in existing_entries:
+                # Check if this updates an existing fact (same key prefix)
+                key = fact.split(":")[0].strip().lower() if ":" in fact else ""
+                if key:
+                    existing_entries = [
+                        e for e in existing_entries
+                        if not e.lower().startswith(key + ":")
+                    ]
+                existing_entries.append(fact)
+
+        if existing_entries:
+            self.global_hc.rewrite_identity(existing_entries)

From b375db1348e39f7345ab98b80a35e5373baea7b2 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:56:33 +0200
Subject: [PATCH 047/134] Move episodes and hipocampus

---
 anton/core/memory/episodes.py    | 225 +++++++++++++++++
 anton/core/memory/hippocampus.py | 407 +++++++++++++++++++++++++++++++
 2 files changed, 632 insertions(+)
 create mode 100644 anton/core/memory/episodes.py
 create mode 100644 anton/core/memory/hippocampus.py

diff --git a/anton/core/memory/episodes.py b/anton/core/memory/episodes.py
new file mode 100644
index 00000000..339fd135
--- /dev/null
+++ b/anton/core/memory/episodes.py
@@ -0,0 +1,225 @@
+"""Episodic memory — timestamped, searchable archive of conversations.
+
+Brain analog: Medial Temporal Lobe episodic memory system.  Logs every
+turn (user input, assistant response, tool calls, scratchpad cells) as
+JSONL.  Fire-and-forget: never blocks anything.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+
+@dataclass
+class Episode:
+    ts: str  # ISO 8601
+    session: str  # Session ID (matches filename stem)
+    turn: int
+    role: str  # "user" | "assistant" | "tool_call" | "tool_result" | "scratchpad"
+    content: str
+    meta: dict = field(default_factory=dict)
+
+
+_MAX_TOOL_INPUT = 2000
+_MAX_TOOL_RESULT = 2000
+
+
+class EpisodicMemory:
+    """Append-only conversation archive stored as per-session JSONL files."""
+
+    def __init__(self, episodes_dir: Path, *, enabled: bool = True) -> None:
+        self._dir = episodes_dir
+        self._enabled = enabled
+        self._session_id: str | None = None
+        self._file: Path | None = None
+
+
+    @property
+    def enabled(self) -> bool:
+        return self._enabled
+
+    @enabled.setter
+    def enabled(self, value: bool) -> None:
+        self._enabled = value
+
+    def start_session(self) -> str:
+        """Create a new JSONL file for this session and return the session ID."""
+        now = datetime.now(timezone.utc)
+        self._session_id = now.strftime("%Y%m%d_%H%M%S")
+        self._dir.mkdir(parents=True, exist_ok=True)
+        self._file = self._dir / f"{self._session_id}.jsonl"
+        self._file.touch()
+        return self._session_id
+
+    def resume_session(self, session_id: str) -> str:
+        """Resume an existing session by reusing its session ID and file."""
+        self._session_id = session_id
+        self._dir.mkdir(parents=True, exist_ok=True)
+        self._file = self._dir / f"{self._session_id}.jsonl"
+        if not self._file.exists():
+            self._file.touch()
+        return self._session_id
+
+    def log(self, episode: Episode) -> None:
+        """Append an episode to the current session file.  Never raises."""
+        if not self._enabled or self._file is None:
+            return
+        try:
+            import sys
+
+            line = json.dumps(asdict(episode), ensure_ascii=False) + "\n"
+            with self._file.open("a", encoding="utf-8") as f:
+                if sys.platform != "win32":
+                    import fcntl
+                    fcntl.flock(f, fcntl.LOCK_EX)
+                    f.write(line)
+                    fcntl.flock(f, fcntl.LOCK_UN)
+                else:
+                    f.write(line)
+        except Exception:
+            pass  # Fire-and-forget
+
+    def log_turn(
+        self,
+        turn: int,
+        role: str,
+        content: str,
+        **meta: object,
+    ) -> None:
+        """Convenience wrapper around log()."""
+        if not self._enabled or self._session_id is None:
+            return
+        # Truncate tool content
+        if role == "tool_call":
+            content = content[:_MAX_TOOL_INPUT]
+        elif role == "tool_result":
+            content = content[:_MAX_TOOL_RESULT]
+
+        self.log(Episode(
+            ts=datetime.now(timezone.utc).isoformat(),
+            session=self._session_id,
+            turn=turn,
+            role=role,
+            content=content,
+            meta=dict(meta),
+        ))
+
+    def recall(
+        self,
+        query: str,
+        *,
+        max_results: int = 20,
+        days_back: int | None = None,
+    ) -> list[Episode]:
+        """Search episodes for *query* (case-insensitive substring match).
+
+        When a user turn matches, returns the full episode context: the
+        matching turn plus the assistant response, tool calls, and scratchpad
+        results from the same turn. This mirrors real episodic recall — you
+        remember the whole episode, not just the cue.
+
+        Returns newest-first, capped at *max_results* episodes (each episode
+        may include multiple turns).
+        """
+        if not self._dir.is_dir():
+            return []
+
+        cutoff: datetime | None = None
+        if days_back is not None:
+            cutoff = datetime.now(timezone.utc) - timedelta(days=days_back)
+
+        pattern = re.compile(re.escape(query), re.IGNORECASE)
+        matches: list[Episode] = []
+        seen_turns: set[tuple[str, int]] = set()  # (session, turn) dedup
+
+        # Iterate files newest-first (filenames sort chronologically)
+        for path in sorted(self._dir.glob("*.jsonl"), reverse=True):
+            if cutoff is not None:
+                stem = path.stem
+                try:
+                    file_dt = datetime.strptime(stem, "%Y%m%d_%H%M%S").replace(
+                        tzinfo=timezone.utc,
+                    )
+                    if file_dt < cutoff:
+                        continue
+                except ValueError:
+                    pass
+
+            try:
+                lines = path.read_text(encoding="utf-8").strip().splitlines()
+            except Exception:
+                continue
+
+            # Parse all episodes in this file for context lookups
+            all_episodes: list[Episode] = []
+            for line in lines:
+                if not line.strip():
+                    continue
+                try:
+                    all_episodes.append(Episode(**json.loads(line)))
+                except Exception:
+                    continue
+
+            # Build turn index: (session, turn) -> list of episodes
+            turn_index: dict[tuple[str, int], list[Episode]] = {}
+            for ep in all_episodes:
+                key = (ep.session, ep.turn)
+                turn_index.setdefault(key, []).append(ep)
+
+            # Search newest-first
+            for ep in reversed(all_episodes):
+                if not pattern.search(ep.content):
+                    continue
+
+                key = (ep.session, ep.turn)
+                if key in seen_turns:
+                    continue
+                seen_turns.add(key)
+
+                # Include the matching turn's full context
+                turn_episodes = turn_index.get(key, [ep])
+                matches.extend(turn_episodes)
+
+                # Also grab the next turn if it has an assistant response
+                if ep.role == "user":
+                    next_key = (ep.session, ep.turn + 1)
+                    if next_key not in seen_turns:
+                        next_eps = turn_index.get(next_key, [])
+                        has_response = any(
+                            e.role in ("assistant", "tool_result", "scratchpad")
+                            for e in next_eps
+                        )
+                        if next_eps and has_response:
+                            seen_turns.add(next_key)
+                            matches.extend(next_eps)
+
+                if len(seen_turns) >= max_results:
+                    return matches
+
+        return matches
+
+    def recall_formatted(
+        self,
+        query: str,
+        **kwargs: object,
+    ) -> str:
+        """Return a human-readable string of matching episodes."""
+        episodes = self.recall(query, **kwargs)  # type: ignore[arg-type]
+        if not episodes:
+            return f"No episodes found matching '{query}'."
+        lines: list[str] = []
+        for ep in episodes:
+            # Show more content for assistant/scratchpad responses
+            max_len = 2000 if ep.role in ("assistant", "scratchpad", "tool_result") else 500
+            lines.append(f"[{ep.ts}] ({ep.role}) {ep.content[:max_len]}")
+        return "\n".join(lines)
+
+    def session_count(self) -> int:
+        """Count the number of session files."""
+        if not self._dir.is_dir():
+            return 0
+        return sum(1 for _ in self._dir.glob("*.jsonl"))
diff --git a/anton/core/memory/hippocampus.py b/anton/core/memory/hippocampus.py
new file mode 100644
index 00000000..4bea725e
--- /dev/null
+++ b/anton/core/memory/hippocampus.py
@@ -0,0 +1,407 @@
+"""Hippocampus — Anton's memory encoding and retrieval engine.
+
+Named for the brain's hippocampus (CA3/CA1 subfields), which handles the
+fundamental operations of memory: encoding new traces (writing) and
+pattern-completing partial cues into full memories (reading).
+
+The hippocampus doesn't decide *what* to remember — that's the cortex's job.
+It simply executes storage and retrieval at a single scope (global or project),
+like how the brain's hippocampus encodes at the level of individual memory traces
+without executive judgment about importance.
+
+Each Hippocampus instance manages one scope's files:
+  - profile.md  → identity (mPFC / Default Mode Network analogy)
+  - rules.md    → behavioral gates (Basal Ganglia / OFC analogy)
+  - lessons.md  → semantic facts (Anterior Temporal Lobe analogy)
+  - topics/*.md → domain expertise (Cortical Association Areas analogy)
+"""
+
+from __future__ import annotations
+
+import re
+import sys
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal
+
+
+@dataclass
+class Engram:
+    """A single memory trace — the fundamental unit of memory.
+
+    Named for Karl Lashley's 'engram' — the physical substrate of a memory.
+    Each engram carries its content plus metadata about confidence, origin,
+    and topic for later retrieval and consolidation.
+    """
+
+    text: str
+    kind: Literal["always", "never", "when", "lesson", "profile"]
+    scope: Literal["global", "project"]
+    confidence: Literal["high", "medium", "low"] = "medium"
+    topic: str = ""
+    source: Literal["user", "consolidation", "llm"] = "llm"
+
+
+class Hippocampus:
+    """Reads and writes memory traces at a single scope (global OR project).
+
+    Like the hippocampal CA3 region (pattern completion for reads) and CA1
+    region (pattern separation for writes), this class handles the low-level
+    mechanics of memory storage without higher-order decisions about relevance
+    or importance.
+    """
+
+    def __init__(self, base_dir: Path) -> None:
+        """Initialize for a single scope.
+
+        Args:
+            base_dir: ~/.anton/memory/ (global) or <project>/.anton/memory/ (project)
+        """
+        self._dir = base_dir
+        self._profile_path = base_dir / "profile.md"
+        self._rules_path = base_dir / "rules.md"
+        self._lessons_path = base_dir / "lessons.md"
+        self._topics_dir = base_dir / "topics"
+
+
+    def recall_identity(self) -> str:
+        """Load the always-on self-model (profile.md).
+
+        Brain analog: medial Prefrontal Cortex / Default Mode Network.
+        This is the identity substrate — always active, never "looked up",
+        it contextualizes all other processing. Global scope only.
+        """
+        if not self._profile_path.is_file():
+            return ""
+        try:
+            return self._profile_path.read_text(encoding="utf-8").strip()
+        except (OSError, UnicodeDecodeError):
+            return ""
+
+    def recall_rules(self) -> str:
+        """Load behavioral gates (rules.md) as formatted always/never/when.
+
+        Brain analog: Basal Ganglia (Go/No-Go pathways) + Orbitofrontal Cortex
+        (conditional behavioral rules). These aren't memories to recall —
+        they're constraints that shape action selection.
+        """
+        if not self._rules_path.is_file():
+            return ""
+        try:
+            return self._rules_path.read_text(encoding="utf-8").strip()
+        except (OSError, UnicodeDecodeError):
+            return ""
+
+    def recall_lessons(self, token_budget: int = 1000) -> str:
+        """Load semantic knowledge (lessons.md), most recent first, within budget.
+
+        Brain analog: Anterior Temporal Lobe — the convergence hub for semantic
+        facts distilled from many episodes. Budget enforced at ~4 chars/token.
+        """
+        if not self._lessons_path.is_file():
+            return ""
+        try:
+            content = self._lessons_path.read_text(encoding="utf-8").strip()
+        except (OSError, UnicodeDecodeError):
+            return ""
+
+        if not content:
+            return ""
+
+        # Extract individual entries (lines starting with "- ")
+        lines = [ln for ln in content.splitlines() if ln.strip()]
+        # Keep header, then entries in reverse order (most recent last → first)
+        header_lines = []
+        entry_lines = []
+        for ln in lines:
+            if ln.startswith("- ") or ln.startswith("  "):
+                entry_lines.append(ln)
+            else:
+                header_lines.append(ln)
+
+        # Reverse entries so most recent are first
+        entry_lines.reverse()
+
+        # Budget: ~4 chars per token
+        char_budget = token_budget * 4
+        result_lines = list(header_lines)
+        used = sum(len(ln) for ln in result_lines)
+
+        for ln in entry_lines:
+            if used + len(ln) + 1 > char_budget:
+                break
+            result_lines.append(ln)
+            used += len(ln) + 1
+
+        return "\n".join(result_lines)
+
+    def recall_topic(self, slug: str) -> str:
+        """Load deep domain expertise on demand (topics/{slug}.md).
+
+        Brain analog: Cortical Association Areas — specialized regions activated
+        associatively when contextual cues indicate relevance.
+        """
+        safe_slug = self._sanitize_slug(slug)
+        path = self._topics_dir / f"{safe_slug}.md"
+        if not path.is_file():
+            return ""
+        try:
+            return path.read_text(encoding="utf-8").strip()
+        except (OSError, UnicodeDecodeError):
+            return ""
+
+    def recall_scratchpad_wisdom(self) -> str:
+        """Retrieve procedural knowledge relevant to scratchpad execution.
+
+        Returns all "when" rules + lessons with topic starting with "scratchpad-".
+        Injected into tool descriptions so the LLM sees them when composing code.
+        """
+        parts: list[str] = []
+
+        # Extract "when" rules
+        rules = self.recall_rules()
+        if rules:
+            in_when = False
+            for line in rules.splitlines():
+                if line.strip().startswith("## When"):
+                    in_when = True
+                    continue
+                elif line.strip().startswith("## "):
+                    in_when = False
+                    continue
+                if in_when and line.strip().startswith("- "):
+                    parts.append(line.strip())
+
+        # Extract scratchpad-related lessons
+        lessons = self._read_full_lessons()
+        for line in lessons.splitlines():
+            if line.strip().startswith("- ") and "scratchpad" in line.lower():
+                stripped = line.strip()
+                if stripped not in parts:
+                    parts.append(stripped)
+
+        # Check topics/scratchpad-*.md files
+        if self._topics_dir.is_dir():
+            for path in sorted(self._topics_dir.iterdir()):
+                if path.name.startswith("scratchpad-") and path.suffix == ".md":
+                    try:
+                        content = path.read_text(encoding="utf-8").strip()
+                        if content:
+                            parts.append(content)
+                    except (OSError, UnicodeDecodeError):
+                        continue
+
+        return "\n".join(parts)
+
+    def _read_full_lessons(self) -> str:
+        """Read lessons.md without budget constraint (for internal use)."""
+        if not self._lessons_path.is_file():
+            return ""
+        try:
+            return self._lessons_path.read_text(encoding="utf-8").strip()
+        except (OSError, UnicodeDecodeError):
+            return ""
+
+    def encode_rule(
+        self,
+        text: str,
+        kind: Literal["always", "never", "when"],
+        confidence: str = "medium",
+        source: str = "llm",
+    ) -> None:
+        """Write a new behavioral gate to rules.md.
+
+        Appends under the correct section (Always/Never/When).
+        Uses file locking for safety — like how the hippocampus
+        prevents interference between overlapping encoding events.
+        """
+        self._dir.mkdir(parents=True, exist_ok=True)
+
+        ts = time.strftime("%Y-%m-%d")
+        metadata = f"<!-- confidence:{confidence} source:{source} ts:{ts} -->"
+        entry = f"- {text} {metadata}\n"
+
+        section_header = f"## {kind.capitalize()}"
+
+        # Read existing content or create skeleton
+        if self._rules_path.is_file():
+            content = self._rules_path.read_text(encoding="utf-8")
+        else:
+            content = "# Rules\n\n## Always\n\n## Never\n\n## When\n"
+
+        # Check for duplicate (exact entry match, ignoring metadata)
+        if text in self._extract_entry_texts(content):
+            return
+
+        # Find the section and append
+        lines = content.splitlines(keepends=True)
+        new_lines: list[str] = []
+        inserted = False
+
+        i = 0
+        while i < len(lines):
+            new_lines.append(lines[i])
+            if lines[i].strip() == section_header and not inserted:
+                # Skip to end of section (next ## or end of file)
+                i += 1
+                section_entries: list[str] = []
+                while i < len(lines) and not (
+                    lines[i].strip().startswith("## ") and lines[i].strip() != section_header
+                ):
+                    section_entries.append(lines[i])
+                    i += 1
+                # Add existing entries
+                new_lines.extend(section_entries)
+                # Ensure we have a blank line before the entry if needed
+                if section_entries and section_entries[-1].strip():
+                    new_lines.append("\n")
+                elif not section_entries:
+                    pass  # Section was empty, entry follows header
+                new_lines.append(entry)
+                inserted = True
+                continue
+            i += 1
+
+        if not inserted:
+            # Section didn't exist — add it
+            new_lines.append(f"\n{section_header}\n{entry}")
+
+        self._encode_with_lock(self._rules_path, "".join(new_lines), mode="write")
+
+    def encode_lesson(
+        self,
+        text: str,
+        topic: str = "",
+        source: str = "llm",
+    ) -> None:
+        """Write a semantic fact to lessons.md.
+
+        If a topic is provided, also creates/appends to topics/{slug}.md.
+        """
+        self._dir.mkdir(parents=True, exist_ok=True)
+
+        ts = time.strftime("%Y-%m-%d")
+        topic_tag = f" topic:{topic}" if topic else ""
+        entry = f"- {text} <!--{topic_tag} ts:{ts} -->\n"
+
+        # Append to lessons.md
+        if not self._lessons_path.is_file():
+            self._encode_with_lock(
+                self._lessons_path,
+                f"# Lessons\n{entry}",
+                mode="write",
+            )
+        else:
+            # Check for duplicate (exact entry match, ignoring metadata)
+            existing = self._lessons_path.read_text(encoding="utf-8")
+            if text in self._extract_entry_texts(existing):
+                return
+            self._encode_with_lock(self._lessons_path, entry, mode="append")
+
+        # Also write to topic file if topic is substantial
+        if topic:
+            self._topics_dir.mkdir(parents=True, exist_ok=True)
+            slug = self._sanitize_slug(topic)
+            topic_path = self._topics_dir / f"{slug}.md"
+            if not topic_path.is_file():
+                self._encode_with_lock(
+                    topic_path,
+                    f"# {topic}\n{entry}",
+                    mode="write",
+                )
+            else:
+                existing = topic_path.read_text(encoding="utf-8")
+                if text not in self._extract_entry_texts(existing):
+                    self._encode_with_lock(topic_path, entry, mode="append")
+
+    def rewrite_identity(self, entries: list[str]) -> None:
+        """Replace the identity snapshot (profile.md) — full rewrite, not append.
+
+        Unlike other memory operations, identity is a coherent snapshot, not
+        an append log. Like how your self-concept updates as a whole, not
+        by appending new facts to old ones.
+        """
+        self._dir.mkdir(parents=True, exist_ok=True)
+        content = "# Profile\n" + "\n".join(f"- {e}" for e in entries) + "\n"
+        self._encode_with_lock(self._profile_path, content, mode="write")
+
+    def entry_count(self) -> int:
+        """Count total entries across rules.md and lessons.md."""
+        count = 0
+        for path in (self._rules_path, self._lessons_path):
+            if path.is_file():
+                try:
+                    content = path.read_text(encoding="utf-8")
+                    count += sum(1 for ln in content.splitlines() if ln.strip().startswith("- "))
+                except (OSError, UnicodeDecodeError):
+                    continue
+        return count
+
+    def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None:
+        """Write with file locking (fcntl.flock on Unix, no-op on Windows).
+
+        Prevents corruption from concurrent Anton sessions writing to
+        global memory — like synaptic tagging ensuring encoding fidelity
+        despite concurrent neural activity.
+        """
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        if mode == "write":
+            # Atomic write via temp file + rename
+            tmp_path = path.with_suffix(path.suffix + ".tmp")
+            with open(tmp_path, "w", encoding="utf-8") as f:
+                if sys.platform != "win32":
+                    import fcntl
+                    fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+                try:
+                    f.write(text)
+                    f.flush()
+                finally:
+                    if sys.platform != "win32":
+                        import fcntl
+                        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+            tmp_path.replace(path)
+        else:
+            # Append mode
+            with open(path, "a", encoding="utf-8") as f:
+                if sys.platform != "win32":
+                    import fcntl
+                    fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+                try:
+                    f.write(text)
+                    f.flush()
+                finally:
+                    if sys.platform != "win32":
+                        import fcntl
+                        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+
+    @staticmethod
+    def _extract_entry_texts(content: str) -> set[str]:
+        """Extract the set of normalized entry texts from a markdown memory file.
+
+        Strips the leading ``- ``, trailing metadata comments, and whitespace
+        so that dedup comparisons are exact-match on the *meaning* line only.
+        """
+        texts: set[str] = set()
+        for line in content.splitlines():
+            stripped = line.strip()
+            if not stripped.startswith("- "):
+                continue
+            # Remove leading "- "
+            entry = stripped[2:]
+            # Remove trailing <!-- ... --> metadata
+            entry = re.sub(r"\s*<!--[\s\S]*?-->\s*$", "", entry)
+            entry = entry.strip()
+            if entry:
+                texts.add(entry)
+        return texts
+
+    @staticmethod
+    def _sanitize_slug(name: str) -> str:
+        """Sanitize a topic name into a safe file slug."""
+        text = name.lower().strip()
+        text = re.sub(r"[^a-z0-9\s_-]", "", text)
+        text = re.sub(r"[\s]+", "-", text)
+        text = re.sub(r"-+", "-", text)
+        return text.strip("-") or "general"

From e167d6b8c1ab5eef661a0acb809ce159d29af9f5 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:56:47 +0200
Subject: [PATCH 048/134] Use shims for initial improts

---
 anton/memory/consolidator.py | 201 +--------------
 anton/memory/cortex.py       | 463 +----------------------------------
 2 files changed, 6 insertions(+), 658 deletions(-)

diff --git a/anton/memory/consolidator.py b/anton/memory/consolidator.py
index 621aa10e..4f61a018 100644
--- a/anton/memory/consolidator.py
+++ b/anton/memory/consolidator.py
@@ -1,199 +1,4 @@
-"""Consolidator — Anton's sleep-like memory consolidation process.
+# Shim — re-exports from core. Import from anton.core.memory.consolidator directly.
+from anton.core.memory.consolidator import Consolidator
 
-Named for hippocampal-cortical replay during Slow-Wave Sleep (SWS).
-
-During sleep, the hippocampus "replays" recent experiences to the neocortex
-in compressed, accelerated bursts (sharp-wave ripples). This offline process:
-  - Reviews what happened during waking hours
-  - Extracts statistical regularities and important lessons
-  - Transfers knowledge from episodic (hippocampal) to semantic (cortical) storage
-  - Is selective — emotionally tagged and goal-relevant experiences get priority
-
-The Consolidator mirrors this exactly: after a scratchpad session ends, it
-replays the cell history, asks "what would I tell myself to do differently?",
-and encodes the resulting lessons into long-term memory via the Cortex.
-
-Like sleep, consolidation is:
-  - Offline (runs after the task, not during)
-  - Compressed (summarizes cells, doesn't replay in full)
-  - Selective (only triggers when there were errors, long sessions, or cancellations)
-  - Background (doesn't block the user's next interaction)
-"""
-
-from __future__ import annotations
-
-import json
-from typing import TYPE_CHECKING
-
-from anton.memory.hippocampus import Engram
-
-if TYPE_CHECKING:
-    from anton.core.llm.client import LLMClient
-    from anton.scratchpad import Cell
-
-
-_CONSOLIDATION_PROMPT = """\
-You are a memory consolidation system for an AI coding assistant.
-
-Review this scratchpad session (sequence of code cells with their results) and
-extract durable, reusable lessons. Focus on:
-
-1. **Rules** — patterns to always/never follow:
-   - "Always call progress() before long API calls in scratchpad"
-   - "Never use time.sleep() in scratchpad cells"
-   - Conditional rules: "If fetching paginated data → use async + progress()"
-
-2. **Lessons** — factual knowledge discovered:
-   - API behaviors: "CoinGecko free tier rate-limits at ~50 req/min"
-   - Library quirks: "pandas read_csv needs encoding='utf-8-sig' for BOM files"
-   - Data facts: "Bitcoin price data via /coins/bitcoin/market_chart/range"
-
-Return a JSON array of objects:
-[
-  {
-    "text": "the memory to encode",
-    "kind": "always" | "never" | "when" | "lesson",
-    "scope": "global" | "project",
-    "topic": "optional-topic-slug",
-    "confidence": "high" | "medium"
-  }
-]
-
-Rules for scope:
-- "project": DEFAULT — use this for most memories. Anything related to the current
-  codebase, its APIs, file paths, libraries, patterns, conventions, or behaviors
-  observed during this session belongs here.
-- "global": RARE — only for truly universal knowledge that applies to any project
-  (e.g. general language quirks, stdlib gotchas). When in doubt, use "project".
-
-Rules for confidence:
-- "high": clearly correct, verified by the session results
-- "medium": probably correct but worth confirming
-
-If no meaningful lessons exist, return [].
-Do NOT extract trivial observations. Only encode genuinely reusable knowledge.
-"""
-
-
-class Consolidator:
-    """Extracts durable lessons from scratchpad sessions via offline replay.
-
-    Brain analog: hippocampal sharp-wave ripples during SWS that replay
-    compressed versions of waking experiences to the neocortex for
-    long-term storage.
-    """
-
-    def should_replay(self, cells: list[Cell]) -> bool:
-        """Heuristic gate — determines if this session warrants consolidation.
-
-        Like the amygdala tagging experiences for priority replay:
-        emotionally significant events (errors, long sessions, cancellations)
-        are preferentially consolidated. No LLM call needed.
-
-        Triggers when:
-          - Any cell had an error (negative emotional valence)
-          - Session was long (>=5 cells — rich experience to mine)
-          - Any cell was cancelled (interrupted action — what went wrong?)
-        """
-        if len(cells) < 2:
-            return False
-
-        # Long sessions are worth reviewing
-        if len(cells) >= 5:
-            return True
-
-        # Errors are high-signal learning opportunities
-        for cell in cells:
-            if cell.error:
-                return True
-
-        # Check for cancellation markers in stderr
-        for cell in cells:
-            if cell.stderr and ("cancelled" in cell.stderr.lower() or "killed" in cell.stderr.lower()):
-                return True
-
-        return False
-
-    async def replay_and_extract(self, cells: list[Cell], llm_client: LLMClient) -> list[Engram]:
-        """Replay the scratchpad session and extract lessons.
-
-        Like SWS replay: compresses the full session into a compact summary,
-        then runs a fast LLM pass asking:
-          "If you were to do this task again, what would you tell yourself?"
-
-        Returns structured Engram objects ready for encoding via the Cortex.
-        """
-        # Build compact cell summary
-        summary_lines: list[str] = []
-        for i, cell in enumerate(cells, 1):
-            desc = cell.description or "(no description)"
-            status = "error" if cell.error else "ok"
-            output_preview = ""
-            if cell.stdout:
-                first_line = cell.stdout.strip().split("\n")[0][:200]
-                output_preview = f" → {first_line}"
-            elif cell.error:
-                first_line = cell.error.strip().split("\n")[-1][:200]
-                output_preview = f" → ERROR: {first_line}"
-
-            summary_lines.append(f"Cell {i} [{status}]: {desc}{output_preview}")
-
-            # Include code snippet for error cells (helpful context)
-            if cell.error and cell.code:
-                code_preview = cell.code[:300]
-                if len(cell.code) > 300:
-                    code_preview += "..."
-                summary_lines.append(f"  Code: {code_preview}")
-
-        session_summary = "\n".join(summary_lines)
-
-        try:
-            response = await llm_client.code(
-                system=_CONSOLIDATION_PROMPT,
-                messages=[{"role": "user", "content": session_summary}],
-                max_tokens=2048,
-            )
-
-            raw = response.content.strip()
-            # Handle markdown code fences
-            if raw.startswith("```"):
-                raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
-                if raw.endswith("```"):
-                    raw = raw[:-3]
-                raw = raw.strip()
-
-            items = json.loads(raw)
-            if not isinstance(items, list):
-                return []
-
-        except Exception:
-            return []
-
-        engrams: list[Engram] = []
-        for item in items:
-            if not isinstance(item, dict) or "text" not in item:
-                continue
-
-            kind = item.get("kind", "lesson")
-            if kind not in ("always", "never", "when", "lesson"):
-                kind = "lesson"
-
-            scope = item.get("scope", "project")
-            if scope not in ("global", "project"):
-                scope = "project"
-
-            confidence = item.get("confidence", "medium")
-            if confidence not in ("high", "medium", "low"):
-                confidence = "medium"
-
-            engrams.append(Engram(
-                text=item["text"],
-                kind=kind,
-                scope=scope,
-                confidence=confidence,
-                topic=item.get("topic", ""),
-                source="consolidation",
-            ))
-
-        # Cap extraction to prevent memory bloat from single sessions
-        return engrams[:5]
+__all__ = ["Consolidator"]
diff --git a/anton/memory/cortex.py b/anton/memory/cortex.py
index bb3344bd..7065db47 100644
--- a/anton/memory/cortex.py
+++ b/anton/memory/cortex.py
@@ -1,461 +1,4 @@
-"""Cortex — Anton's executive memory coordinator.
+# Shim — re-exports from core. Import from anton.core.memory.cortex directly.
+from anton.core.memory.cortex import Cortex
 
-Named for the Prefrontal Cortex (PFC), the brain's executive center that
-orchestrates memory retrieval by sending top-down signals to the hippocampus
-and other memory systems.
-
-The dorsolateral PFC handles strategic retrieval — selecting which memories
-to pull into working memory. The ventromedial PFC integrates across memory
-systems to provide coherent context. The Cortex class mirrors both:
-
-  - build_memory_context() → dlPFC: strategic retrieval for the system prompt
-  - get_scratchpad_context() → vmPFC: integrating relevant knowledge for tools
-  - encode() → executive decision to encode (directing the hippocampus)
-  - encoding_gate() → encoding gate modulated by the memory mode
-
-The Cortex coordinates two Hippocampus instances (global + project scope),
-like how the PFC coordinates retrieval from multiple brain memory systems.
-"""
-
-from __future__ import annotations
-
-import json
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from anton.memory.hippocampus import Engram, Hippocampus
-
-if TYPE_CHECKING:
-    from anton.core.llm.client import LLMClient
-
-
-_IDENTITY_EXTRACT_PROMPT = """\
-Extract identity facts from this user message. Return a JSON array of strings,
-each a concise fact about the user (name, timezone, expertise, preferences, tools).
-
-If no identity-relevant information is found, return [].
-
-Examples of identity facts:
-- "Name: Jorge"
-- "Timezone: PST"
-- "Prefers dark mode"
-- "Uses uv over pip"
-
-Only extract facts that are clearly about the user's identity, preferences,
-or working style. Ignore transient conversation details.
-"""
-
-_COMPACTION_PROMPT = """\
-You are a memory compaction system. Review these memory entries and:
-1. Remove exact duplicates
-2. Merge entries that say the same thing differently — keep the clearest version
-3. Remove entries that are superseded by newer, more specific entries
-4. Keep all unique, useful entries
-
-Return a JSON object with:
-- "kept": array of entry strings to keep — preserve the trailing `<!-- ... -->` metadata comment on each entry exactly as it appears
-- "merged": array of strings describing what was merged
-- "pruned": array of strings describing what was removed and why
-
-Be conservative — when in doubt, keep the entry.
-"""
-
-
-class Cortex:
-    """Executive coordinator for Anton's memory systems.
-
-    Manages two Hippocampus instances (global + project scope), decides what
-    memories to load into working memory (the context window), and gates
-    encoding based on the current memory mode (the neuromodulatory setting).
-    """
-
-    def __init__(
-        self,
-        global_dir: Path,
-        project_dir: Path,
-        mode: str = "autopilot",
-        llm_client: LLMClient | None = None,
-    ) -> None:
-        """Initialize the executive with two hippocampal stores.
-
-        Args:
-            global_dir: Path to ~/.anton/memory/ (cross-project memories)
-            project_dir: Path to <project>/.anton/memory/ (project-specific)
-            mode: Memory mode — autopilot|copilot|off (encoding gate)
-            llm_client: For LLM-assisted operations (profile extraction, compaction)
-        """
-        self.global_hc = Hippocampus(global_dir)
-        self.project_hc = Hippocampus(project_dir)
-        self.mode = mode
-        self._llm = llm_client
-        self._turn_count = 0
-
-    # ~6000 chars ≈ ~1500 tokens — above this, use LLM to filter rules
-    _RULES_BUDGET_CHARS = 6000
-
-    _RULES_RETRIEVAL_PROMPT = """\
-Given the user's current message, select only the conditional (When/If) rules that are \
-relevant. Return the selected rules exactly as they appear, one per line (keep the "- " prefix).
-If all rules are relevant, return them all. If none are relevant, return "NONE".
-Do NOT add, modify, or summarize rules — return them verbatim.
-"""
-
-    async def build_memory_context(self, user_message: str = "") -> str:
-        """Assemble memories for the system prompt — the 'working memory' load.
-
-        Like the dlPFC performing strategic retrieval: selects what enters
-        the context window based on relevance and budget.
-
-        Args:
-            user_message: Current user message for cue-dependent retrieval.
-                When rules exceed the token budget, only relevant rules are loaded.
-        """
-        sections: list[str] = []
-
-        # 1. Identity (global only — identity is singular)
-        identity = self.global_hc.recall_identity()
-        if identity:
-            sections.append(f"## Your Memory — Identity\n{identity}")
-
-        # 2. Global rules (with smart retrieval)
-        global_rules = self.global_hc.recall_rules()
-        if global_rules:
-            global_rules = await self._retrieve_relevant_rules(global_rules, user_message)
-            if global_rules:
-                sections.append(f"## Your Memory — Global Rules\n{global_rules}")
-
-        # 3. Project rules (with smart retrieval)
-        project_rules = self.project_hc.recall_rules()
-        if project_rules:
-            project_rules = await self._retrieve_relevant_rules(project_rules, user_message)
-            if project_rules:
-                sections.append(f"## Your Memory — Project Rules\n{project_rules}")
-
-        # 4. Global lessons
-        global_lessons = self.global_hc.recall_lessons(token_budget=1000)
-        if global_lessons:
-            sections.append(f"## Your Memory — Global Lessons\n{global_lessons}")
-
-        # 5. Project lessons
-        project_lessons = self.project_hc.recall_lessons(token_budget=1000)
-        if project_lessons:
-            sections.append(f"## Your Memory — Project Lessons\n{project_lessons}")
-
-        # 6. Minds datasource context (auto-loaded if present)
-        minds_topic = self.project_hc.recall_topic("minds-datasource")
-        if minds_topic:
-            sections.append(f"## Minds — Datasource Context\n{minds_topic}")
-
-        if not sections:
-            return ""
-
-        return "\n\n" + "\n\n".join(sections)
-
-    async def _retrieve_relevant_rules(self, all_rules: str, user_message: str) -> str:
-        """Filter rules to only those relevant to the current user message.
-
-        Brain analog: dlPFC cue-dependent recall — the prefrontal cortex
-        selects which memories to activate based on current goals, rather
-        than loading everything into working memory.
-
-        Always/Never rules are behavioral constraints — always loaded in full.
-        Only conditional (When/If) rules are filtered by relevance.
-        If rules are under budget or no LLM is available, returns as-is.
-        """
-        if not user_message or self._llm is None:
-            return all_rules
-        if len(all_rules) <= self._RULES_BUDGET_CHARS:
-            return all_rules
-
-        # Split rules into mandatory (Always/Never) and filterable (When)
-        lines = all_rules.splitlines()
-        mandatory_lines: list[str] = []
-        when_lines: list[str] = []
-        current_section = ""
-
-        for line in lines:
-            stripped = line.strip()
-            if stripped.startswith("## Always"):
-                current_section = "always"
-                mandatory_lines.append(line)
-            elif stripped.startswith("## Never"):
-                current_section = "never"
-                mandatory_lines.append(line)
-            elif stripped.startswith("## When"):
-                current_section = "when"
-                mandatory_lines.append(line)  # keep the header
-            elif stripped.startswith("## ") or stripped.startswith("# "):
-                current_section = ""
-                mandatory_lines.append(line)
-            elif current_section == "when":
-                when_lines.append(line)
-            else:
-                mandatory_lines.append(line)
-
-        # If When section is small, no need to filter
-        when_text = "\n".join(when_lines).strip()
-        if not when_text or len(when_text) < 1000:
-            return all_rules
-
-        # Filter only the When rules
-        try:
-            response = await self._llm.code(
-                system=self._RULES_RETRIEVAL_PROMPT,
-                messages=[{
-                    "role": "user",
-                    "content": f"User message: {user_message}\n\nRules:\n{when_text}",
-                }],
-                max_tokens=4096,
-            )
-            result = response.content.strip()
-            if result == "NONE":
-                filtered_when = ""
-            elif result:
-                filtered_when = result
-            else:
-                filtered_when = when_text
-        except Exception:
-            filtered_when = when_text
-
-        # Reassemble: mandatory sections + filtered When rules
-        output = "\n".join(mandatory_lines)
-        if filtered_when:
-            output += "\n" + filtered_when
-        return output
-
-    def get_scratchpad_context(self) -> str:
-        """Retrieve procedural knowledge for scratchpad tool injection.
-
-        Like the vmPFC integrating memories for action planning — combines
-        global + project scratchpad wisdom into a coherent set of guidelines.
-        """
-        parts: list[str] = []
-
-        global_wisdom = self.global_hc.recall_scratchpad_wisdom()
-        if global_wisdom:
-            parts.append(global_wisdom)
-
-        project_wisdom = self.project_hc.recall_scratchpad_wisdom()
-        if project_wisdom:
-            parts.append(project_wisdom)
-
-        return "\n".join(parts)
-
-    async def encode(self, engrams: list[Engram]) -> list[str]:
-        """Direct the hippocampus to encode new memories.
-
-        Routes each engram to the appropriate hippocampal store based on scope.
-        Returns list of actions taken for logging.
-        """
-        if self.mode == "off":
-            return ["Memory encoding is disabled."]
-
-        actions: list[str] = []
-        for engram in engrams:
-            hc = self.global_hc if engram.scope == "global" else self.project_hc
-
-            if engram.kind == "profile":
-                # Profile entries accumulate, then rewrite
-                existing = hc.recall_identity()
-                entries = []
-                if existing:
-                    for line in existing.splitlines():
-                        stripped = line.strip()
-                        if stripped.startswith("- "):
-                            entries.append(stripped[2:])
-                        elif stripped and not stripped.startswith("#"):
-                            entries.append(stripped)
-                entries.append(engram.text)
-                hc.rewrite_identity(entries)
-                actions.append(f"Updated identity: {engram.text}")
-
-            elif engram.kind in ("always", "never", "when"):
-                hc.encode_rule(
-                    engram.text,
-                    kind=engram.kind,
-                    confidence=engram.confidence,
-                    source=engram.source,
-                )
-                actions.append(f"Encoded {engram.kind} rule: {engram.text}")
-
-            elif engram.kind == "lesson":
-                hc.encode_lesson(
-                    engram.text,
-                    topic=engram.topic,
-                    source=engram.source,
-                )
-                actions.append(f"Encoded lesson: {engram.text}")
-
-        return actions
-
-    def encoding_gate(self, engram: Engram) -> bool:
-        """Whether this engram needs user confirmation before encoding.
-
-        Brain analog: the Locus Coeruleus-NE system modulating encoding gain.
-        - autopilot (high NE): encode everything → never confirm
-        - copilot (moderate NE): auto-encode high-confidence, confirm ambiguous
-        - off (suppressed ACh): never encode (but also never writes)
-
-        Confirmations are always deferred until after the user has received
-        their answer — never shown during scratchpad execution or mid-turn.
-        """
-        if self.mode == "autopilot":
-            return False
-        if self.mode == "off":
-            return False  # Won't reach encoding anyway
-        # copilot: auto-encode high confidence user-sourced, confirm rest
-        return engram.confidence != "high"
-
-    # --- Compaction: Systems Consolidation + Synaptic Homeostasis ---
-
-    _COMPACTION_THRESHOLD = 20  # entries before compaction triggers
-    _VACUUM_INTERVAL = 10  # check compaction every N turns
-
-    def needs_compaction(self) -> bool:
-        """Check if memory files have grown beyond the compaction threshold.
-
-        Brain analog: synaptic saturation — during waking hours, synapses
-        strengthen indiscriminately. When the load exceeds a threshold,
-        consolidation/pruning is triggered.
-        """
-        return (
-            self.global_hc.entry_count() > self._COMPACTION_THRESHOLD
-            or self.project_hc.entry_count() > self._COMPACTION_THRESHOLD
-        )
-
-    async def compact_all(self) -> None:
-        """Run systems consolidation on all memory files.
-
-        Brain analog: the Synaptic Homeostasis Hypothesis (Tononi-Cirelli).
-        Uses the coding model for fast, cheap deduplication.
-        """
-        if self._llm is None:
-            return
-
-        for hc in (self.global_hc, self.project_hc):
-            if hc.entry_count() > self._COMPACTION_THRESHOLD:
-                await self._compact_file(hc, hc._lessons_path, "lesson")
-                await self._compact_file(hc, hc._rules_path, "rules")
-
-    async def vacuum(self) -> None:
-        """Run compaction unconditionally on all memory files.
-
-        Public entry point for on-demand cleanup (e.g. after /connect).
-        Unlike compact_all(), skips the threshold check — always runs.
-        """
-        if self._llm is None:
-            return
-        for hc in (self.global_hc, self.project_hc):
-            await self._compact_file(hc, hc._lessons_path, "lesson")
-            await self._compact_file(hc, hc._rules_path, "rules")
-
-    def maybe_vacuum(self) -> None:
-        """Periodic vacuum check — call after each assistant turn.
-
-        Every _VACUUM_INTERVAL turns, checks if compaction is needed and
-        fires it in the background if so.
-        """
-        import asyncio
-
-        self._turn_count += 1
-        if self._turn_count % self._VACUUM_INTERVAL != 0:
-            return
-        if not self.needs_compaction():
-            return
-        asyncio.create_task(self.compact_all())
-
-    async def _compact_file(self, hc: Hippocampus, path: Path, kind: str) -> None:
-        """Compact a single memory file using LLM-assisted deduplication."""
-        if not path.is_file():
-            return
-
-        content = path.read_text(encoding="utf-8")
-        entries = [ln.strip() for ln in content.splitlines() if ln.strip().startswith("- ")]
-
-        if len(entries) < 8:
-            return
-
-        try:
-            response = await self._llm.code(
-                system=_COMPACTION_PROMPT,
-                messages=[{"role": "user", "content": "\n".join(entries)}],
-                max_tokens=4096,
-            )
-            result = json.loads(response.content)
-            kept = result.get("kept", entries)
-        except Exception:
-            return  # Don't corrupt memory on failure
-
-        if not kept:
-            return
-
-        # Rebuild the file
-        if kind == "rules":
-            # Preserve section structure
-            always = [e for e in kept if "always" in e.lower() or not any(
-                k in e.lower() for k in ("never", "when", "if ")
-            )]
-            never = [e for e in kept if "never" in e.lower()]
-            when_rules = [e for e in kept if "when" in e.lower() or "if " in e.lower()]
-
-            lines = ["# Rules\n", "## Always"]
-            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in always)
-            lines.extend(["", "## Never"])
-            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in never)
-            lines.extend(["", "## When"])
-            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in when_rules)
-            new_content = "\n".join(lines) + "\n"
-        else:
-            lines = ["# Lessons"]
-            lines.extend(f"- {e}" if not e.startswith("- ") else e for e in kept)
-            new_content = "\n".join(lines) + "\n"
-
-        hc._encode_with_lock(path, new_content, mode="write")
-
-    async def maybe_update_identity(self, user_message: str) -> None:
-        """Check if conversation reveals identity facts worth profiling.
-
-        Brain analog: the Default Mode Network passively monitoring for
-        self-relevant information. Runs infrequently (every ~5 turns)
-        to avoid overhead. Uses fast coding model for classification.
-        """
-        if self._llm is None or self.mode == "off":
-            return
-
-        try:
-            response = await self._llm.code(
-                system=_IDENTITY_EXTRACT_PROMPT,
-                messages=[{"role": "user", "content": user_message}],
-                max_tokens=512,
-            )
-            facts = json.loads(response.content)
-            if not isinstance(facts, list) or not facts:
-                return
-        except Exception:
-            return
-
-        # Merge with existing identity
-        existing = self.global_hc.recall_identity()
-        existing_entries: list[str] = []
-        if existing:
-            for line in existing.splitlines():
-                stripped = line.strip()
-                if stripped.startswith("- "):
-                    existing_entries.append(stripped[2:])
-                elif stripped and not stripped.startswith("#"):
-                    existing_entries.append(stripped)
-
-        # Add new facts, avoiding duplicates
-        for fact in facts:
-            if isinstance(fact, str) and fact not in existing_entries:
-                # Check if this updates an existing fact (same key prefix)
-                key = fact.split(":")[0].strip().lower() if ":" in fact else ""
-                if key:
-                    existing_entries = [
-                        e for e in existing_entries
-                        if not e.lower().startswith(key + ":")
-                    ]
-                existing_entries.append(fact)
-
-        if existing_entries:
-            self.global_hc.rewrite_identity(existing_entries)
+__all__ = ["Cortex"]

From 6709cb3a276a17c78fb869774ba9b5197271b795 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:56:55 +0200
Subject: [PATCH 049/134] Use shims for initial improts

---
 anton/memory/episodes.py    | 227 +-------------------
 anton/memory/hippocampus.py | 409 +-----------------------------------
 2 files changed, 6 insertions(+), 630 deletions(-)

diff --git a/anton/memory/episodes.py b/anton/memory/episodes.py
index 339fd135..0fbb335f 100644
--- a/anton/memory/episodes.py
+++ b/anton/memory/episodes.py
@@ -1,225 +1,4 @@
-"""Episodic memory — timestamped, searchable archive of conversations.
+# Shim — re-exports from core. Import from anton.core.memory.episodes directly.
+from anton.core.memory.episodes import Episode, EpisodicMemory
 
-Brain analog: Medial Temporal Lobe episodic memory system.  Logs every
-turn (user input, assistant response, tool calls, scratchpad cells) as
-JSONL.  Fire-and-forget: never blocks anything.
-"""
-
-from __future__ import annotations
-
-import json
-import re
-from dataclasses import asdict, dataclass, field
-from datetime import datetime, timedelta, timezone
-from pathlib import Path
-
-
-@dataclass
-class Episode:
-    ts: str  # ISO 8601
-    session: str  # Session ID (matches filename stem)
-    turn: int
-    role: str  # "user" | "assistant" | "tool_call" | "tool_result" | "scratchpad"
-    content: str
-    meta: dict = field(default_factory=dict)
-
-
-_MAX_TOOL_INPUT = 2000
-_MAX_TOOL_RESULT = 2000
-
-
-class EpisodicMemory:
-    """Append-only conversation archive stored as per-session JSONL files."""
-
-    def __init__(self, episodes_dir: Path, *, enabled: bool = True) -> None:
-        self._dir = episodes_dir
-        self._enabled = enabled
-        self._session_id: str | None = None
-        self._file: Path | None = None
-
-
-    @property
-    def enabled(self) -> bool:
-        return self._enabled
-
-    @enabled.setter
-    def enabled(self, value: bool) -> None:
-        self._enabled = value
-
-    def start_session(self) -> str:
-        """Create a new JSONL file for this session and return the session ID."""
-        now = datetime.now(timezone.utc)
-        self._session_id = now.strftime("%Y%m%d_%H%M%S")
-        self._dir.mkdir(parents=True, exist_ok=True)
-        self._file = self._dir / f"{self._session_id}.jsonl"
-        self._file.touch()
-        return self._session_id
-
-    def resume_session(self, session_id: str) -> str:
-        """Resume an existing session by reusing its session ID and file."""
-        self._session_id = session_id
-        self._dir.mkdir(parents=True, exist_ok=True)
-        self._file = self._dir / f"{self._session_id}.jsonl"
-        if not self._file.exists():
-            self._file.touch()
-        return self._session_id
-
-    def log(self, episode: Episode) -> None:
-        """Append an episode to the current session file.  Never raises."""
-        if not self._enabled or self._file is None:
-            return
-        try:
-            import sys
-
-            line = json.dumps(asdict(episode), ensure_ascii=False) + "\n"
-            with self._file.open("a", encoding="utf-8") as f:
-                if sys.platform != "win32":
-                    import fcntl
-                    fcntl.flock(f, fcntl.LOCK_EX)
-                    f.write(line)
-                    fcntl.flock(f, fcntl.LOCK_UN)
-                else:
-                    f.write(line)
-        except Exception:
-            pass  # Fire-and-forget
-
-    def log_turn(
-        self,
-        turn: int,
-        role: str,
-        content: str,
-        **meta: object,
-    ) -> None:
-        """Convenience wrapper around log()."""
-        if not self._enabled or self._session_id is None:
-            return
-        # Truncate tool content
-        if role == "tool_call":
-            content = content[:_MAX_TOOL_INPUT]
-        elif role == "tool_result":
-            content = content[:_MAX_TOOL_RESULT]
-
-        self.log(Episode(
-            ts=datetime.now(timezone.utc).isoformat(),
-            session=self._session_id,
-            turn=turn,
-            role=role,
-            content=content,
-            meta=dict(meta),
-        ))
-
-    def recall(
-        self,
-        query: str,
-        *,
-        max_results: int = 20,
-        days_back: int | None = None,
-    ) -> list[Episode]:
-        """Search episodes for *query* (case-insensitive substring match).
-
-        When a user turn matches, returns the full episode context: the
-        matching turn plus the assistant response, tool calls, and scratchpad
-        results from the same turn. This mirrors real episodic recall — you
-        remember the whole episode, not just the cue.
-
-        Returns newest-first, capped at *max_results* episodes (each episode
-        may include multiple turns).
-        """
-        if not self._dir.is_dir():
-            return []
-
-        cutoff: datetime | None = None
-        if days_back is not None:
-            cutoff = datetime.now(timezone.utc) - timedelta(days=days_back)
-
-        pattern = re.compile(re.escape(query), re.IGNORECASE)
-        matches: list[Episode] = []
-        seen_turns: set[tuple[str, int]] = set()  # (session, turn) dedup
-
-        # Iterate files newest-first (filenames sort chronologically)
-        for path in sorted(self._dir.glob("*.jsonl"), reverse=True):
-            if cutoff is not None:
-                stem = path.stem
-                try:
-                    file_dt = datetime.strptime(stem, "%Y%m%d_%H%M%S").replace(
-                        tzinfo=timezone.utc,
-                    )
-                    if file_dt < cutoff:
-                        continue
-                except ValueError:
-                    pass
-
-            try:
-                lines = path.read_text(encoding="utf-8").strip().splitlines()
-            except Exception:
-                continue
-
-            # Parse all episodes in this file for context lookups
-            all_episodes: list[Episode] = []
-            for line in lines:
-                if not line.strip():
-                    continue
-                try:
-                    all_episodes.append(Episode(**json.loads(line)))
-                except Exception:
-                    continue
-
-            # Build turn index: (session, turn) -> list of episodes
-            turn_index: dict[tuple[str, int], list[Episode]] = {}
-            for ep in all_episodes:
-                key = (ep.session, ep.turn)
-                turn_index.setdefault(key, []).append(ep)
-
-            # Search newest-first
-            for ep in reversed(all_episodes):
-                if not pattern.search(ep.content):
-                    continue
-
-                key = (ep.session, ep.turn)
-                if key in seen_turns:
-                    continue
-                seen_turns.add(key)
-
-                # Include the matching turn's full context
-                turn_episodes = turn_index.get(key, [ep])
-                matches.extend(turn_episodes)
-
-                # Also grab the next turn if it has an assistant response
-                if ep.role == "user":
-                    next_key = (ep.session, ep.turn + 1)
-                    if next_key not in seen_turns:
-                        next_eps = turn_index.get(next_key, [])
-                        has_response = any(
-                            e.role in ("assistant", "tool_result", "scratchpad")
-                            for e in next_eps
-                        )
-                        if next_eps and has_response:
-                            seen_turns.add(next_key)
-                            matches.extend(next_eps)
-
-                if len(seen_turns) >= max_results:
-                    return matches
-
-        return matches
-
-    def recall_formatted(
-        self,
-        query: str,
-        **kwargs: object,
-    ) -> str:
-        """Return a human-readable string of matching episodes."""
-        episodes = self.recall(query, **kwargs)  # type: ignore[arg-type]
-        if not episodes:
-            return f"No episodes found matching '{query}'."
-        lines: list[str] = []
-        for ep in episodes:
-            # Show more content for assistant/scratchpad responses
-            max_len = 2000 if ep.role in ("assistant", "scratchpad", "tool_result") else 500
-            lines.append(f"[{ep.ts}] ({ep.role}) {ep.content[:max_len]}")
-        return "\n".join(lines)
-
-    def session_count(self) -> int:
-        """Count the number of session files."""
-        if not self._dir.is_dir():
-            return 0
-        return sum(1 for _ in self._dir.glob("*.jsonl"))
+__all__ = ["Episode", "EpisodicMemory"]
diff --git a/anton/memory/hippocampus.py b/anton/memory/hippocampus.py
index 4bea725e..b0ff99f3 100644
--- a/anton/memory/hippocampus.py
+++ b/anton/memory/hippocampus.py
@@ -1,407 +1,4 @@
-"""Hippocampus — Anton's memory encoding and retrieval engine.
+# Shim — re-exports from core. Import from anton.core.memory.hippocampus directly.
+from anton.core.memory.hippocampus import Engram, Hippocampus
 
-Named for the brain's hippocampus (CA3/CA1 subfields), which handles the
-fundamental operations of memory: encoding new traces (writing) and
-pattern-completing partial cues into full memories (reading).
-
-The hippocampus doesn't decide *what* to remember — that's the cortex's job.
-It simply executes storage and retrieval at a single scope (global or project),
-like how the brain's hippocampus encodes at the level of individual memory traces
-without executive judgment about importance.
-
-Each Hippocampus instance manages one scope's files:
-  - profile.md  → identity (mPFC / Default Mode Network analogy)
-  - rules.md    → behavioral gates (Basal Ganglia / OFC analogy)
-  - lessons.md  → semantic facts (Anterior Temporal Lobe analogy)
-  - topics/*.md → domain expertise (Cortical Association Areas analogy)
-"""
-
-from __future__ import annotations
-
-import re
-import sys
-import time
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Literal
-
-
-@dataclass
-class Engram:
-    """A single memory trace — the fundamental unit of memory.
-
-    Named for Karl Lashley's 'engram' — the physical substrate of a memory.
-    Each engram carries its content plus metadata about confidence, origin,
-    and topic for later retrieval and consolidation.
-    """
-
-    text: str
-    kind: Literal["always", "never", "when", "lesson", "profile"]
-    scope: Literal["global", "project"]
-    confidence: Literal["high", "medium", "low"] = "medium"
-    topic: str = ""
-    source: Literal["user", "consolidation", "llm"] = "llm"
-
-
-class Hippocampus:
-    """Reads and writes memory traces at a single scope (global OR project).
-
-    Like the hippocampal CA3 region (pattern completion for reads) and CA1
-    region (pattern separation for writes), this class handles the low-level
-    mechanics of memory storage without higher-order decisions about relevance
-    or importance.
-    """
-
-    def __init__(self, base_dir: Path) -> None:
-        """Initialize for a single scope.
-
-        Args:
-            base_dir: ~/.anton/memory/ (global) or <project>/.anton/memory/ (project)
-        """
-        self._dir = base_dir
-        self._profile_path = base_dir / "profile.md"
-        self._rules_path = base_dir / "rules.md"
-        self._lessons_path = base_dir / "lessons.md"
-        self._topics_dir = base_dir / "topics"
-
-
-    def recall_identity(self) -> str:
-        """Load the always-on self-model (profile.md).
-
-        Brain analog: medial Prefrontal Cortex / Default Mode Network.
-        This is the identity substrate — always active, never "looked up",
-        it contextualizes all other processing. Global scope only.
-        """
-        if not self._profile_path.is_file():
-            return ""
-        try:
-            return self._profile_path.read_text(encoding="utf-8").strip()
-        except (OSError, UnicodeDecodeError):
-            return ""
-
-    def recall_rules(self) -> str:
-        """Load behavioral gates (rules.md) as formatted always/never/when.
-
-        Brain analog: Basal Ganglia (Go/No-Go pathways) + Orbitofrontal Cortex
-        (conditional behavioral rules). These aren't memories to recall —
-        they're constraints that shape action selection.
-        """
-        if not self._rules_path.is_file():
-            return ""
-        try:
-            return self._rules_path.read_text(encoding="utf-8").strip()
-        except (OSError, UnicodeDecodeError):
-            return ""
-
-    def recall_lessons(self, token_budget: int = 1000) -> str:
-        """Load semantic knowledge (lessons.md), most recent first, within budget.
-
-        Brain analog: Anterior Temporal Lobe — the convergence hub for semantic
-        facts distilled from many episodes. Budget enforced at ~4 chars/token.
-        """
-        if not self._lessons_path.is_file():
-            return ""
-        try:
-            content = self._lessons_path.read_text(encoding="utf-8").strip()
-        except (OSError, UnicodeDecodeError):
-            return ""
-
-        if not content:
-            return ""
-
-        # Extract individual entries (lines starting with "- ")
-        lines = [ln for ln in content.splitlines() if ln.strip()]
-        # Keep header, then entries in reverse order (most recent last → first)
-        header_lines = []
-        entry_lines = []
-        for ln in lines:
-            if ln.startswith("- ") or ln.startswith("  "):
-                entry_lines.append(ln)
-            else:
-                header_lines.append(ln)
-
-        # Reverse entries so most recent are first
-        entry_lines.reverse()
-
-        # Budget: ~4 chars per token
-        char_budget = token_budget * 4
-        result_lines = list(header_lines)
-        used = sum(len(ln) for ln in result_lines)
-
-        for ln in entry_lines:
-            if used + len(ln) + 1 > char_budget:
-                break
-            result_lines.append(ln)
-            used += len(ln) + 1
-
-        return "\n".join(result_lines)
-
-    def recall_topic(self, slug: str) -> str:
-        """Load deep domain expertise on demand (topics/{slug}.md).
-
-        Brain analog: Cortical Association Areas — specialized regions activated
-        associatively when contextual cues indicate relevance.
-        """
-        safe_slug = self._sanitize_slug(slug)
-        path = self._topics_dir / f"{safe_slug}.md"
-        if not path.is_file():
-            return ""
-        try:
-            return path.read_text(encoding="utf-8").strip()
-        except (OSError, UnicodeDecodeError):
-            return ""
-
-    def recall_scratchpad_wisdom(self) -> str:
-        """Retrieve procedural knowledge relevant to scratchpad execution.
-
-        Returns all "when" rules + lessons with topic starting with "scratchpad-".
-        Injected into tool descriptions so the LLM sees them when composing code.
-        """
-        parts: list[str] = []
-
-        # Extract "when" rules
-        rules = self.recall_rules()
-        if rules:
-            in_when = False
-            for line in rules.splitlines():
-                if line.strip().startswith("## When"):
-                    in_when = True
-                    continue
-                elif line.strip().startswith("## "):
-                    in_when = False
-                    continue
-                if in_when and line.strip().startswith("- "):
-                    parts.append(line.strip())
-
-        # Extract scratchpad-related lessons
-        lessons = self._read_full_lessons()
-        for line in lessons.splitlines():
-            if line.strip().startswith("- ") and "scratchpad" in line.lower():
-                stripped = line.strip()
-                if stripped not in parts:
-                    parts.append(stripped)
-
-        # Check topics/scratchpad-*.md files
-        if self._topics_dir.is_dir():
-            for path in sorted(self._topics_dir.iterdir()):
-                if path.name.startswith("scratchpad-") and path.suffix == ".md":
-                    try:
-                        content = path.read_text(encoding="utf-8").strip()
-                        if content:
-                            parts.append(content)
-                    except (OSError, UnicodeDecodeError):
-                        continue
-
-        return "\n".join(parts)
-
-    def _read_full_lessons(self) -> str:
-        """Read lessons.md without budget constraint (for internal use)."""
-        if not self._lessons_path.is_file():
-            return ""
-        try:
-            return self._lessons_path.read_text(encoding="utf-8").strip()
-        except (OSError, UnicodeDecodeError):
-            return ""
-
-    def encode_rule(
-        self,
-        text: str,
-        kind: Literal["always", "never", "when"],
-        confidence: str = "medium",
-        source: str = "llm",
-    ) -> None:
-        """Write a new behavioral gate to rules.md.
-
-        Appends under the correct section (Always/Never/When).
-        Uses file locking for safety — like how the hippocampus
-        prevents interference between overlapping encoding events.
-        """
-        self._dir.mkdir(parents=True, exist_ok=True)
-
-        ts = time.strftime("%Y-%m-%d")
-        metadata = f"<!-- confidence:{confidence} source:{source} ts:{ts} -->"
-        entry = f"- {text} {metadata}\n"
-
-        section_header = f"## {kind.capitalize()}"
-
-        # Read existing content or create skeleton
-        if self._rules_path.is_file():
-            content = self._rules_path.read_text(encoding="utf-8")
-        else:
-            content = "# Rules\n\n## Always\n\n## Never\n\n## When\n"
-
-        # Check for duplicate (exact entry match, ignoring metadata)
-        if text in self._extract_entry_texts(content):
-            return
-
-        # Find the section and append
-        lines = content.splitlines(keepends=True)
-        new_lines: list[str] = []
-        inserted = False
-
-        i = 0
-        while i < len(lines):
-            new_lines.append(lines[i])
-            if lines[i].strip() == section_header and not inserted:
-                # Skip to end of section (next ## or end of file)
-                i += 1
-                section_entries: list[str] = []
-                while i < len(lines) and not (
-                    lines[i].strip().startswith("## ") and lines[i].strip() != section_header
-                ):
-                    section_entries.append(lines[i])
-                    i += 1
-                # Add existing entries
-                new_lines.extend(section_entries)
-                # Ensure we have a blank line before the entry if needed
-                if section_entries and section_entries[-1].strip():
-                    new_lines.append("\n")
-                elif not section_entries:
-                    pass  # Section was empty, entry follows header
-                new_lines.append(entry)
-                inserted = True
-                continue
-            i += 1
-
-        if not inserted:
-            # Section didn't exist — add it
-            new_lines.append(f"\n{section_header}\n{entry}")
-
-        self._encode_with_lock(self._rules_path, "".join(new_lines), mode="write")
-
-    def encode_lesson(
-        self,
-        text: str,
-        topic: str = "",
-        source: str = "llm",
-    ) -> None:
-        """Write a semantic fact to lessons.md.
-
-        If a topic is provided, also creates/appends to topics/{slug}.md.
-        """
-        self._dir.mkdir(parents=True, exist_ok=True)
-
-        ts = time.strftime("%Y-%m-%d")
-        topic_tag = f" topic:{topic}" if topic else ""
-        entry = f"- {text} <!--{topic_tag} ts:{ts} -->\n"
-
-        # Append to lessons.md
-        if not self._lessons_path.is_file():
-            self._encode_with_lock(
-                self._lessons_path,
-                f"# Lessons\n{entry}",
-                mode="write",
-            )
-        else:
-            # Check for duplicate (exact entry match, ignoring metadata)
-            existing = self._lessons_path.read_text(encoding="utf-8")
-            if text in self._extract_entry_texts(existing):
-                return
-            self._encode_with_lock(self._lessons_path, entry, mode="append")
-
-        # Also write to topic file if topic is substantial
-        if topic:
-            self._topics_dir.mkdir(parents=True, exist_ok=True)
-            slug = self._sanitize_slug(topic)
-            topic_path = self._topics_dir / f"{slug}.md"
-            if not topic_path.is_file():
-                self._encode_with_lock(
-                    topic_path,
-                    f"# {topic}\n{entry}",
-                    mode="write",
-                )
-            else:
-                existing = topic_path.read_text(encoding="utf-8")
-                if text not in self._extract_entry_texts(existing):
-                    self._encode_with_lock(topic_path, entry, mode="append")
-
-    def rewrite_identity(self, entries: list[str]) -> None:
-        """Replace the identity snapshot (profile.md) — full rewrite, not append.
-
-        Unlike other memory operations, identity is a coherent snapshot, not
-        an append log. Like how your self-concept updates as a whole, not
-        by appending new facts to old ones.
-        """
-        self._dir.mkdir(parents=True, exist_ok=True)
-        content = "# Profile\n" + "\n".join(f"- {e}" for e in entries) + "\n"
-        self._encode_with_lock(self._profile_path, content, mode="write")
-
-    def entry_count(self) -> int:
-        """Count total entries across rules.md and lessons.md."""
-        count = 0
-        for path in (self._rules_path, self._lessons_path):
-            if path.is_file():
-                try:
-                    content = path.read_text(encoding="utf-8")
-                    count += sum(1 for ln in content.splitlines() if ln.strip().startswith("- "))
-                except (OSError, UnicodeDecodeError):
-                    continue
-        return count
-
-    def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None:
-        """Write with file locking (fcntl.flock on Unix, no-op on Windows).
-
-        Prevents corruption from concurrent Anton sessions writing to
-        global memory — like synaptic tagging ensuring encoding fidelity
-        despite concurrent neural activity.
-        """
-        path.parent.mkdir(parents=True, exist_ok=True)
-
-        if mode == "write":
-            # Atomic write via temp file + rename
-            tmp_path = path.with_suffix(path.suffix + ".tmp")
-            with open(tmp_path, "w", encoding="utf-8") as f:
-                if sys.platform != "win32":
-                    import fcntl
-                    fcntl.flock(f.fileno(), fcntl.LOCK_EX)
-                try:
-                    f.write(text)
-                    f.flush()
-                finally:
-                    if sys.platform != "win32":
-                        import fcntl
-                        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
-            tmp_path.replace(path)
-        else:
-            # Append mode
-            with open(path, "a", encoding="utf-8") as f:
-                if sys.platform != "win32":
-                    import fcntl
-                    fcntl.flock(f.fileno(), fcntl.LOCK_EX)
-                try:
-                    f.write(text)
-                    f.flush()
-                finally:
-                    if sys.platform != "win32":
-                        import fcntl
-                        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
-
-    @staticmethod
-    def _extract_entry_texts(content: str) -> set[str]:
-        """Extract the set of normalized entry texts from a markdown memory file.
-
-        Strips the leading ``- ``, trailing metadata comments, and whitespace
-        so that dedup comparisons are exact-match on the *meaning* line only.
-        """
-        texts: set[str] = set()
-        for line in content.splitlines():
-            stripped = line.strip()
-            if not stripped.startswith("- "):
-                continue
-            # Remove leading "- "
-            entry = stripped[2:]
-            # Remove trailing <!-- ... --> metadata
-            entry = re.sub(r"\s*<!--[\s\S]*?-->\s*$", "", entry)
-            entry = entry.strip()
-            if entry:
-                texts.add(entry)
-        return texts
-
-    @staticmethod
-    def _sanitize_slug(name: str) -> str:
-        """Sanitize a topic name into a safe file slug."""
-        text = name.lower().strip()
-        text = re.sub(r"[^a-z0-9\s_-]", "", text)
-        text = re.sub(r"[\s]+", "-", text)
-        text = re.sub(r"-+", "-", text)
-        return text.strip("-") or "general"
+__all__ = ["Engram", "Hippocampus"]

From 860ec084b21499f06d4b45c622f8aebc0649d4d9 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:57:03 +0200
Subject: [PATCH 050/134] Fix tests

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 2c48a014..56058f5b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from anton.llm.provider import LLMResponse, ToolCall, Usage
+from anton.core.llm.provider import LLMResponse, ToolCall, Usage
 
 
 @pytest.fixture()

From 52b6746583835d4671946828f70c7bc3d3bea92c Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 12:57:07 +0200
Subject: [PATCH 051/134] Fix tests

---
 tests/test_chat_context.py |  2 +-
 tests/test_cortex.py       | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py
index ddf70b82..26339f39 100644
--- a/tests/test_chat_context.py
+++ b/tests/test_chat_context.py
@@ -76,7 +76,7 @@ def memory_dirs(tmp_path):
 @pytest.fixture()
 def cortex(memory_dirs):
     global_dir, project_dir = memory_dirs
-    return Cortex(global_dir=global_dir, project_dir=project_dir, mode="autopilot")
+    return Cortex(global_hc=Hippocampus(global_dir), project_hc=Hippocampus(project_dir), mode="autopilot")
 
 
 class TestMemorizeTool:
diff --git a/tests/test_cortex.py b/tests/test_cortex.py
index e6f202b8..81537033 100644
--- a/tests/test_cortex.py
+++ b/tests/test_cortex.py
@@ -21,7 +21,7 @@ def dirs(tmp_path):
 @pytest.fixture()
 def cortex(dirs):
     g, p = dirs
-    return Cortex(global_dir=g, project_dir=p, mode="copilot")
+    return Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="copilot")
 
 
 class TestBuildMemoryContext:
@@ -98,7 +98,7 @@ async def test_encode_profile(self, cortex, dirs):
 
     async def test_off_mode_returns_disabled(self, dirs):
         g, p = dirs
-        cortex = Cortex(global_dir=g, project_dir=p, mode="off")
+        cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="off")
         engram = Engram(text="test", kind="lesson", scope="global")
         actions = await cortex.encode([engram])
         assert any("disabled" in a.lower() for a in actions)
@@ -107,19 +107,19 @@ async def test_off_mode_returns_disabled(self, dirs):
 class TestEncodingGate:
     def test_autopilot_never_confirms(self, dirs):
         g, p = dirs
-        cortex = Cortex(global_dir=g, project_dir=p, mode="autopilot")
+        cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="autopilot")
         engram = Engram(text="test", kind="lesson", scope="global", confidence="low")
         assert cortex.encoding_gate(engram) is False
 
     def test_off_never_confirms(self, dirs):
         g, p = dirs
-        cortex = Cortex(global_dir=g, project_dir=p, mode="off")
+        cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="off")
         engram = Engram(text="test", kind="lesson", scope="global", confidence="high")
         assert cortex.encoding_gate(engram) is False
 
     def test_copilot_confirms_low_confidence(self, dirs):
         g, p = dirs
-        cortex = Cortex(global_dir=g, project_dir=p, mode="copilot")
+        cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="copilot")
         low = Engram(text="test", kind="lesson", scope="global", confidence="medium")
         high = Engram(text="test", kind="lesson", scope="global", confidence="high")
         assert cortex.encoding_gate(low) is True
@@ -148,7 +148,7 @@ async def test_no_llm_does_nothing(self, cortex, dirs):
     async def test_off_mode_does_nothing(self, dirs):
         g, p = dirs
         mock_llm = AsyncMock()
-        cortex = Cortex(global_dir=g, project_dir=p, mode="off", llm_client=mock_llm)
+        cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="off", llm_client=mock_llm)
         await cortex.maybe_update_identity("I'm Jorge")
         mock_llm.code.assert_not_called()
 
@@ -156,7 +156,7 @@ async def test_extracts_identity(self, dirs):
         g, p = dirs
         mock_llm = AsyncMock()
         mock_llm.code = AsyncMock(return_value=type("R", (), {"content": '["Name: Jorge"]'})())
-        cortex = Cortex(global_dir=g, project_dir=p, mode="copilot", llm_client=mock_llm)
+        cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="copilot", llm_client=mock_llm)
         await cortex.maybe_update_identity("Hi, I'm Jorge")
         assert (g / "profile.md").exists()
         assert "Name: Jorge" in (g / "profile.md").read_text()

From e2567e749e468b19bdbb0adf2ec354ca82725a95 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 13:06:03 +0200
Subject: [PATCH 052/134] Update docs

---
 anton/README.md             | 2 +-
 anton/core/memory/cortex.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/anton/README.md b/anton/README.md
index c810734f..badf5693 100644
--- a/anton/README.md
+++ b/anton/README.md
@@ -561,7 +561,7 @@ The memory system is wired into `ChatSession` and `_chat_loop()`:
 
 ```
 1. _chat_loop() startup:
-   → Creates Cortex(global_dir, project_dir, mode, llm)
+   → Creates Cortex(global_hc=Hippocampus(global_dir), project_hc=Hippocampus(project_dir), mode, llm)
    → Creates EpisodicMemory(episodes_dir, enabled=settings.episodic_memory)
    → Starts episodic session if enabled
    → Runs reconsolidation if needed
diff --git a/anton/core/memory/cortex.py b/anton/core/memory/cortex.py
index 2ebefb23..121bd0f3 100644
--- a/anton/core/memory/cortex.py
+++ b/anton/core/memory/cortex.py
@@ -13,7 +13,7 @@
   - encode() → executive decision to encode (directing the hippocampus)
   - encoding_gate() → encoding gate modulated by the memory mode
 
-The Cortex coordinates two Hippocampus instances (global + project scope),
+The Cortex coordinates two HippocampusProtocol instances (global + project scope),
 like how the PFC coordinates retrieval from multiple brain memory systems.
 """
 

From c70893af2875f73b33b8dfbe445a5dda2d7249e7 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 13:06:17 +0200
Subject: [PATCH 053/134] Fix imports

---
 tests/test_client.py          |  6 +++---
 tests/test_openai_provider.py | 14 +++++++-------
 tests/test_provider.py        | 14 +++++++-------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/test_client.py b/tests/test_client.py
index 991612a1..0885b6e5 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -5,8 +5,8 @@
 import pytest
 
 from anton.config.settings import AntonSettings
-from anton.llm.client import LLMClient
-from anton.llm.provider import LLMProvider, LLMResponse, Usage
+from anton.core.llm.client import LLMClient
+from anton.core.llm.provider import LLMProvider, LLMResponse, Usage
 
 
 @pytest.fixture()
@@ -75,7 +75,7 @@ async def test_plan_passes_tools(self, mock_providers):
 
 class TestLLMClientFromSettings:
     def test_from_settings_creates_client(self):
-        from anton.llm.anthropic import AnthropicProvider
+        from anton.core.llm.anthropic import AnthropicProvider
 
         with patch("anthropic.AsyncAnthropic"):
             settings = AntonSettings(
diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py
index 1342ede7..51e42b4d 100644
--- a/tests/test_openai_provider.py
+++ b/tests/test_openai_provider.py
@@ -6,14 +6,14 @@
 import pytest
 
 from anton.config.settings import AntonSettings
-from anton.llm.client import LLMClient
-from anton.llm.openai import (
+from anton.core.llm.client import LLMClient
+from anton.core.llm.openai import (
     OpenAIProvider,
     build_chat_completion_kwargs,
     _translate_messages,
     _translate_tools,
 )
-from anton.llm.provider import LLMProvider
+from anton.core.llm.provider import LLMProvider
 
 
 def _make_mock_response(*, content="Hello", tool_calls=None, prompt_tokens=10, completion_tokens=20, finish_reason="stop"):
@@ -38,7 +38,7 @@ def _make_mock_response(*, content="Hello", tool_calls=None, prompt_tokens=10, c
 
 class TestOpenAIProvider:
     async def test_complete_text_response(self):
-        with patch("anton.llm.openai.openai") as mock_openai:
+        with patch("anton.core.llm.openai.openai") as mock_openai:
             mock_client = AsyncMock()
             mock_openai.AsyncOpenAI.return_value = mock_client
 
@@ -60,7 +60,7 @@ async def test_complete_text_response(self):
             assert result.stop_reason == "stop"
 
     async def test_complete_tool_use_response(self):
-        with patch("anton.llm.openai.openai") as mock_openai:
+        with patch("anton.core.llm.openai.openai") as mock_openai:
             mock_client = AsyncMock()
             mock_openai.AsyncOpenAI.return_value = mock_client
 
@@ -88,7 +88,7 @@ async def test_complete_tool_use_response(self):
             assert result.stop_reason == "tool_calls"
 
     async def test_complete_passes_tool_choice(self):
-        with patch("anton.llm.openai.openai") as mock_openai:
+        with patch("anton.core.llm.openai.openai") as mock_openai:
             mock_client = AsyncMock()
             mock_openai.AsyncOpenAI.return_value = mock_client
 
@@ -223,7 +223,7 @@ def test_translate_messages_with_tool_result(self):
 
 class TestFromSettingsOpenAI:
     def test_from_settings_openai(self):
-        with patch("anton.llm.openai.openai"):
+        with patch("anton.core.llm.openai.openai"):
             settings = AntonSettings(
                 planning_provider="openai",
                 coding_provider="openai",
diff --git a/tests/test_provider.py b/tests/test_provider.py
index dece8dc7..0d0ac8a8 100644
--- a/tests/test_provider.py
+++ b/tests/test_provider.py
@@ -2,8 +2,8 @@
 
 from unittest.mock import AsyncMock, MagicMock, patch
 
-from anton.llm.anthropic import AnthropicProvider
-from anton.llm.provider import LLMResponse, ToolCall
+from anton.core.llm.anthropic import AnthropicProvider
+from anton.core.llm.provider import LLMResponse, ToolCall
 
 
 class TestDataclasses:
@@ -16,7 +16,7 @@ def test_llm_response_with_tool_calls(self):
 
 class TestAnthropicProvider:
     async def test_complete_text_response(self):
-        with patch("anton.llm.anthropic.anthropic") as mock_anthropic:
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
             mock_client = AsyncMock()
             mock_anthropic.AsyncAnthropic.return_value = mock_client
 
@@ -45,7 +45,7 @@ async def test_complete_text_response(self):
             assert result.stop_reason == "end_turn"
 
     async def test_complete_tool_use_response(self):
-        with patch("anton.llm.anthropic.anthropic") as mock_anthropic:
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
             mock_client = AsyncMock()
             mock_anthropic.AsyncAnthropic.return_value = mock_client
 
@@ -78,7 +78,7 @@ async def test_complete_tool_use_response(self):
             assert result.stop_reason == "tool_use"
 
     async def test_complete_passes_tool_choice(self):
-        with patch("anton.llm.anthropic.anthropic") as mock_anthropic:
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
             mock_client = AsyncMock()
             mock_anthropic.AsyncAnthropic.return_value = mock_client
 
@@ -110,7 +110,7 @@ async def test_complete_passes_tool_choice(self):
             assert call_kwargs["tools"] == tools
 
     async def test_complete_omits_tool_choice_when_none(self):
-        with patch("anton.llm.anthropic.anthropic") as mock_anthropic:
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
             mock_client = AsyncMock()
             mock_anthropic.AsyncAnthropic.return_value = mock_client
 
@@ -137,7 +137,7 @@ async def test_complete_omits_tool_choice_when_none(self):
             assert "tool_choice" not in call_kwargs
 
     async def test_provider_without_api_key(self):
-        with patch("anton.llm.anthropic.anthropic") as mock_anthropic:
+        with patch("anton.core.llm.anthropic.anthropic") as mock_anthropic:
             mock_anthropic.AsyncAnthropic.return_value = AsyncMock()
             provider = AnthropicProvider()
             mock_anthropic.AsyncAnthropic.assert_called_once_with()

From c343c5db23b34282fc508d667b7855b08a5a4e7b Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 13:06:22 +0200
Subject: [PATCH 054/134] Fix imports

---
 tests/test_chat.py            | 2 +-
 tests/test_chat_context.py    | 2 +-
 tests/test_chat_scratchpad.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_chat.py b/tests/test_chat.py
index 42fecc09..d0f465fd 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -5,7 +5,7 @@
 import pytest
 
 from anton.chat import ChatSession
-from anton.llm.provider import (
+from anton.core.llm.provider import (
     ContextOverflowError,
     LLMResponse,
     StreamComplete,
diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py
index 26339f39..f06ade80 100644
--- a/tests/test_chat_context.py
+++ b/tests/test_chat_context.py
@@ -13,7 +13,7 @@
 from anton.config.settings import AntonSettings
 from anton.core.tools.tool_defs import MEMORIZE_TOOL
 from anton.context.self_awareness import SelfAwarenessContext
-from anton.llm.provider import LLMResponse, ToolCall, Usage
+from anton.core.llm.provider import LLMResponse, ToolCall, Usage
 from anton.workspace import Workspace
 from anton.memory.cortex import Cortex
 from anton.memory.hippocampus import Hippocampus
diff --git a/tests/test_chat_scratchpad.py b/tests/test_chat_scratchpad.py
index e69356da..c00773b2 100644
--- a/tests/test_chat_scratchpad.py
+++ b/tests/test_chat_scratchpad.py
@@ -8,7 +8,7 @@
 from anton.core.session import ChatSession
 from anton.core.tools.tool_defs import SCRATCHPAD_TOOL
 from anton.commands.session import handle_resume
-from anton.llm.provider import LLMResponse, StreamComplete, StreamToolResult, ToolCall, Usage
+from anton.core.llm.provider import LLMResponse, StreamComplete, StreamToolResult, ToolCall, Usage
 
 
 @pytest.fixture()

From 2c79c6f17e74515cd4c9300228c314c094dbf116 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:16:11 +0200
Subject: [PATCH 055/134] Rmv unneeded shims

---
 anton/memory/consolidator.py | 4 ----
 anton/memory/cortex.py       | 4 ----
 anton/memory/episodes.py     | 4 ----
 anton/memory/hippocampus.py  | 4 ----
 4 files changed, 16 deletions(-)
 delete mode 100644 anton/memory/consolidator.py
 delete mode 100644 anton/memory/cortex.py
 delete mode 100644 anton/memory/episodes.py
 delete mode 100644 anton/memory/hippocampus.py

diff --git a/anton/memory/consolidator.py b/anton/memory/consolidator.py
deleted file mode 100644
index 4f61a018..00000000
--- a/anton/memory/consolidator.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Shim — re-exports from core. Import from anton.core.memory.consolidator directly.
-from anton.core.memory.consolidator import Consolidator
-
-__all__ = ["Consolidator"]
diff --git a/anton/memory/cortex.py b/anton/memory/cortex.py
deleted file mode 100644
index 7065db47..00000000
--- a/anton/memory/cortex.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Shim — re-exports from core. Import from anton.core.memory.cortex directly.
-from anton.core.memory.cortex import Cortex
-
-__all__ = ["Cortex"]
diff --git a/anton/memory/episodes.py b/anton/memory/episodes.py
deleted file mode 100644
index 0fbb335f..00000000
--- a/anton/memory/episodes.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Shim — re-exports from core. Import from anton.core.memory.episodes directly.
-from anton.core.memory.episodes import Episode, EpisodicMemory
-
-__all__ = ["Episode", "EpisodicMemory"]
diff --git a/anton/memory/hippocampus.py b/anton/memory/hippocampus.py
deleted file mode 100644
index b0ff99f3..00000000
--- a/anton/memory/hippocampus.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Shim — re-exports from core. Import from anton.core.memory.hippocampus directly.
-from anton.core.memory.hippocampus import Engram, Hippocampus
-
-__all__ = ["Engram", "Hippocampus"]

From e52427e0760377ddaf17e5a5ee3ad428d9d72c9f Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:16:20 +0200
Subject: [PATCH 056/134] Fix imports

---
 anton/chat.py             | 4 ++--
 anton/chat_session.py     | 4 ++--
 anton/commands/session.py | 4 ++--
 anton/commands/setup.py   | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 0536e4dc..7d85de91 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -80,7 +80,7 @@
     from rich.console import Console
 
     from anton.config.settings import AntonSettings
-    from anton.memory.episodes import EpisodicMemory
+    from anton.core.memory.episodes import EpisodicMemory
     from anton.workspace import Workspace
 
 
@@ -960,7 +960,7 @@ async def _chat_loop(
     if cortex.needs_compaction():
         asyncio.create_task(cortex.compact_all())
 
-    from anton.memory.episodes import EpisodicMemory
+    from anton.core.memory.episodes import EpisodicMemory
 
     episodes_dir = settings.workspace_path / ".anton" / "episodes"
     episodic = EpisodicMemory(episodes_dir, enabled=settings.episodic_memory)
diff --git a/anton/chat_session.py b/anton/chat_session.py
index f8127a67..b06a64c9 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -12,8 +12,8 @@
 
 if TYPE_CHECKING:
     from anton.chat import ChatSession
-    from anton.memory.cortex import Cortex
-    from anton.memory.episodes import EpisodicMemory
+    from anton.core.memory.cortex import Cortex
+    from anton.core.memory.episodes import EpisodicMemory
     from anton.memory.history_store import HistoryStore
     from anton.workspace import Workspace
 
diff --git a/anton/commands/session.py b/anton/commands/session.py
index 9822daec..7b4d1856 100644
--- a/anton/commands/session.py
+++ b/anton/commands/session.py
@@ -12,9 +12,9 @@
 
 if TYPE_CHECKING:
     from anton.chat import ChatSession
-    from anton.memory.episodes import EpisodicMemory
+    from anton.core.memory.episodes import EpisodicMemory
     from anton.memory.history_store import HistoryStore
-    from anton.memory.cortex import Cortex
+    from anton.core.memory.cortex import Cortex
     from anton.workspace import Workspace
 
 
diff --git a/anton/commands/setup.py b/anton/commands/setup.py
index 3ab1126c..b5be940a 100644
--- a/anton/commands/setup.py
+++ b/anton/commands/setup.py
@@ -10,9 +10,9 @@
 
 if TYPE_CHECKING:
     from anton.chat import ChatSession
-    from anton.memory.episodes import EpisodicMemory
+    from anton.core.memory.episodes import EpisodicMemory
     from anton.memory.history_store import HistoryStore
-    from anton.memory.cortex import Cortex
+    from anton.core.memory.cortex import Cortex
     from anton.workspace import Workspace
 
 

From c90fc54363be8c345e640dbf6fc151a1ee78758c Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:16:31 +0200
Subject: [PATCH 057/134] Move consolidation prompt

---
 anton/core/llm/prompts.py | 42 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
index d9b65ef8..7911aaa7 100644
--- a/anton/core/llm/prompts.py
+++ b/anton/core/llm/prompts.py
@@ -369,6 +369,48 @@
 """
 
 
+CONSOLIDATION_PROMPT = """\
+You are a memory consolidation system for an AI coding assistant.
+
+Review this scratchpad session (sequence of code cells with their results) and
+extract durable, reusable lessons. Focus on:
+
+1. **Rules** — patterns to always/never follow:
+   - "Always call progress() before long API calls in scratchpad"
+   - "Never use time.sleep() in scratchpad cells"
+   - Conditional rules: "If fetching paginated data → use async + progress()"
+
+2. **Lessons** — factual knowledge discovered:
+   - API behaviors: "CoinGecko free tier rate-limits at ~50 req/min"
+   - Library quirks: "pandas read_csv needs encoding='utf-8-sig' for BOM files"
+   - Data facts: "Bitcoin price data via /coins/bitcoin/market_chart/range"
+
+Return a JSON array of objects:
+[
+  {
+    "text": "the memory to encode",
+    "kind": "always" | "never" | "when" | "lesson",
+    "scope": "global" | "project",
+    "topic": "optional-topic-slug",
+    "confidence": "high" | "medium"
+  }
+]
+
+Rules for scope:
+- "project": DEFAULT — use this for most memories. Anything related to the current
+  codebase, its APIs, file paths, libraries, patterns, conventions, or behaviors
+  observed during this session belongs here.
+- "global": RARE — only for truly universal knowledge that applies to any project
+  (e.g. general language quirks, stdlib gotchas). When in doubt, use "project".
+
+Rules for confidence:
+- "high": clearly correct, verified by the session results
+- "medium": probably correct but worth confirming
+
+If no meaningful lessons exist, return [].
+Do NOT extract trivial observations. Only encode genuinely reusable knowledge.
+"""
+
 RESILIENCE_NUDGE = (
     "\n\nSYSTEM: This tool has failed twice in a row. Before retrying the same approach or "
     "asking the user for help, try a creative workaround — different headers/user-agent, "

From 09d7435c53b1a900de2be69d1468b6e00e6d5ce0 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:16:45 +0200
Subject: [PATCH 058/134] Rmv un needed exports

---
 anton/core/memory/__init__.py     | 15 ----------
 anton/core/memory/consolidator.py | 46 ++-----------------------------
 2 files changed, 2 insertions(+), 59 deletions(-)

diff --git a/anton/core/memory/__init__.py b/anton/core/memory/__init__.py
index ab60a07c..e69de29b 100644
--- a/anton/core/memory/__init__.py
+++ b/anton/core/memory/__init__.py
@@ -1,15 +0,0 @@
-from anton.core.memory.base import HippocampusProtocol
-from anton.core.memory.hippocampus import Engram, Hippocampus
-from anton.core.memory.episodes import Episode, EpisodicMemory
-from anton.core.memory.consolidator import Consolidator
-from anton.core.memory.cortex import Cortex
-
-__all__ = [
-    "HippocampusProtocol",
-    "Engram",
-    "Hippocampus",
-    "Episode",
-    "EpisodicMemory",
-    "Consolidator",
-    "Cortex",
-]
diff --git a/anton/core/memory/consolidator.py b/anton/core/memory/consolidator.py
index 8a0e8bb6..20bbafe5 100644
--- a/anton/core/memory/consolidator.py
+++ b/anton/core/memory/consolidator.py
@@ -25,6 +25,7 @@
 import json
 from typing import TYPE_CHECKING
 
+from anton.core.llm.prompts import CONSOLIDATION_PROMPT
 from anton.core.memory.hippocampus import Engram
 
 if TYPE_CHECKING:
@@ -32,49 +33,6 @@
     from anton.scratchpad import Cell
 
 
-_CONSOLIDATION_PROMPT = """\
-You are a memory consolidation system for an AI coding assistant.
-
-Review this scratchpad session (sequence of code cells with their results) and
-extract durable, reusable lessons. Focus on:
-
-1. **Rules** — patterns to always/never follow:
-   - "Always call progress() before long API calls in scratchpad"
-   - "Never use time.sleep() in scratchpad cells"
-   - Conditional rules: "If fetching paginated data → use async + progress()"
-
-2. **Lessons** — factual knowledge discovered:
-   - API behaviors: "CoinGecko free tier rate-limits at ~50 req/min"
-   - Library quirks: "pandas read_csv needs encoding='utf-8-sig' for BOM files"
-   - Data facts: "Bitcoin price data via /coins/bitcoin/market_chart/range"
-
-Return a JSON array of objects:
-[
-  {
-    "text": "the memory to encode",
-    "kind": "always" | "never" | "when" | "lesson",
-    "scope": "global" | "project",
-    "topic": "optional-topic-slug",
-    "confidence": "high" | "medium"
-  }
-]
-
-Rules for scope:
-- "project": DEFAULT — use this for most memories. Anything related to the current
-  codebase, its APIs, file paths, libraries, patterns, conventions, or behaviors
-  observed during this session belongs here.
-- "global": RARE — only for truly universal knowledge that applies to any project
-  (e.g. general language quirks, stdlib gotchas). When in doubt, use "project".
-
-Rules for confidence:
-- "high": clearly correct, verified by the session results
-- "medium": probably correct but worth confirming
-
-If no meaningful lessons exist, return [].
-Do NOT extract trivial observations. Only encode genuinely reusable knowledge.
-"""
-
-
 class Consolidator:
     """Extracts durable lessons from scratchpad sessions via offline replay.
 
@@ -149,7 +107,7 @@ async def replay_and_extract(self, cells: list[Cell], llm_client: LLMClient) ->
 
         try:
             response = await llm_client.code(
-                system=_CONSOLIDATION_PROMPT,
+                system=CONSOLIDATION_PROMPT,
                 messages=[{"role": "user", "content": session_summary}],
                 max_tokens=2048,
             )

From db9ecc7ec2fc62fa9a266e5c7d834bb1d531bc96 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:17:00 +0200
Subject: [PATCH 059/134] New imports

---
 anton/core/session.py             | 8 ++++----
 anton/core/tools/tool_handlers.py | 2 +-
 anton/memory/reconsolidator.py    | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index 4f37a5c1..d61ea6fc 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -33,8 +33,8 @@
     from anton.context.self_awareness import SelfAwarenessContext
     from anton.chat_ui import EscapeWatcher
     from anton.core.llm.client import LLMClient
-    from anton.memory.cortex import Cortex
-    from anton.memory.episodes import EpisodicMemory
+    from anton.core.memory.cortex import Cortex
+    from anton.core.memory.episodes import EpisodicMemory
     from anton.memory.history_store import HistoryStore
     from anton.workspace import Workspace
 
@@ -1169,7 +1169,7 @@ async def _stream_and_handle_tools(
 
     def _maybe_consolidate_scratchpads(self) -> None:
         """Check if any scratchpad sessions warrant consolidation and fire it off."""
-        from anton.memory.consolidator import Consolidator
+        from anton.core.memory.consolidator import Consolidator
 
         consolidator = Consolidator()
         for pad in self._scratchpads._pads.values():
@@ -1179,7 +1179,7 @@ def _maybe_consolidate_scratchpads(self) -> None:
 
     async def _consolidate(self, cells: list) -> None:
         """Run offline consolidation on a completed scratchpad session."""
-        from anton.memory.consolidator import Consolidator
+        from anton.core.memory.consolidator import Consolidator
 
         consolidator = Consolidator()
         engrams = await consolidator.replay_and_extract(cells, self._llm)
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index 82adefd6..60af421b 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -38,7 +38,7 @@ async def handle_memorize(session: ChatSession, tc_input: dict) -> str:
     if session._cortex.mode == "off":
         return "Memory encoding is disabled. Change memory mode via /setup to enable."
 
-    from anton.memory.hippocampus import Engram
+    from anton.core.memory.hippocampus import Engram
 
     raw_entries = tc_input.get("entries", [])
     if not raw_entries:
diff --git a/anton/memory/reconsolidator.py b/anton/memory/reconsolidator.py
index 656d242b..32297e39 100644
--- a/anton/memory/reconsolidator.py
+++ b/anton/memory/reconsolidator.py
@@ -19,7 +19,7 @@
 import json
 from pathlib import Path
 
-from anton.memory.hippocampus import Hippocampus
+from anton.core.memory.hippocampus import Hippocampus
 
 
 def needs_reconsolidation(project_dir: Path) -> bool:

From f2a32591580ce717374c167c6d0e065568db06c3 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:17:06 +0200
Subject: [PATCH 060/134] Fix tests

---
 tests/test_cortex.py      | 4 ++--
 tests/test_episodes.py    | 2 +-
 tests/test_hippocampus.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_cortex.py b/tests/test_cortex.py
index 81537033..02a12d14 100644
--- a/tests/test_cortex.py
+++ b/tests/test_cortex.py
@@ -5,8 +5,8 @@
 
 import pytest
 
-from anton.memory.cortex import Cortex
-from anton.memory.hippocampus import Engram, Hippocampus
+from anton.core.memory.cortex import Cortex
+from anton.core.memory.hippocampus import Engram, Hippocampus
 
 
 @pytest.fixture()
diff --git a/tests/test_episodes.py b/tests/test_episodes.py
index c4f4187d..977a45c7 100644
--- a/tests/test_episodes.py
+++ b/tests/test_episodes.py
@@ -9,7 +9,7 @@
 
 import pytest
 
-from anton.memory.episodes import Episode, EpisodicMemory
+from anton.core.memory.episodes import Episode, EpisodicMemory
 
 
 @pytest.fixture()
diff --git a/tests/test_hippocampus.py b/tests/test_hippocampus.py
index 169eb2c5..10241d10 100644
--- a/tests/test_hippocampus.py
+++ b/tests/test_hippocampus.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from anton.memory.hippocampus import Hippocampus
+from anton.core.memory.hippocampus import Hippocampus
 
 
 @pytest.fixture()

From 069f966e15af77305a9ca802bd455955d22cd55d Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:17:11 +0200
Subject: [PATCH 061/134] Fix tests

---
 tests/test_chat_context.py | 4 ++--
 tests/test_consolidator.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py
index f06ade80..52719aaf 100644
--- a/tests/test_chat_context.py
+++ b/tests/test_chat_context.py
@@ -15,8 +15,8 @@
 from anton.context.self_awareness import SelfAwarenessContext
 from anton.core.llm.provider import LLMResponse, ToolCall, Usage
 from anton.workspace import Workspace
-from anton.memory.cortex import Cortex
-from anton.memory.hippocampus import Hippocampus
+from anton.core.memory.cortex import Cortex
+from anton.core.memory.hippocampus import Hippocampus
 
 
 def _text_response(text: str) -> LLMResponse:
diff --git a/tests/test_consolidator.py b/tests/test_consolidator.py
index ffb82d1b..b262f746 100644
--- a/tests/test_consolidator.py
+++ b/tests/test_consolidator.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from anton.memory.consolidator import Consolidator
+from anton.core.memory.consolidator import Consolidator
 
 
 @dataclass

From 575e362a13815659a783f0df8f1bb5cfbfc0d281 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 14:21:39 +0200
Subject: [PATCH 062/134] Lint

---
 anton/core/memory/consolidator.py | 26 ++++++++++++++---------
 anton/core/memory/cortex.py       | 34 +++++++++++++++++++++----------
 anton/core/memory/episodes.py     | 24 +++++++++++++---------
 anton/core/memory/hippocampus.py  | 12 ++++++++---
 4 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/anton/core/memory/consolidator.py b/anton/core/memory/consolidator.py
index 20bbafe5..7cc6066f 100644
--- a/anton/core/memory/consolidator.py
+++ b/anton/core/memory/consolidator.py
@@ -67,12 +67,16 @@ def should_replay(self, cells: list[Cell]) -> bool:
 
         # Check for cancellation markers in stderr
         for cell in cells:
-            if cell.stderr and ("cancelled" in cell.stderr.lower() or "killed" in cell.stderr.lower()):
+            if cell.stderr and (
+                "cancelled" in cell.stderr.lower() or "killed" in cell.stderr.lower()
+            ):
                 return True
 
         return False
 
-    async def replay_and_extract(self, cells: list[Cell], llm_client: LLMClient) -> list[Engram]:
+    async def replay_and_extract(
+        self, cells: list[Cell], llm_client: LLMClient
+    ) -> list[Engram]:
         """Replay the scratchpad session and extract lessons.
 
         Like SWS replay: compresses the full session into a compact summary,
@@ -144,14 +148,16 @@ async def replay_and_extract(self, cells: list[Cell], llm_client: LLMClient) ->
             if confidence not in ("high", "medium", "low"):
                 confidence = "medium"
 
-            engrams.append(Engram(
-                text=item["text"],
-                kind=kind,
-                scope=scope,
-                confidence=confidence,
-                topic=item.get("topic", ""),
-                source="consolidation",
-            ))
+            engrams.append(
+                Engram(
+                    text=item["text"],
+                    kind=kind,
+                    scope=scope,
+                    confidence=confidence,
+                    topic=item.get("topic", ""),
+                    source="consolidation",
+                )
+            )
 
         # Cap extraction to prevent memory bloat from single sessions
         return engrams[:5]
diff --git a/anton/core/memory/cortex.py b/anton/core/memory/cortex.py
index 121bd0f3..6038acba 100644
--- a/anton/core/memory/cortex.py
+++ b/anton/core/memory/cortex.py
@@ -121,14 +121,18 @@ async def build_memory_context(self, user_message: str = "") -> str:
         # 2. Global rules (with smart retrieval)
         global_rules = self.global_hc.recall_rules()
         if global_rules:
-            global_rules = await self._retrieve_relevant_rules(global_rules, user_message)
+            global_rules = await self._retrieve_relevant_rules(
+                global_rules, user_message
+            )
             if global_rules:
                 sections.append(f"## Your Memory — Global Rules\n{global_rules}")
 
         # 3. Project rules (with smart retrieval)
         project_rules = self.project_hc.recall_rules()
         if project_rules:
-            project_rules = await self._retrieve_relevant_rules(project_rules, user_message)
+            project_rules = await self._retrieve_relevant_rules(
+                project_rules, user_message
+            )
             if project_rules:
                 sections.append(f"## Your Memory — Project Rules\n{project_rules}")
 
@@ -202,10 +206,12 @@ async def _retrieve_relevant_rules(self, all_rules: str, user_message: str) -> s
         try:
             response = await self._llm.code(
                 system=self._RULES_RETRIEVAL_PROMPT,
-                messages=[{
-                    "role": "user",
-                    "content": f"User message: {user_message}\n\nRules:\n{when_text}",
-                }],
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"User message: {user_message}\n\nRules:\n{when_text}",
+                    }
+                ],
                 max_tokens=4096,
             )
             result = response.content.strip()
@@ -375,7 +381,9 @@ async def _compact_file(self, hc: Hippocampus, path: Path, kind: str) -> None:
             return
 
         content = path.read_text(encoding="utf-8")
-        entries = [ln.strip() for ln in content.splitlines() if ln.strip().startswith("- ")]
+        entries = [
+            ln.strip() for ln in content.splitlines() if ln.strip().startswith("- ")
+        ]
 
         if len(entries) < 8:
             return
@@ -397,9 +405,12 @@ async def _compact_file(self, hc: Hippocampus, path: Path, kind: str) -> None:
         # Rebuild the file
         if kind == "rules":
             # Preserve section structure
-            always = [e for e in kept if "always" in e.lower() or not any(
-                k in e.lower() for k in ("never", "when", "if ")
-            )]
+            always = [
+                e
+                for e in kept
+                if "always" in e.lower()
+                or not any(k in e.lower() for k in ("never", "when", "if "))
+            ]
             never = [e for e in kept if "never" in e.lower()]
             when_rules = [e for e in kept if "when" in e.lower() or "if " in e.lower()]
 
@@ -457,7 +468,8 @@ async def maybe_update_identity(self, user_message: str) -> None:
                 key = fact.split(":")[0].strip().lower() if ":" in fact else ""
                 if key:
                     existing_entries = [
-                        e for e in existing_entries
+                        e
+                        for e in existing_entries
                         if not e.lower().startswith(key + ":")
                     ]
                 existing_entries.append(fact)
diff --git a/anton/core/memory/episodes.py b/anton/core/memory/episodes.py
index 339fd135..24a1c6f1 100644
--- a/anton/core/memory/episodes.py
+++ b/anton/core/memory/episodes.py
@@ -37,7 +37,6 @@ def __init__(self, episodes_dir: Path, *, enabled: bool = True) -> None:
         self._session_id: str | None = None
         self._file: Path | None = None
 
-
     @property
     def enabled(self) -> bool:
         return self._enabled
@@ -75,6 +74,7 @@ def log(self, episode: Episode) -> None:
             with self._file.open("a", encoding="utf-8") as f:
                 if sys.platform != "win32":
                     import fcntl
+
                     fcntl.flock(f, fcntl.LOCK_EX)
                     f.write(line)
                     fcntl.flock(f, fcntl.LOCK_UN)
@@ -99,14 +99,16 @@ def log_turn(
         elif role == "tool_result":
             content = content[:_MAX_TOOL_RESULT]
 
-        self.log(Episode(
-            ts=datetime.now(timezone.utc).isoformat(),
-            session=self._session_id,
-            turn=turn,
-            role=role,
-            content=content,
-            meta=dict(meta),
-        ))
+        self.log(
+            Episode(
+                ts=datetime.now(timezone.utc).isoformat(),
+                session=self._session_id,
+                turn=turn,
+                role=role,
+                content=content,
+                meta=dict(meta),
+            )
+        )
 
     def recall(
         self,
@@ -214,7 +216,9 @@ def recall_formatted(
         lines: list[str] = []
         for ep in episodes:
             # Show more content for assistant/scratchpad responses
-            max_len = 2000 if ep.role in ("assistant", "scratchpad", "tool_result") else 500
+            max_len = (
+                2000 if ep.role in ("assistant", "scratchpad", "tool_result") else 500
+            )
             lines.append(f"[{ep.ts}] ({ep.role}) {ep.content[:max_len]}")
         return "\n".join(lines)
 
diff --git a/anton/core/memory/hippocampus.py b/anton/core/memory/hippocampus.py
index 4bea725e..9134a301 100644
--- a/anton/core/memory/hippocampus.py
+++ b/anton/core/memory/hippocampus.py
@@ -64,7 +64,6 @@ def __init__(self, base_dir: Path) -> None:
         self._lessons_path = base_dir / "lessons.md"
         self._topics_dir = base_dir / "topics"
 
-
     def recall_identity(self) -> str:
         """Load the always-on self-model (profile.md).
 
@@ -247,7 +246,8 @@ def encode_rule(
                 i += 1
                 section_entries: list[str] = []
                 while i < len(lines) and not (
-                    lines[i].strip().startswith("## ") and lines[i].strip() != section_header
+                    lines[i].strip().startswith("## ")
+                    and lines[i].strip() != section_header
                 ):
                     section_entries.append(lines[i])
                     i += 1
@@ -333,7 +333,9 @@ def entry_count(self) -> int:
             if path.is_file():
                 try:
                     content = path.read_text(encoding="utf-8")
-                    count += sum(1 for ln in content.splitlines() if ln.strip().startswith("- "))
+                    count += sum(
+                        1 for ln in content.splitlines() if ln.strip().startswith("- ")
+                    )
                 except (OSError, UnicodeDecodeError):
                     continue
         return count
@@ -353,6 +355,7 @@ def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None
             with open(tmp_path, "w", encoding="utf-8") as f:
                 if sys.platform != "win32":
                     import fcntl
+
                     fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                 try:
                     f.write(text)
@@ -360,6 +363,7 @@ def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None
                 finally:
                     if sys.platform != "win32":
                         import fcntl
+
                         fcntl.flock(f.fileno(), fcntl.LOCK_UN)
             tmp_path.replace(path)
         else:
@@ -367,6 +371,7 @@ def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None
             with open(path, "a", encoding="utf-8") as f:
                 if sys.platform != "win32":
                     import fcntl
+
                     fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                 try:
                     f.write(text)
@@ -374,6 +379,7 @@ def _encode_with_lock(self, path: Path, text: str, mode: str = "append") -> None
                 finally:
                     if sys.platform != "win32":
                         import fcntl
+
                         fcntl.flock(f.fileno(), fcntl.LOCK_UN)
 
     @staticmethod

From d12b2b1220e04cfe406478482e83da4a0d50b14d Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:06:37 +0200
Subject: [PATCH 063/134] Import from core

---
 anton/chat.py                | 2 +-
 anton/cli.py                 | 2 +-
 anton/commands/datasource.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 7d85de91..7f28a398 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -685,7 +685,7 @@ async def _agent_zero(console: Console, session: "ChatSession", settings) -> str
         f'OUTPUT_PATH = {output_html!r}',
     )
 
-    from anton.scratchpad import Cell
+    from anton.core.backends.base import Cell
     from rich.live import Live
     from rich.spinner import Spinner
     from rich.text import Text
diff --git a/anton/cli.py b/anton/cli.py
index 3871a059..940bf01a 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -25,7 +25,7 @@
 
 from anton.chat import ChatSession
 from anton.core.llm.client import LLMClient
-from anton.scratchpad import ScratchpadManager
+from anton.core.backends.manager import ScratchpadManager
 
 from anton.commands.datasource import (
     handle_remove_data_source,
diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index 1e69a5b4..c1de837c 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -22,7 +22,7 @@
     parse_connection_slug,
 )
 from anton.utils.prompt import prompt_or_cancel
-from anton.scratchpad import ScratchpadManager
+from anton.core.backends.manager import ScratchpadManager
 
 if TYPE_CHECKING:
     from anton.chat import ChatSession

From 90cdd1e7bfa57e5270edecc1aa96e1f8a4f303c8 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:06:48 +0200
Subject: [PATCH 064/134] Improt from core

---
 anton/core/memory/consolidator.py |  2 +-
 anton/core/session.py             | 14 ++++++++------
 anton/core/tools/tool_handlers.py |  6 +++---
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/anton/core/memory/consolidator.py b/anton/core/memory/consolidator.py
index 7cc6066f..ae07ebd7 100644
--- a/anton/core/memory/consolidator.py
+++ b/anton/core/memory/consolidator.py
@@ -30,7 +30,7 @@
 
 if TYPE_CHECKING:
     from anton.core.llm.client import LLMClient
-    from anton.scratchpad import Cell
+    from anton.core.backends.base import Cell
 
 
 class Consolidator:
diff --git a/anton/core/session.py b/anton/core/session.py
index d61ea6fc..a920346d 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -16,7 +16,7 @@
     StreamToolResult,
     TokenLimitExceeded
 )
-from anton.scratchpad import ScratchpadManager
+from anton.core.backends.manager import ScratchpadManager
 from anton.core.tools.registry import ToolRegistry
 from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef
 from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
@@ -283,7 +283,7 @@ def _build_tools(self) -> list[dict]:
 
     def _build_core_tools(self) -> None:
         scratchpad_tool = SCRATCHPAD_TOOL
-        pkg_list = self._scratchpads._available_packages
+        pkg_list = self._scratchpads.available_packages
         if pkg_list:
             notable = sorted(p for p in pkg_list if p.lower() in self._NOTABLE_PACKAGES)
             if notable:
@@ -420,7 +420,7 @@ async def _summarize_history(self) -> None:
     def _compact_scratchpads(self) -> bool:
         """Compact all active scratchpads. Returns True if any were compacted."""
         compacted = False
-        for pad in self._scratchpads._pads.values():
+        for pad in self._scratchpads.pads.values():
             if pad._compact_cells():
                 compacted = True
         return compacted
@@ -840,7 +840,7 @@ async def _stream_and_handle_tools(
                                 import time as _time
 
                                 _sp_t0 = _time.monotonic()
-                                from anton.scratchpad import Cell
+                                from anton.core.backends.base import Cell
 
                                 cell = None
                                 async for item in pad.execute_streaming(
@@ -848,8 +848,10 @@ async def _stream_and_handle_tools(
                                     description=description,
                                     estimated_time=estimated_time,
                                     estimated_seconds=estimated_seconds,
-                                    cancel_event=self._cancel_event,
                                 ):
+                                    if self._cancel_event.is_set():
+                                        await pad.cancel()
+                                        break
                                     if isinstance(item, str):
                                         yield StreamTaskProgress(
                                             phase="scratchpad", message=item
@@ -1172,7 +1174,7 @@ def _maybe_consolidate_scratchpads(self) -> None:
         from anton.core.memory.consolidator import Consolidator
 
         consolidator = Consolidator()
-        for pad in self._scratchpads._pads.values():
+        for pad in self._scratchpads.pads.values():
             cells = list(pad.cells)
             if consolidator.should_replay(cells):
                 asyncio.create_task(self._consolidate(cells))
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index 60af421b..d94d724a 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -109,13 +109,13 @@ async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
         return format_cell_result(cell)
 
     elif action == "view":
-        pad = session._scratchpads._pads.get(name)
+        pad = session._scratchpads.pads.get(name)
         if pad is None:
             return f"No scratchpad named '{name}'."
         return pad.view()
 
     elif action == "reset":
-        pad = session._scratchpads._pads.get(name)
+        pad = session._scratchpads.pads.get(name)
         if pad is None:
             return f"No scratchpad named '{name}'."
         await pad.reset()
@@ -125,7 +125,7 @@ async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
         return await session._scratchpads.remove(name)
 
     elif action == "dump":
-        pad = session._scratchpads._pads.get(name)
+        pad = session._scratchpads.pads.get(name)
         if pad is None:
             return f"No scratchpad named '{name}'."
         return pad.render_notebook()

From 99a509e52da8ed7ceb8bf15f845b52f31e7d6703 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:06:57 +0200
Subject: [PATCH 065/134] Rmv old scratchpad

---
 anton/scratchpad.py | 959 --------------------------------------------
 1 file changed, 959 deletions(-)
 delete mode 100644 anton/scratchpad.py

diff --git a/anton/scratchpad.py b/anton/scratchpad.py
deleted file mode 100644
index c766d8a6..00000000
--- a/anton/scratchpad.py
+++ /dev/null
@@ -1,959 +0,0 @@
-"""Scratchpad — persistent Python subprocess for stateful, notebook-like execution."""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import os
-import shutil
-import sys
-import tempfile
-import venv
-from dataclasses import dataclass, field
-from pathlib import Path
-
-_CELL_TIMEOUT_DEFAULT = 120        # Default total timeout when no estimate given
-_CELL_INACTIVITY_TIMEOUT = 30      # Max silence between output lines before killing
-_CELL_INACTIVITY_AFTER_PROGRESS = 60  # Grace window after a progress() call
-_INSTALL_TIMEOUT = 120
-_MAX_OUTPUT = 10_000
-_PROGRESS_MARKER = "__ANTON_PROGRESS__"
-_KEEP_RECENT = 5  # Number of recent cells to keep during compaction
-
-
-def _compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
-    """Compute (total_timeout, inactivity_timeout) from estimated execution time.
-
-    - If estimate is 0: use defaults (120s total, 30s inactivity).
-    - Otherwise: total = max(estimate * 2, estimate + 30) with no cap.
-      Inactivity = max(estimate * 0.5, 30) — no hard cap, scales with estimate.
-    """
-    if estimated_seconds <= 0:
-        return float(_CELL_TIMEOUT_DEFAULT), float(_CELL_INACTIVITY_TIMEOUT)
-    total = max(estimated_seconds * 2, estimated_seconds + 30)
-    inactivity = max(estimated_seconds * 0.5, 30)
-    return float(total), float(inactivity)
-
-
-_BOOT_SCRIPT_PATH = Path(__file__).parent / "scratchpad_boot.py"
-
-_CELL_DELIM = "__ANTON_CELL_END__"
-_RESULT_START = "__ANTON_RESULT__"
-_RESULT_END = "__ANTON_RESULT_END__"
-
-
-@dataclass
-class Cell:
-    code: str
-    stdout: str
-    stderr: str
-    error: str | None
-    description: str = ""
-    estimated_time: str = ""
-    logs: str = ""
-
-
-@dataclass
-class Scratchpad:
-    name: str
-    cells: list[Cell] = field(default_factory=list)
-    _proc: asyncio.subprocess.Process | None = field(default=None, repr=False)
-    _boot_path: str | None = field(default=None, repr=False)
-    _coding_provider: str = field(default="anthropic", repr=False)
-    _coding_model: str = field(default="", repr=False)
-    _coding_api_key: str = field(default="", repr=False)
-    _coding_base_url: str = field(default="", repr=False)
-    _venv_dir: str | None = field(default=None, repr=False)
-    _venv_python: str | None = field(default=None, repr=False)
-    _installed_packages: set[str] = field(default_factory=set, repr=False)
-    _venvs_base: Path = field(
-        default_factory=lambda: Path("~/.anton/scratchpad-venvs").expanduser(),
-        repr=False,
-    )
-
-    _MAX_VENV_RETRIES = 3
-
-    def _ensure_venv(self) -> None:
-        """Create a lightweight per-scratchpad venv (idempotent).
-
-        Uses system_site_packages=True so the real system packages are visible.
-        If we're running inside a parent venv, we also drop a .pth file so the
-        parent venv's site-packages are visible in the child.
-
-        If a persistent venv already exists on disk it is recycled when healthy.
-        If the venv is broken (stale symlinks, missing Python binary, version
-        mismatch), it is deleted and recreated from scratch. Gives up after
-        _MAX_VENV_RETRIES.
-        """
-        if self._venv_dir is not None and self._verify_venv_python():
-            return
-
-        # Try to recycle a persistent venv from a previous session.
-        venv_path = self._venvs_base / self.name
-        if venv_path.is_dir() and self._try_recycle_venv(venv_path):
-            return
-
-        # Recycling failed or no prior venv — nuke leftovers and create fresh.
-        if venv_path.is_dir():
-            self._nuke_venv()
-
-        last_error: Exception | None = None
-        for attempt in range(1, self._MAX_VENV_RETRIES + 1):
-            try:
-                self._create_venv()
-                if self._verify_venv_python():
-                    self._setup_parent_site_packages()
-                    self._save_python_version()
-                    return
-                # Python binary exists but doesn't run — nuke and retry
-                raise RuntimeError(f"venv Python binary at {self._venv_python} is not functional")
-            except Exception as exc:
-                last_error = exc
-                # Clean up the broken venv before retrying
-                self._nuke_venv()
-
-        raise RuntimeError(
-            f"Failed to create a working Python venv after {self._MAX_VENV_RETRIES} attempts. "
-            f"Last error: {last_error}. "
-            f"Try running: python3 -c 'print(\"ok\")' to verify your Python installation."
-        )
-
-    @staticmethod
-    def _find_uv() -> str | None:
-        """Return the path to the ``uv`` binary, or *None* if unavailable."""
-        # Fast path: already on PATH
-        uv = shutil.which("uv")
-        if uv:
-            return uv
-        # Common install locations
-        if sys.platform == "win32":
-            candidates = (
-                os.path.expanduser("~/.local/bin/uv.exe"),
-                os.path.expanduser("~/.cargo/bin/uv.exe"),
-            )
-        else:
-            candidates = (
-                os.path.expanduser("~/.local/bin/uv"),
-                os.path.expanduser("~/.cargo/bin/uv"),
-            )
-        for candidate in candidates:
-            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
-                return candidate
-        return None
-
-    def _create_venv(self) -> None:
-        """Allocate a venv directory and create the virtual environment.
-
-        Prefers ``uv venv`` when available — it is faster, more reliable on
-        macOS (doesn't break when Homebrew upgrades Python), and doesn't depend
-        on the ``venv`` stdlib module being functional.  Falls back to
-        ``venv.create()`` when ``uv`` isn't found.
-
-        The venv is persisted at ``{_venvs_base}/{name}`` on all platforms so
-        installed packages survive across sessions.
-        """
-        import subprocess as _sp
-
-        self._venv_dir = str(self._venvs_base / self.name)
-        os.makedirs(self._venv_dir, exist_ok=True)
-
-        uv = self._find_uv()
-        if uv:
-            _sp.run(
-                [uv, "venv", self._venv_dir,
-                 "--python", sys.executable,
-                 "--system-site-packages", "--seed", "--quiet"],
-                check=True,
-                capture_output=True,
-                timeout=30,
-            )
-        else:
-            venv.create(self._venv_dir, system_site_packages=True, with_pip=False, clear=True)
-
-        if sys.platform == "win32":
-            bin_dir = os.path.join(self._venv_dir, "Scripts")
-            self._venv_python = os.path.join(bin_dir, "python.exe")
-            self._add_windows_firewall_rule()
-        else:
-            bin_dir = os.path.join(self._venv_dir, "bin")
-            self._venv_python = os.path.join(bin_dir, "python")
-
-    def _verify_venv_python(self) -> bool:
-        """Check that the venv Python binary exists and can execute."""
-        if self._venv_python is None:
-            return False
-        if not os.path.exists(self._venv_python):
-            return False
-        # Quick smoke test — run python with a trivial command
-        try:
-            import subprocess
-            result = subprocess.run(
-                [self._venv_python, "-c", "print('ok')"],
-                capture_output=True,
-                timeout=5,
-            )
-            return result.returncode == 0 and "ok" in result.stdout.decode()
-        except Exception:
-            return False
-
-    def _nuke_venv(self) -> None:
-        """Delete the venv directory entirely so it can be recreated."""
-        if self._venv_dir is not None:
-            try:
-                shutil.rmtree(self._venv_dir)
-            except OSError:
-                pass
-        self._venv_dir = None
-        self._venv_python = None
-
-    def _add_windows_firewall_rule(self) -> None:
-        """Add a Windows Firewall outbound-allow rule for this venv's python.exe.
-
-        Windows Firewall blocks new executables by default.  Without a rule,
-        scratchpad HTTP calls (httpx, requests, etc.) silently time out.
-        Runs silently — failures are ignored (user can add rules manually).
-        """
-        if self._venv_python is None or not os.path.isfile(self._venv_python):
-            return
-        import subprocess as _sp
-        rule_name = f"Anton Scratchpad - {self.name}"
-        try:
-            _sp.run(
-                [
-                    "netsh", "advfirewall", "firewall", "add", "rule",
-                    f"name={rule_name}", "dir=out", "action=allow",
-                    f"program={self._venv_python}",
-                ],
-                capture_output=True,
-                timeout=10,
-            )
-        except Exception:
-            pass
-        self._installed_packages.clear()
-
-    def _setup_parent_site_packages(self) -> None:
-        """Make parent venv's packages visible in the child venv."""
-        if sys.prefix != sys.base_prefix:
-            import site as _site
-            parent_site = _site.getsitepackages()
-            child_site = None
-            for dirpath, dirnames, _ in os.walk(self._venv_dir):
-                if "site-packages" in dirnames:
-                    child_site = os.path.join(dirpath, "site-packages")
-                    break
-            if child_site and parent_site:
-                pth_path = os.path.join(child_site, "_parent_venv.pth")
-                with open(pth_path, "w") as f:
-                    for sp in parent_site:
-                        f.write(sp + "\n")
-
-    def _try_recycle_venv(self, venv_path: Path) -> bool:
-        """Validate and reuse a persistent venv from a previous session.
-
-        Sets internal paths, verifies the Python binary is functional, checks
-        the Python version matches, loads saved requirements, and refreshes
-        parent-site-packages links.  Returns False on any failure (caller
-        should nuke the directory and create a fresh venv).
-        """
-        try:
-            self._venv_dir = str(venv_path)
-            if sys.platform == "win32":
-                self._venv_python = os.path.join(self._venv_dir, "Scripts", "python.exe")
-            else:
-                self._venv_python = os.path.join(self._venv_dir, "bin", "python")
-
-            if not self._verify_venv_python():
-                return False
-            if not self._check_python_version():
-                return False
-            self._load_requirements()
-            self._setup_parent_site_packages()
-            return True
-        except Exception:
-            return False
-
-    def _save_requirements(self) -> None:
-        """Write installed package names to requirements.txt (best-effort)."""
-        if not self._venv_dir or not self._installed_packages:
-            return
-        try:
-            req_path = os.path.join(self._venv_dir, "requirements.txt")
-            with open(req_path, "w") as f:
-                for pkg in sorted(self._installed_packages):
-                    f.write(pkg + "\n")
-        except OSError:
-            pass
-
-    def _load_requirements(self) -> None:
-        """Read requirements.txt into _installed_packages."""
-        if not self._venv_dir:
-            return
-        req_path = os.path.join(self._venv_dir, "requirements.txt")
-        try:
-            with open(req_path) as f:
-                for line in f:
-                    pkg = line.strip()
-                    if pkg:
-                        self._installed_packages.add(pkg)
-        except FileNotFoundError:
-            pass
-
-    def _save_python_version(self) -> None:
-        """Write the current Python major.minor to .python_version."""
-        if not self._venv_dir:
-            return
-        try:
-            ver_path = os.path.join(self._venv_dir, ".python_version")
-            with open(ver_path, "w") as f:
-                f.write(f"{sys.version_info.major}.{sys.version_info.minor}\n")
-        except OSError:
-            pass
-
-    def _check_python_version(self) -> bool:
-        """Return True if .python_version matches the current Python."""
-        if not self._venv_dir:
-            return False
-        ver_path = os.path.join(self._venv_dir, ".python_version")
-        try:
-            with open(ver_path) as f:
-                saved = f.read().strip()
-            expected = f"{sys.version_info.major}.{sys.version_info.minor}"
-            return saved == expected
-        except FileNotFoundError:
-            # No version file — treat as mismatch so it gets recreated with one.
-            return False
-
-    async def start(self) -> None:
-        """Write the boot script to a temp file and launch the subprocess."""
-        self._ensure_venv()
-
-        boot_code = _BOOT_SCRIPT_PATH.read_text()
-        fd, path = tempfile.mkstemp(suffix=".py", prefix="anton_scratchpad_")
-        os.write(fd, boot_code.encode())
-        os.close(fd)
-        self._boot_path = path
-
-        env = os.environ.copy()
-        if self._coding_model:
-            env["ANTON_SCRATCHPAD_MODEL"] = self._coding_model
-        if self._coding_provider:
-            env["ANTON_SCRATCHPAD_PROVIDER"] = self._coding_provider
-        # Ensure the SDKs can find API keys under their expected names.
-        # Anton stores them as ANTON_*_API_KEY; the SDKs expect *_API_KEY.
-        if "ANTHROPIC_API_KEY" not in env and "ANTON_ANTHROPIC_API_KEY" in env:
-            env["ANTHROPIC_API_KEY"] = env["ANTON_ANTHROPIC_API_KEY"]
-        if "OPENAI_API_KEY" not in env and "ANTON_OPENAI_API_KEY" in env:
-            env["OPENAI_API_KEY"] = env["ANTON_OPENAI_API_KEY"]
-        if "OPENAI_BASE_URL" not in env and "ANTON_OPENAI_BASE_URL" in env:
-            env["OPENAI_BASE_URL"] = env["ANTON_OPENAI_BASE_URL"]
-        # Minds credentials can serve as OpenAI-compatible fallback when
-        # ANTON_OPENAI_* vars aren't persisted (new clean config path).
-        if "OPENAI_API_KEY" not in env and "ANTON_MINDS_API_KEY" in env and self._coding_provider == "openai-compatible":
-            env["OPENAI_API_KEY"] = env["ANTON_MINDS_API_KEY"]
-        if "OPENAI_BASE_URL" not in env and "ANTON_MINDS_URL" in env and self._coding_provider == "openai-compatible":
-            env["OPENAI_BASE_URL"] = f"{env['ANTON_MINDS_URL'].rstrip('/')}/api/v1"
-        # If settings provided an explicit API key (e.g. from ~/.anton/.env or
-        # Pydantic settings), inject it so the subprocess SDK can authenticate.
-        if self._coding_api_key:
-            sdk_key = {
-                "anthropic": "ANTHROPIC_API_KEY",
-                "openai": "OPENAI_API_KEY",
-                "openai-compatible": "OPENAI_API_KEY",
-            }.get(self._coding_provider, "")
-            if sdk_key:
-                env[sdk_key] = self._coding_api_key
-        # For openai-compatible (Minds), force OPENAI_BASE_URL so get_llm()
-        # doesn't default to api.openai.com. The explicit _coding_base_url from
-        # settings takes top priority, then ANTON_OPENAI_BASE_URL from .env.
-        if self._coding_provider in ("openai", "openai-compatible"):
-            base_url = (
-                self._coding_base_url
-                or env.get("ANTON_OPENAI_BASE_URL")
-                or env.get("OPENAI_BASE_URL")
-                or ""
-            )
-            if base_url:
-                env["OPENAI_BASE_URL"] = base_url
-                env["ANTON_OPENAI_BASE_URL"] = base_url
-        # Pass uv path so the boot script can use it for auto-installing
-        # missing modules (same installer that created the venv).
-        uv = self._find_uv()
-        if uv:
-            env["ANTON_UV_PATH"] = uv
-
-        # Ensure the anton package is importable in the subprocess (needed for
-        # get_llm and skill loading). The boot script runs from a temp file, so
-        # the project root isn't on sys.path by default.
-        _anton_root = str(Path(__file__).resolve().parent.parent)
-        python_path = env.get("PYTHONPATH", "")
-        if _anton_root not in python_path:
-            env["PYTHONPATH"] = _anton_root + (os.pathsep + python_path if python_path else "")
-
-        try:
-            self._proc = await asyncio.create_subprocess_exec(
-                self._venv_python, path,
-                stdin=asyncio.subprocess.PIPE,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-                env=env,
-                # Own session so os.killpg() kills the whole process tree
-                # (grandchildren spawned by user code, pip installs, etc.)
-                start_new_session=(sys.platform != "win32"),
-            )
-        except (FileNotFoundError, PermissionError, OSError) as exc:
-            # Python binary is missing or broken — nuke venv and raise
-            self._nuke_venv()
-            raise RuntimeError(
-                f"Failed to start scratchpad: {exc}. "
-                f"The Python venv has been deleted and will be recreated on next attempt."
-            ) from exc
-
-    async def execute(
-        self,
-        code: str,
-        *,
-        description: str = "",
-        estimated_time: str = "",
-        estimated_seconds: int = 0,
-    ) -> Cell:
-        """Send code to the subprocess, read the JSON result, return a Cell.
-
-        Backward-compatible wrapper around execute_streaming() that drains
-        all events and returns just the final Cell.
-        """
-        async for item in self.execute_streaming(
-            code,
-            description=description,
-            estimated_time=estimated_time,
-            estimated_seconds=estimated_seconds,
-        ):
-            if isinstance(item, Cell):
-                return item
-        # Should not reach here, but just in case
-        return Cell(code=code, stdout="", stderr="", error="No result produced.")
-
-    async def execute_streaming(
-        self,
-        code: str,
-        *,
-        description: str = "",
-        estimated_time: str = "",
-        estimated_seconds: int = 0,
-        cancel_event: asyncio.Event | None = None,
-    ):
-        """Async generator that sends code and yields progress strings and a final Cell.
-
-        Yields:
-            str — progress messages from progress() calls in the cell code
-            Cell — the final execution result (always the last item)
-        """
-        if self._proc is None or self._proc.returncode is not None:
-            yield Cell(
-                code=code,
-                stdout="",
-                stderr="",
-                error="Scratchpad process is not running. Use reset to restart.",
-                description=description,
-                estimated_time=estimated_time,
-            )
-            return
-
-        payload = code + "\n" + _CELL_DELIM + "\n"
-        self._proc.stdin.write(payload.encode())  # type: ignore[union-attr]
-        await self._proc.stdin.drain()  # type: ignore[union-attr]
-
-        total_timeout, inactivity_timeout = _compute_timeouts(estimated_seconds)
-
-        try:
-            result_data: dict | None = None
-            async for item in self._read_result(
-                total_timeout=total_timeout,
-                inactivity_timeout=inactivity_timeout,
-                cancel_event=cancel_event,
-            ):
-                if isinstance(item, str):
-                    yield item  # progress message
-                else:
-                    result_data = item
-        except (asyncio.TimeoutError, asyncio.CancelledError) as exc:
-            self._kill_tree()
-            try:
-                await asyncio.wait_for(self._proc.wait(), timeout=5)
-            except asyncio.TimeoutError:
-                pass
-            error_msg = (
-                f"{exc}. Process killed — state lost. Use reset to restart.\n\n"
-                "If a database query was running, it may still be executing server-side.\n"
-                "To check and cancel: run SHOW PROCESSLIST (MySQL) or\n"
-                "SELECT * FROM information_schema.processlist WHERE status='running' and cancel with KILL <id>.\n"
-                "For Snowflake: use SHOW RUNNING QUERIES and SELECT SYSTEM$CANCEL_ALL_QUERIES(<session_id>)."
-            )
-            cell = Cell(
-                code=code,
-                stdout="",
-                stderr="",
-                error=error_msg,
-                description=description,
-                estimated_time=estimated_time,
-            )
-            self.cells.append(cell)
-            yield cell
-            return
-        except Exception as exc:
-            # Catch-all for unexpected errors (e.g. JSON parse failures).
-            # The subprocess may still be alive and usable — don't kill it.
-            # Return the error as a Cell so the LLM can see it and recover.
-            cell = Cell(
-                code=code,
-                stdout="",
-                stderr="",
-                error=f"Scratchpad result could not be read: {exc}. The scratchpad is still running — you can retry.",
-                description=description,
-                estimated_time=estimated_time,
-            )
-            self.cells.append(cell)
-            yield cell
-            return
-
-        if result_data is None:
-            result_data = {"stdout": "", "stderr": "", "error": "Process exited unexpectedly."}
-
-        # Track packages that the subprocess auto-installed on ModuleNotFoundError
-        for pkg in result_data.get("auto_installed") or []:
-            self._installed_packages.add(pkg.lower())
-
-        cell = Cell(
-            code=code,
-            stdout=result_data.get("stdout", ""),
-            stderr=result_data.get("stderr", ""),
-            error=result_data.get("error"),
-            description=description,
-            estimated_time=estimated_time,
-            logs=result_data.get("logs", ""),
-        )
-        self.cells.append(cell)
-        yield cell
-
-    async def _read_result(
-        self,
-        *,
-        total_timeout: float = _CELL_TIMEOUT_DEFAULT,
-        inactivity_timeout: float = _CELL_INACTIVITY_TIMEOUT,
-        cancel_event: asyncio.Event | None = None,
-    ):
-        """Async generator that reads lines from stdout until result delimiters.
-
-        Yields:
-            str — progress messages (lines starting with _PROGRESS_MARKER)
-            dict — the final JSON result (always the last item)
-
-        Raises asyncio.TimeoutError with a descriptive message.
-        Raises asyncio.CancelledError if cancel_event is set.
-
-        After a progress() call is received, the inactivity window is extended
-        to _CELL_INACTIVITY_AFTER_PROGRESS (60s) so that long-running work
-        that signals liveness isn't killed prematurely.
-        """
-        import time as _time
-
-        lines: list[str] = []
-        in_result = False
-        start = _time.monotonic()
-        current_inactivity = inactivity_timeout
-
-        while True:
-            if cancel_event is not None and cancel_event.is_set():
-                raise asyncio.CancelledError("Cancelled by user")
-            elapsed = _time.monotonic() - start
-            remaining_total = total_timeout - elapsed
-            if remaining_total <= 0:
-                raise asyncio.TimeoutError(
-                    f"Cell timed out after {total_timeout:.0f}s total"
-                )
-
-            line_timeout = min(current_inactivity, remaining_total)
-            try:
-                raw = await asyncio.wait_for(
-                    self._proc.stdout.readline(),  # type: ignore[union-attr]
-                    timeout=line_timeout,
-                )
-            except asyncio.TimeoutError:
-                # Determine which timeout was hit
-                elapsed_now = _time.monotonic() - start
-                if elapsed_now >= total_timeout - 0.5:
-                    raise asyncio.TimeoutError(
-                        f"Cell timed out after {total_timeout:.0f}s total"
-                    ) from None
-                raise asyncio.TimeoutError(
-                    f"Cell killed after {current_inactivity:.0f}s of inactivity "
-                    f"(no output or progress() calls)"
-                ) from None
-
-            if not raw:
-                yield {"stdout": "", "stderr": "", "error": "Process exited unexpectedly."}
-                return
-
-            line = raw.decode().rstrip("\r\n")
-
-            # Progress marker — yield to caller, don't store.
-            # Extend inactivity window: the cell is actively working.
-            if line.startswith(_PROGRESS_MARKER):
-                current_inactivity = max(
-                    current_inactivity, _CELL_INACTIVITY_AFTER_PROGRESS,
-                )
-                message = line[len(_PROGRESS_MARKER):].strip()
-                yield message
-                continue
-
-            if line == _RESULT_START:
-                in_result = True
-                continue
-            if line == _RESULT_END:
-                break
-            if in_result:
-                lines.append(line)
-
-        raw_text = "\n".join(lines)
-        try:
-            yield json.loads(raw_text)
-        except json.JSONDecodeError:
-            # Try to extract valid JSON by finding the outermost { }
-            try:
-                start = raw_text.index("{")
-                end = raw_text.rindex("}") + 1
-                yield json.loads(raw_text[start:end])
-            except (ValueError, json.JSONDecodeError):
-                yield {
-                    "stdout": raw_text,
-                    "stderr": "",
-                    "logs": "",
-                    "error": "Scratchpad result was malformed (JSON parse failed). Output above may be partial.",
-                }
-
-    def view(self) -> str:
-        """Format all cells with their outputs."""
-        if not self.cells:
-            return f"Scratchpad '{self.name}' is empty."
-
-        parts: list[str] = []
-        for i, cell in enumerate(self.cells):
-            header = f"--- Cell {i + 1}"
-            if cell.description:
-                header += f": {cell.description}"
-            header += " ---"
-            parts.append(header)
-            parts.append(cell.code)
-            if cell.stdout:
-                parts.append(f"[output]\n{cell.stdout}")
-            if cell.logs:
-                parts.append(f"[logs]\n{cell.logs}")
-            if cell.stderr:
-                parts.append(f"[stderr]\n{cell.stderr}")
-            if cell.error:
-                parts.append(f"[error]\n{cell.error}")
-            if not cell.stdout and not cell.logs and not cell.stderr and not cell.error:
-                parts.append("(no output)")
-        return "\n".join(parts)
-
-    @staticmethod
-    def _truncate_output(text: str, max_lines: int = 20, max_chars: int = 2000) -> str:
-        """Truncate output to *max_lines* / *max_chars*, whichever is shorter."""
-        lines = text.split("\n")
-        # Apply line limit
-        if len(lines) > max_lines:
-            kept = "\n".join(lines[:max_lines])
-            remaining = len(lines) - max_lines
-            return kept + f"\n... ({remaining} more lines)"
-        # Apply char limit (don't cut mid-line)
-        if len(text) > max_chars:
-            total = 0
-            kept_lines: list[str] = []
-            for line in lines:
-                if total + len(line) + 1 > max_chars and kept_lines:
-                    break
-                kept_lines.append(line)
-                total += len(line) + 1
-            return "\n".join(kept_lines) + "\n... (truncated)"
-        return text
-
-    def render_notebook(self) -> str:
-        """Return a clean markdown notebook-style summary of all cells."""
-        # Filter out empty/whitespace-only cells
-        numbered: list[tuple[int, Cell]] = []
-        idx = 0
-        for cell in self.cells:
-            idx += 1
-            if not cell.code.strip():
-                continue
-            numbered.append((idx, cell))
-
-        if not numbered:
-            return f"Scratchpad '{self.name}' has no cells."
-
-        parts: list[str] = [f"## Scratchpad: {self.name} ({len(numbered)} cells)"]
-
-        for i, (num, cell) in enumerate(numbered):
-            header = f"\n### Cell {num}"
-            if cell.description:
-                header += f" \u2014 {cell.description}"
-            parts.append(header)
-            parts.append(f"```python\n{cell.code}\n```\n")
-
-            if cell.error:
-                # Show only the last traceback line
-                last_line = cell.error.strip().split("\n")[-1]
-                parts.append(f"**Error:** `{last_line}`")
-                # If there was partial output before the error, show it
-                if cell.stdout:
-                    truncated = self._truncate_output(cell.stdout.rstrip("\n"))
-                    parts.append(f"**Partial output:**\n```\n{truncated}\n```\n")
-            elif cell.stdout:
-                truncated = self._truncate_output(cell.stdout.rstrip("\n"))
-                parts.append(f"**Output:**\n```\n{truncated}\n```\n")
-
-            if cell.logs:
-                truncated_logs = self._truncate_output(cell.logs.rstrip("\n"), max_lines=10, max_chars=1000)
-                parts.append(f"**Logs:**\n```\n{truncated_logs}\n```\n")
-
-            if i < len(numbered) - 1:
-                parts.append("---")
-
-        return "\n".join(parts)
-
-    def _compact_cells(self) -> bool:
-        """Collapse old cells into a single summary cell to reduce context size.
-
-        Keeps the most recent _KEEP_RECENT cells intact.  Older cells are
-        replaced by one summary cell with a one-line-per-cell digest.
-
-        Returns True if compaction actually happened.
-        """
-        if len(self.cells) <= _KEEP_RECENT + 1:
-            return False
-
-        to_compact = self.cells[: -_KEEP_RECENT]
-        recent = self.cells[-_KEEP_RECENT:]
-
-        summary_lines: list[str] = []
-        for i, cell in enumerate(to_compact, 1):
-            status = "error" if cell.error else "ok"
-            desc = cell.description or f"Cell {i}"
-            first_line = ""
-            output = cell.stdout or cell.error or ""
-            if output:
-                first_line = output.strip().split("\n")[0][:120]
-            summary_lines.append(f"  [{status}] {desc}: {first_line}")
-
-        summary_text = (
-            f"# Compacted {len(to_compact)} earlier cells:\n"
-            + "\n".join(summary_lines)
-        )
-        summary_cell = Cell(
-            code="# (compacted — see summary above)",
-            stdout=summary_text,
-            stderr="",
-            error=None,
-            description=f"Summary of cells 1–{len(to_compact)}",
-        )
-        self.cells = [summary_cell] + recent
-        return True
-
-    async def cancel_running(self) -> None:
-        """Kill the current execution and restart the subprocess.
-
-        Called when the user cancels (ESC / Ctrl-C) during a running cell.
-        Kills the entire process tree, records a cancelled cell, then restarts
-        so the scratchpad is ready for the next use.
-        """
-        if self._proc is None or self._proc.returncode is not None:
-            return
-        self._kill_tree()
-        try:
-            await asyncio.wait_for(self._proc.wait(), timeout=5)
-        except asyncio.TimeoutError:
-            pass
-        # Record the cancelled execution
-        self.cells.append(Cell(
-            code="# (cancelled by user)",
-            stdout="",
-            stderr="",
-            error="Cancelled by user.",
-            description="Cancelled",
-        ))
-        # Restart so the pad is usable again
-        self._proc = None
-        await self.start()
-
-    async def _stop_process(self) -> None:
-        """Kill the subprocess and delete the boot script, but keep the venv."""
-        if self._proc is not None and self._proc.returncode is None:
-            try:
-                self._kill_tree()
-                await asyncio.wait_for(self._proc.wait(), timeout=5)
-            except (ProcessLookupError, asyncio.TimeoutError):
-                pass
-        # Close transport pipes to prevent "Event loop is closed" noise
-        # from __del__ during Python shutdown.  Only stdin (a StreamWriter)
-        # has .close(); stdout/stderr are StreamReaders with no close method.
-        if self._proc is not None:
-            pipe = self._proc.stdin
-            if pipe is not None:
-                if hasattr(pipe, "is_closing"):
-                    if not pipe.is_closing():
-                        pipe.close()
-                else:
-                    pipe.close()
-        self._proc = None
-        if self._boot_path is not None:
-            try:
-                os.unlink(self._boot_path)
-            except OSError:
-                pass
-            self._boot_path = None
-
-    def _kill_tree(self) -> None:
-        """Kill the subprocess and all its children via process group."""
-        if self._proc is None or self._proc.returncode is not None:
-            return
-        pid = self._proc.pid
-        if sys.platform != "win32":
-            # Kill the entire process group (subprocess + grandchildren)
-            import signal
-            try:
-                os.killpg(pid, signal.SIGKILL)
-            except (ProcessLookupError, PermissionError):
-                # Fallback: kill just the direct child
-                try:
-                    self._proc.kill()
-                except ProcessLookupError:
-                    pass
-        else:
-            self._proc.kill()
-
-    async def reset(self) -> None:
-        """Kill the process, clear cells, restart.
-
-        If the venv is healthy, it's reused (installed packages survive).
-        If the venv is broken, it's deleted and recreated from scratch.
-        """
-        await self._stop_process()
-        self.cells.clear()
-        # If the venv Python is broken, nuke it so _ensure_venv recreates it
-        if not self._verify_venv_python():
-            self._nuke_venv()
-        await self.start()
-
-    async def close(self) -> None:
-        """Kill the process and clean up the boot script temp file.
-
-        The venv is preserved on disk so installed packages survive across
-        sessions. A ``requirements.txt`` is saved to record what was installed.
-        """
-        await self._stop_process()
-        if self._venv_dir is not None:
-            self._save_requirements()
-            self._venv_dir = None
-            self._venv_python = None
-
-    async def install_packages(self, packages: list[str]) -> str:
-        """Install packages into the scratchpad's venv via pip (or uv pip)."""
-        if not packages:
-            return "No packages specified."
-        # Skip packages we've already installed in this scratchpad
-        needed = [p for p in packages if p.lower() not in self._installed_packages]
-        if not needed:
-            return "All packages already installed."
-        self._ensure_venv()
-
-        uv = self._find_uv()
-        if uv:
-            cmd = [uv, "pip", "install", "--python", self._venv_python, *needed]
-        else:
-            cmd = [self._venv_python, "-m", "pip", "install", "--no-input", *needed]
-
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.STDOUT,
-        )
-        try:
-            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=_INSTALL_TIMEOUT)
-        except asyncio.TimeoutError:
-            proc.kill()
-            await proc.wait()
-            return f"Install timed out after {_INSTALL_TIMEOUT}s."
-        output = stdout.decode()
-        if proc.returncode != 0:
-            return f"Install failed (exit {proc.returncode}):\n{output}"
-        # Track successfully installed packages
-        for p in needed:
-            self._installed_packages.add(p.lower())
-        return output
-
-
-class ScratchpadManager:
-    """Manages named scratchpad instances."""
-
-    def __init__(
-        self,
-        coding_provider: str = "anthropic",
-        coding_model: str = "",
-        coding_api_key: str = "",
-        coding_base_url: str = "",
-        workspace_path: Path | None = None,
-    ) -> None:
-        self._pads: dict[str, Scratchpad] = {}
-        self._coding_provider: str = coding_provider
-        self._coding_model: str = coding_model
-        self._coding_api_key: str = coding_api_key
-        self._coding_base_url: str = coding_base_url
-        if workspace_path is not None:
-            self._venvs_base = workspace_path / ".anton" / "scratchpad-venvs"
-        else:
-            self._venvs_base = Path("~/.anton/scratchpad-venvs").expanduser()
-        self._available_packages: list[str] = self.probe_packages()
-
-    @staticmethod
-    def probe_packages() -> list[str]:
-        """Return sorted list of installed package distribution names."""
-        from importlib.metadata import distributions
-
-        return sorted({d.metadata["Name"] for d in distributions()})
-
-    async def get_or_create(self, name: str) -> Scratchpad:
-        """Return existing pad or create + start a new one."""
-        if name not in self._pads:
-            pad = Scratchpad(
-                name=name,
-                _coding_provider=self._coding_provider,
-                _coding_model=self._coding_model,
-                _coding_api_key=self._coding_api_key,
-                _coding_base_url=self._coding_base_url,
-                _venvs_base=self._venvs_base,
-            )
-            await pad.start()
-            self._pads[name] = pad
-        return self._pads[name]
-
-    async def remove(self, name: str) -> str:
-        """Kill and fully delete a scratchpad (including its persistent venv)."""
-        pad = self._pads.pop(name, None)
-        if pad is None:
-            return f"No scratchpad named '{name}'."
-        await pad._stop_process()
-        pad._nuke_venv()
-        return f"Scratchpad '{name}' removed."
-
-    def list_pads(self) -> list[str]:
-        return list(self._pads.keys())
-
-    async def cancel_all_running(self) -> None:
-        """Cancel running executions in all scratchpads and restart them."""
-        for pad in self._pads.values():
-            await pad.cancel_running()
-
-    async def close_all(self) -> None:
-        """Cleanup all scratchpads on session end."""
-        for pad in self._pads.values():
-            await pad.close()
-        self._pads.clear()

From 2497be3ca457fb5d0ee4b4d8a6b9b59aa69c7c0a Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:07:12 +0200
Subject: [PATCH 066/134] Move manger

---
 anton/core/backends/manager.py | 82 ++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 anton/core/backends/manager.py

diff --git a/anton/core/backends/manager.py b/anton/core/backends/manager.py
new file mode 100644
index 00000000..eff9334c
--- /dev/null
+++ b/anton/core/backends/manager.py
@@ -0,0 +1,82 @@
+"""ScratchpadManager — lifecycle manager for named scratchpad runtimes."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from anton.core.backends.base import ScratchpadRuntime
+from anton.core.backends.local import LocalScratchpadRuntime
+
+
+class ScratchpadManager:
+    """Manages named scratchpad runtime instances."""
+
+    def __init__(
+        self,
+        coding_provider: str = "anthropic",
+        coding_model: str = "",
+        coding_api_key: str = "",
+        coding_base_url: str = "",
+        workspace_path: Path | None = None,
+    ) -> None:
+        self._pads: dict[str, ScratchpadRuntime] = {}
+        self._coding_provider = coding_provider
+        self._coding_model = coding_model
+        self._coding_api_key = coding_api_key
+        self._coding_base_url = coding_base_url
+        self._workspace_path = workspace_path
+        self._available_packages: list[str] = self.probe_packages()
+
+    @property
+    def pads(self) -> dict[str, ScratchpadRuntime]:
+        """Read-only view of the active scratchpad runtimes."""
+        return self._pads
+
+    @property
+    def available_packages(self) -> list[str]:
+        """Sorted list of installed package distribution names."""
+        return self._available_packages
+
+    @staticmethod
+    def probe_packages() -> list[str]:
+        """Return sorted list of installed package distribution names."""
+        from importlib.metadata import distributions
+
+        return sorted({d.metadata["Name"] for d in distributions()})
+
+    async def get_or_create(self, name: str) -> ScratchpadRuntime:
+        """Return existing pad or create + start a new one."""
+        if name not in self._pads:
+            pad = LocalScratchpadRuntime(
+                name=name,
+                coding_provider=self._coding_provider,
+                coding_model=self._coding_model,
+                coding_api_key=self._coding_api_key,
+                coding_base_url=self._coding_base_url,
+                workspace_path=self._workspace_path,
+            )
+            await pad.start()
+            self._pads[name] = pad
+        return self._pads[name]
+
+    async def remove(self, name: str) -> str:
+        """Kill and fully delete a scratchpad (including its persistent venv)."""
+        pad = self._pads.pop(name, None)
+        if pad is None:
+            return f"No scratchpad named '{name}'."
+        await pad.cleanup()
+        return f"Scratchpad '{name}' removed."
+
+    def list_pads(self) -> list[str]:
+        return list(self._pads.keys())
+
+    async def cancel_all_running(self) -> None:
+        """Cancel running executions in all scratchpads and restart them."""
+        for pad in self._pads.values():
+            await pad.cancel()
+
+    async def close_all(self) -> None:
+        """Cleanup all scratchpads on session end."""
+        for pad in self._pads.values():
+            await pad.close()
+        self._pads.clear()

From 8589a62bf0a7c951bf307ea1e6b5f6cdbe52c13c Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:07:25 +0200
Subject: [PATCH 067/134] Move local scratchpad to core

---
 anton/core/backends/local.py | 658 +++++++++++++++++++++++++++++++++++
 1 file changed, 658 insertions(+)
 create mode 100644 anton/core/backends/local.py

diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
new file mode 100644
index 00000000..d16debb8
--- /dev/null
+++ b/anton/core/backends/local.py
@@ -0,0 +1,658 @@
+"""LocalScratchpadRuntime — venv-based scratchpad for the CLI."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import shutil
+import sys
+import tempfile
+import venv
+from pathlib import Path
+
+from anton.core.backends.base import (
+    Cell,
+    ScratchpadRuntime,
+    _CELL_DELIM,
+    _CELL_INACTIVITY_AFTER_PROGRESS,
+    _CELL_TIMEOUT_DEFAULT,
+    _CELL_INACTIVITY_TIMEOUT,
+    _INSTALL_TIMEOUT,
+    _PROGRESS_MARKER,
+    _RESULT_END,
+    _RESULT_START,
+    _compute_timeouts,
+)
+
+_BOOT_SCRIPT_PATH = Path(__file__).parent.parent.parent / "scratchpad_boot.py"
+_MAX_OUTPUT = 10_000
+
+
+class LocalScratchpadRuntime(ScratchpadRuntime):
+    """Runs scratchpad cells in a persistent per-named venv subprocess."""
+
+    _MAX_VENV_RETRIES = 3
+
+    def __init__(
+        self,
+        name: str,
+        *,
+        cells: list[Cell] | None = None,
+        coding_provider: str = "anthropic",
+        coding_model: str = "",
+        coding_api_key: str = "",
+        coding_base_url: str = "",
+        workspace_path: Path | None = None,
+        _venvs_base: Path | None = None,
+    ) -> None:
+        super().__init__(
+            name,
+            cells=cells,
+            coding_provider=coding_provider,
+            coding_model=coding_model,
+            coding_api_key=coding_api_key,
+            workspace_path=workspace_path,
+        )
+        self._coding_base_url = coding_base_url
+        self._proc: asyncio.subprocess.Process | None = None
+        self._boot_path: str | None = None
+        self._venv_dir: str | None = None
+        self._venv_python: str | None = None
+        if _venvs_base is not None:
+            self._venvs_base = _venvs_base
+        elif workspace_path is not None:
+            self._venvs_base = workspace_path / ".anton" / "scratchpad-venvs"
+        else:
+            self._venvs_base = Path("~/.anton/scratchpad-venvs").expanduser()
+
+    # ── venv management ────────────────────────────────────────────────────
+
+    def _ensure_venv(self) -> None:
+        if self._venv_dir is not None and self._verify_venv_python():
+            return
+
+        venv_path = self._venvs_base / self.name
+        if venv_path.is_dir() and self._try_recycle_venv(venv_path):
+            return
+
+        if venv_path.is_dir():
+            self._nuke_venv()
+
+        last_error: Exception | None = None
+        for attempt in range(1, self._MAX_VENV_RETRIES + 1):
+            try:
+                self._create_venv()
+                if self._verify_venv_python():
+                    self._setup_parent_site_packages()
+                    self._save_python_version()
+                    return
+                raise RuntimeError(
+                    f"venv Python binary at {self._venv_python} is not functional"
+                )
+            except Exception as exc:
+                last_error = exc
+                self._nuke_venv()
+
+        raise RuntimeError(
+            f"Failed to create a working Python venv after {self._MAX_VENV_RETRIES} "
+            f"attempts. Last error: {last_error}. "
+            f"Try running: python3 -c 'print(\"ok\")' to verify your Python installation."
+        )
+
+    @staticmethod
+    def _find_uv() -> str | None:
+        uv = shutil.which("uv")
+        if uv:
+            return uv
+        if sys.platform == "win32":
+            candidates = (
+                os.path.expanduser("~/.local/bin/uv.exe"),
+                os.path.expanduser("~/.cargo/bin/uv.exe"),
+            )
+        else:
+            candidates = (
+                os.path.expanduser("~/.local/bin/uv"),
+                os.path.expanduser("~/.cargo/bin/uv"),
+            )
+        for candidate in candidates:
+            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
+                return candidate
+        return None
+
+    def _create_venv(self) -> None:
+        import subprocess as _sp
+
+        self._venv_dir = str(self._venvs_base / self.name)
+        os.makedirs(self._venv_dir, exist_ok=True)
+
+        uv = self._find_uv()
+        if uv:
+            _sp.run(
+                [
+                    uv, "venv", self._venv_dir,
+                    "--python", sys.executable,
+                    "--system-site-packages", "--seed", "--quiet",
+                ],
+                check=True,
+                capture_output=True,
+                timeout=30,
+            )
+        else:
+            venv.create(
+                self._venv_dir,
+                system_site_packages=True,
+                with_pip=False,
+                clear=True,
+            )
+
+        if sys.platform == "win32":
+            bin_dir = os.path.join(self._venv_dir, "Scripts")
+            self._venv_python = os.path.join(bin_dir, "python.exe")
+            self._add_windows_firewall_rule()
+        else:
+            bin_dir = os.path.join(self._venv_dir, "bin")
+            self._venv_python = os.path.join(bin_dir, "python")
+
+    def _verify_venv_python(self) -> bool:
+        if self._venv_python is None:
+            return False
+        if not os.path.exists(self._venv_python):
+            return False
+        try:
+            import subprocess
+            result = subprocess.run(
+                [self._venv_python, "-c", "print('ok')"],
+                capture_output=True,
+                timeout=5,
+            )
+            return result.returncode == 0 and "ok" in result.stdout.decode()
+        except Exception:
+            return False
+
+    def _nuke_venv(self) -> None:
+        if self._venv_dir is not None:
+            try:
+                shutil.rmtree(self._venv_dir)
+            except OSError:
+                pass
+        self._venv_dir = None
+        self._venv_python = None
+
+    def _add_windows_firewall_rule(self) -> None:
+        if self._venv_python is None or not os.path.isfile(self._venv_python):
+            return
+        import subprocess as _sp
+        rule_name = f"Anton Scratchpad - {self.name}"
+        try:
+            _sp.run(
+                [
+                    "netsh", "advfirewall", "firewall", "add", "rule",
+                    f"name={rule_name}", "dir=out", "action=allow",
+                    f"program={self._venv_python}",
+                ],
+                capture_output=True,
+                timeout=10,
+            )
+        except Exception:
+            pass
+        self._installed_packages.clear()
+
+    def _setup_parent_site_packages(self) -> None:
+        if sys.prefix != sys.base_prefix:
+            import site as _site
+            parent_site = _site.getsitepackages()
+            child_site = None
+            for dirpath, dirnames, _ in os.walk(self._venv_dir):
+                if "site-packages" in dirnames:
+                    child_site = os.path.join(dirpath, "site-packages")
+                    break
+            if child_site and parent_site:
+                pth_path = os.path.join(child_site, "_parent_venv.pth")
+                with open(pth_path, "w") as f:
+                    for sp in parent_site:
+                        f.write(sp + "\n")
+
+    def _try_recycle_venv(self, venv_path: Path) -> bool:
+        try:
+            self._venv_dir = str(venv_path)
+            if sys.platform == "win32":
+                self._venv_python = os.path.join(
+                    self._venv_dir, "Scripts", "python.exe"
+                )
+            else:
+                self._venv_python = os.path.join(self._venv_dir, "bin", "python")
+
+            if not self._verify_venv_python():
+                return False
+            if not self._check_python_version():
+                return False
+            self._load_requirements()
+            self._setup_parent_site_packages()
+            return True
+        except Exception:
+            return False
+
+    def _save_requirements(self) -> None:
+        if not self._venv_dir or not self._installed_packages:
+            return
+        try:
+            req_path = os.path.join(self._venv_dir, "requirements.txt")
+            with open(req_path, "w") as f:
+                for pkg in sorted(self._installed_packages):
+                    f.write(pkg + "\n")
+        except OSError:
+            pass
+
+    def _load_requirements(self) -> None:
+        if not self._venv_dir:
+            return
+        req_path = os.path.join(self._venv_dir, "requirements.txt")
+        try:
+            with open(req_path) as f:
+                for line in f:
+                    pkg = line.strip()
+                    if pkg:
+                        self._installed_packages.add(pkg)
+        except FileNotFoundError:
+            pass
+
+    def _save_python_version(self) -> None:
+        if not self._venv_dir:
+            return
+        try:
+            ver_path = os.path.join(self._venv_dir, ".python_version")
+            with open(ver_path, "w") as f:
+                f.write(f"{sys.version_info.major}.{sys.version_info.minor}\n")
+        except OSError:
+            pass
+
+    def _check_python_version(self) -> bool:
+        if not self._venv_dir:
+            return False
+        ver_path = os.path.join(self._venv_dir, ".python_version")
+        try:
+            with open(ver_path) as f:
+                saved = f.read().strip()
+            expected = f"{sys.version_info.major}.{sys.version_info.minor}"
+            return saved == expected
+        except FileNotFoundError:
+            return False
+
+    # ── Lifecycle ──────────────────────────────────────────────────────────
+
+    async def start(self) -> None:
+        """Write the boot script to a temp file and launch the subprocess."""
+        self._ensure_venv()
+
+        boot_code = _BOOT_SCRIPT_PATH.read_text()
+        fd, path = tempfile.mkstemp(suffix=".py", prefix="anton_scratchpad_")
+        os.write(fd, boot_code.encode())
+        os.close(fd)
+        self._boot_path = path
+
+        env = os.environ.copy()
+        if self._coding_model:
+            env["ANTON_SCRATCHPAD_MODEL"] = self._coding_model
+        if self._coding_provider:
+            env["ANTON_SCRATCHPAD_PROVIDER"] = self._coding_provider
+        if "ANTHROPIC_API_KEY" not in env and "ANTON_ANTHROPIC_API_KEY" in env:
+            env["ANTHROPIC_API_KEY"] = env["ANTON_ANTHROPIC_API_KEY"]
+        if "OPENAI_API_KEY" not in env and "ANTON_OPENAI_API_KEY" in env:
+            env["OPENAI_API_KEY"] = env["ANTON_OPENAI_API_KEY"]
+        if "OPENAI_BASE_URL" not in env and "ANTON_OPENAI_BASE_URL" in env:
+            env["OPENAI_BASE_URL"] = env["ANTON_OPENAI_BASE_URL"]
+        if (
+            "OPENAI_API_KEY" not in env
+            and "ANTON_MINDS_API_KEY" in env
+            and self._coding_provider == "openai-compatible"
+        ):
+            env["OPENAI_API_KEY"] = env["ANTON_MINDS_API_KEY"]
+        if (
+            "OPENAI_BASE_URL" not in env
+            and "ANTON_MINDS_URL" in env
+            and self._coding_provider == "openai-compatible"
+        ):
+            env["OPENAI_BASE_URL"] = f"{env['ANTON_MINDS_URL'].rstrip('/')}/api/v1"
+        if self._coding_api_key:
+            sdk_key = {
+                "anthropic": "ANTHROPIC_API_KEY",
+                "openai": "OPENAI_API_KEY",
+                "openai-compatible": "OPENAI_API_KEY",
+            }.get(self._coding_provider, "")
+            if sdk_key:
+                env[sdk_key] = self._coding_api_key
+        if self._coding_provider in ("openai", "openai-compatible"):
+            base_url = (
+                self._coding_base_url
+                or env.get("ANTON_OPENAI_BASE_URL")
+                or env.get("OPENAI_BASE_URL")
+                or ""
+            )
+            if base_url:
+                env["OPENAI_BASE_URL"] = base_url
+                env["ANTON_OPENAI_BASE_URL"] = base_url
+        uv = self._find_uv()
+        if uv:
+            env["ANTON_UV_PATH"] = uv
+
+        _anton_root = str(Path(__file__).resolve().parent.parent.parent.parent)
+        python_path = env.get("PYTHONPATH", "")
+        if _anton_root not in python_path:
+            env["PYTHONPATH"] = (
+                _anton_root + (os.pathsep + python_path if python_path else "")
+            )
+
+        try:
+            self._proc = await asyncio.create_subprocess_exec(
+                self._venv_python,
+                path,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                env=env,
+                start_new_session=(sys.platform != "win32"),
+            )
+        except (FileNotFoundError, PermissionError, OSError) as exc:
+            self._nuke_venv()
+            raise RuntimeError(
+                f"Failed to start scratchpad: {exc}. "
+                "The Python venv has been deleted and will be recreated on next attempt."
+            ) from exc
+
+    async def reset(self) -> None:
+        """Kill the process, clear cells, and restart."""
+        await self._stop_process()
+        self.cells.clear()
+        if not self._verify_venv_python():
+            self._nuke_venv()
+        await self.start()
+
+    async def close(self) -> None:
+        """Kill the process and save requirements; preserve the venv."""
+        await self._stop_process()
+        if self._venv_dir is not None:
+            self._save_requirements()
+            self._venv_dir = None
+            self._venv_python = None
+
+    async def cancel(self) -> None:
+        """Kill the current cell and restart the runtime."""
+        if self._proc is None or self._proc.returncode is not None:
+            return
+        self._kill_tree()
+        try:
+            await asyncio.wait_for(self._proc.wait(), timeout=5)
+        except asyncio.TimeoutError:
+            pass
+        self.cells.append(
+            Cell(
+                code="# (cancelled by user)",
+                stdout="",
+                stderr="",
+                error="Cancelled by user.",
+                description="Cancelled",
+            )
+        )
+        self._proc = None
+        await self.start()
+
+    async def cleanup(self) -> None:
+        """Kill process and delete the venv entirely."""
+        await self._stop_process()
+        self._nuke_venv()
+
+    # ── Execution ──────────────────────────────────────────────────────────
+
+    async def execute_streaming(
+        self,
+        code: str,
+        *,
+        description: str = "",
+        estimated_time: str = "",
+        estimated_seconds: int = 0,
+    ):
+        """Async generator: yields progress strings then a final Cell."""
+        if self._proc is None or self._proc.returncode is not None:
+            yield Cell(
+                code=code,
+                stdout="",
+                stderr="",
+                error="Scratchpad process is not running. Use reset to restart.",
+                description=description,
+                estimated_time=estimated_time,
+            )
+            return
+
+        payload = code + "\n" + _CELL_DELIM + "\n"
+        self._proc.stdin.write(payload.encode())  # type: ignore[union-attr]
+        await self._proc.stdin.drain()  # type: ignore[union-attr]
+
+        total_timeout, inactivity_timeout = _compute_timeouts(estimated_seconds)
+
+        try:
+            result_data: dict | None = None
+            async for item in self._read_result(
+                total_timeout=total_timeout,
+                inactivity_timeout=inactivity_timeout,
+            ):
+                if isinstance(item, str):
+                    yield item
+                else:
+                    result_data = item
+        except (asyncio.TimeoutError, asyncio.CancelledError) as exc:
+            self._kill_tree()
+            try:
+                await asyncio.wait_for(self._proc.wait(), timeout=5)
+            except asyncio.TimeoutError:
+                pass
+            error_msg = (
+                f"{exc}. Process killed — state lost. Use reset to restart.\n\n"
+                "If a database query was running, it may still be executing server-side.\n"
+                "To check and cancel: run SHOW PROCESSLIST (MySQL) or\n"
+                "SELECT * FROM information_schema.processlist WHERE status='running' "
+                "and cancel with KILL <id>.\n"
+                "For Snowflake: use SHOW RUNNING QUERIES and "
+                "SELECT SYSTEM$CANCEL_ALL_QUERIES(<session_id>)."
+            )
+            cell = Cell(
+                code=code,
+                stdout="",
+                stderr="",
+                error=error_msg,
+                description=description,
+                estimated_time=estimated_time,
+            )
+            self.cells.append(cell)
+            yield cell
+            return
+        except Exception as exc:
+            cell = Cell(
+                code=code,
+                stdout="",
+                stderr="",
+                error=(
+                    f"Scratchpad result could not be read: {exc}. "
+                    "The scratchpad is still running — you can retry."
+                ),
+                description=description,
+                estimated_time=estimated_time,
+            )
+            self.cells.append(cell)
+            yield cell
+            return
+
+        if result_data is None:
+            result_data = {
+                "stdout": "",
+                "stderr": "",
+                "error": "Process exited unexpectedly.",
+            }
+
+        for pkg in result_data.get("auto_installed") or []:
+            self._installed_packages.add(pkg.lower())
+
+        cell = Cell(
+            code=code,
+            stdout=result_data.get("stdout", ""),
+            stderr=result_data.get("stderr", ""),
+            error=result_data.get("error"),
+            description=description,
+            estimated_time=estimated_time,
+            logs=result_data.get("logs", ""),
+        )
+        self.cells.append(cell)
+        yield cell
+
+    async def _read_result(
+        self,
+        *,
+        total_timeout: float = _CELL_TIMEOUT_DEFAULT,
+        inactivity_timeout: float = _CELL_INACTIVITY_TIMEOUT,
+    ):
+        """Read stdout until result delimiters; yield progress strings then dict."""
+        import time as _time
+
+        lines: list[str] = []
+        in_result = False
+        start = _time.monotonic()
+        current_inactivity = inactivity_timeout
+
+        while True:
+            elapsed = _time.monotonic() - start
+            remaining_total = total_timeout - elapsed
+            if remaining_total <= 0:
+                raise asyncio.TimeoutError(
+                    f"Cell timed out after {total_timeout:.0f}s total"
+                )
+
+            line_timeout = min(current_inactivity, remaining_total)
+            try:
+                raw = await asyncio.wait_for(
+                    self._proc.stdout.readline(),  # type: ignore[union-attr]
+                    timeout=line_timeout,
+                )
+            except asyncio.TimeoutError:
+                elapsed_now = _time.monotonic() - start
+                if elapsed_now >= total_timeout - 0.5:
+                    raise asyncio.TimeoutError(
+                        f"Cell timed out after {total_timeout:.0f}s total"
+                    ) from None
+                raise asyncio.TimeoutError(
+                    f"Cell killed after {current_inactivity:.0f}s of inactivity "
+                    "(no output or progress() calls)"
+                ) from None
+
+            if not raw:
+                yield {"stdout": "", "stderr": "", "error": "Process exited unexpectedly."}
+                return
+
+            line = raw.decode().rstrip("\r\n")
+
+            if line.startswith(_PROGRESS_MARKER):
+                current_inactivity = max(
+                    current_inactivity, _CELL_INACTIVITY_AFTER_PROGRESS
+                )
+                message = line[len(_PROGRESS_MARKER):].strip()
+                yield message
+                continue
+
+            if line == _RESULT_START:
+                in_result = True
+                continue
+            if line == _RESULT_END:
+                break
+            if in_result:
+                lines.append(line)
+
+        raw_text = "\n".join(lines)
+        try:
+            yield json.loads(raw_text)
+        except json.JSONDecodeError:
+            try:
+                start_idx = raw_text.index("{")
+                end_idx = raw_text.rindex("}") + 1
+                yield json.loads(raw_text[start_idx:end_idx])
+            except (ValueError, json.JSONDecodeError):
+                yield {
+                    "stdout": raw_text,
+                    "stderr": "",
+                    "logs": "",
+                    "error": "Scratchpad result was malformed (JSON parse failed). "
+                    "Output above may be partial.",
+                }
+
+    async def install_packages(self, packages: list[str]) -> str:
+        if not packages:
+            return "No packages specified."
+        needed = [p for p in packages if p.lower() not in self._installed_packages]
+        if not needed:
+            return "All packages already installed."
+        self._ensure_venv()
+
+        uv = self._find_uv()
+        if uv:
+            cmd = [uv, "pip", "install", "--python", self._venv_python, *needed]
+        else:
+            cmd = [self._venv_python, "-m", "pip", "install", "--no-input", *needed]
+
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.STDOUT,
+        )
+        try:
+            stdout, _ = await asyncio.wait_for(
+                proc.communicate(), timeout=_INSTALL_TIMEOUT
+            )
+        except asyncio.TimeoutError:
+            proc.kill()
+            await proc.wait()
+            return f"Install timed out after {_INSTALL_TIMEOUT}s."
+        output = stdout.decode()
+        if proc.returncode != 0:
+            return f"Install failed (exit {proc.returncode}):\n{output}"
+        for p in needed:
+            self._installed_packages.add(p.lower())
+        return output
+
+    # ── Internal helpers ───────────────────────────────────────────────────
+
+    async def _stop_process(self) -> None:
+        if self._proc is not None and self._proc.returncode is None:
+            try:
+                self._kill_tree()
+                await asyncio.wait_for(self._proc.wait(), timeout=5)
+            except (ProcessLookupError, asyncio.TimeoutError):
+                pass
+        if self._proc is not None:
+            pipe = self._proc.stdin
+            if pipe is not None:
+                if hasattr(pipe, "is_closing"):
+                    if not pipe.is_closing():
+                        pipe.close()
+                else:
+                    pipe.close()
+        self._proc = None
+        if self._boot_path is not None:
+            try:
+                os.unlink(self._boot_path)
+            except OSError:
+                pass
+            self._boot_path = None
+
+    def _kill_tree(self) -> None:
+        if self._proc is None or self._proc.returncode is not None:
+            return
+        pid = self._proc.pid
+        if sys.platform != "win32":
+            import signal
+            try:
+                os.killpg(pid, signal.SIGKILL)
+            except (ProcessLookupError, PermissionError):
+                try:
+                    self._proc.kill()
+                except ProcessLookupError:
+                    pass
+        else:
+            self._proc.kill()

From f3ebf2447b6e1af065988c205e97930e249f614d Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:07:35 +0200
Subject: [PATCH 068/134] New base scratchpad interface

---
 anton/core/backends/base.py | 257 ++++++++++++++++++++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100644 anton/core/backends/base.py

diff --git a/anton/core/backends/base.py b/anton/core/backends/base.py
new file mode 100644
index 00000000..a765e292
--- /dev/null
+++ b/anton/core/backends/base.py
@@ -0,0 +1,257 @@
+"""ScratchpadRuntime ABC — pluggable scratchpad backend contract.
+
+Core interface shared by LocalScratchpadRuntime (CLI) and
+our cloud ScratchpadRuntime (Enterprise).
+"""
+
+from __future__ import annotations
+
+import json
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from pathlib import Path
+
+_CELL_TIMEOUT_DEFAULT = 120        # Default total timeout when no estimate given
+_CELL_INACTIVITY_TIMEOUT = 30      # Max silence between output lines before killing
+_CELL_INACTIVITY_AFTER_PROGRESS = 60  # Grace window after a progress() call
+_KEEP_RECENT = 5                   # Number of recent cells to keep during compaction
+_INSTALL_TIMEOUT = 120
+
+_CELL_DELIM = "__ANTON_CELL_END__"
+_RESULT_START = "__ANTON_RESULT__"
+_RESULT_END = "__ANTON_RESULT_END__"
+_PROGRESS_MARKER = "__ANTON_PROGRESS__"
+
+
+@dataclass
+class Cell:
+    """A single scratchpad execution unit — code in, outputs out."""
+    code: str
+    stdout: str
+    stderr: str
+    error: str | None
+    description: str = ""
+    estimated_time: str = ""
+    logs: str = ""
+
+
+def _compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
+    """Compute (total_timeout, inactivity_timeout) from an estimated run time.
+
+    - estimate == 0: use defaults (120s total, 30s inactivity).
+    - Otherwise: total = max(estimate * 2, estimate + 30), no hard cap.
+      inactivity = max(estimate * 0.5, 30), scales with estimate.
+    """
+    if estimated_seconds <= 0:
+        return float(_CELL_TIMEOUT_DEFAULT), float(_CELL_INACTIVITY_TIMEOUT)
+    total = max(estimated_seconds * 2, estimated_seconds + 30)
+    inactivity = max(estimated_seconds * 0.5, 30)
+    return float(total), float(inactivity)
+
+
+class ScratchpadRuntime(ABC):
+    """Abstract base class for scratchpad execution backends.
+
+    Concrete implementations provide a specific execution environment
+    (local venv, Docker container, etc.). The shared display, compaction,
+    and timeout logic lives here so all backends benefit automatically.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        *,
+        cells: list[Cell] | None = None,
+        coding_provider: str = "anthropic",
+        coding_model: str = "",
+        coding_api_key: str = "",
+        workspace_path: Path | None = None,
+    ) -> None:
+        self.name = name
+        self.cells: list[Cell] = cells if cells is not None else []
+        self._coding_provider = coding_provider
+        self._coding_model = coding_model
+        self._coding_api_key = coding_api_key
+        self._workspace_path = workspace_path or Path("~/.anton").expanduser()
+        self._installed_packages: set[str] = set()
+
+    @abstractmethod
+    async def start(self) -> None:
+        """Launch the runtime environment."""
+
+    @abstractmethod
+    async def reset(self) -> None:
+        """Kill the runtime, clear cells, and restart."""
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Shut down the runtime, preserving any persistent resources."""
+
+    @abstractmethod
+    async def cancel(self) -> None:
+        """Cancel the currently running cell and restart the runtime."""
+
+    @abstractmethod
+    async def install_packages(self, packages: list[str]) -> str:
+        """Install Python packages into the runtime environment."""
+
+    @abstractmethod
+    async def execute_streaming(
+        self,
+        code: str,
+        *,
+        description: str = "",
+        estimated_time: str = "",
+        estimated_seconds: int = 0,
+    ):
+        """Execute code and yield progress strings then a final Cell."""
+
+    @abstractmethod
+    async def cleanup(self) -> None:
+        """Release all backend-specific resources (venv dir, containers, etc.).
+
+        Called by ScratchpadManager.remove() to fully destroy this runtime.
+        Unlike close(), cleanup() removes persistent storage too.
+        """
+
+    async def execute(
+        self,
+        code: str,
+        *,
+        description: str = "",
+        estimated_time: str = "",
+        estimated_seconds: int = 0,
+    ) -> Cell:
+        """Drain execute_streaming() and return just the final Cell."""
+        async for item in self.execute_streaming(
+            code,
+            description=description,
+            estimated_time=estimated_time,
+            estimated_seconds=estimated_seconds,
+        ):
+            if isinstance(item, Cell):
+                return item
+        return Cell(code=code, stdout="", stderr="", error="No result produced.")
+
+    def view(self) -> str:
+        """Format all cells with their outputs for LLM consumption."""
+        if not self.cells:
+            return f"Scratchpad '{self.name}' is empty."
+
+        parts: list[str] = []
+        for i, cell in enumerate(self.cells):
+            header = f"--- Cell {i + 1}"
+            if cell.description:
+                header += f": {cell.description}"
+            header += " ---"
+            parts.append(header)
+            parts.append(cell.code)
+            if cell.stdout:
+                parts.append(f"[output]\n{cell.stdout}")
+            if cell.logs:
+                parts.append(f"[logs]\n{cell.logs}")
+            if cell.stderr:
+                parts.append(f"[stderr]\n{cell.stderr}")
+            if cell.error:
+                parts.append(f"[error]\n{cell.error}")
+            if not cell.stdout and not cell.logs and not cell.stderr and not cell.error:
+                parts.append("(no output)")
+        return "\n".join(parts)
+
+    def render_notebook(self) -> str:
+        """Return a clean markdown notebook-style summary of all cells."""
+        numbered: list[tuple[int, Cell]] = []
+        idx = 0
+        for cell in self.cells:
+            idx += 1
+            if not cell.code.strip():
+                continue
+            numbered.append((idx, cell))
+
+        if not numbered:
+            return f"Scratchpad '{self.name}' has no cells."
+
+        parts: list[str] = [f"## Scratchpad: {self.name} ({len(numbered)} cells)"]
+
+        for i, (num, cell) in enumerate(numbered):
+            header = f"\n### Cell {num}"
+            if cell.description:
+                header += f" \u2014 {cell.description}"
+            parts.append(header)
+            parts.append(f"```python\n{cell.code}\n```\n")
+
+            if cell.error:
+                last_line = cell.error.strip().split("\n")[-1]
+                parts.append(f"**Error:** `{last_line}`")
+                if cell.stdout:
+                    truncated = self._truncate_output(cell.stdout.rstrip("\n"))
+                    parts.append(f"**Partial output:**\n```\n{truncated}\n```\n")
+            elif cell.stdout:
+                truncated = self._truncate_output(cell.stdout.rstrip("\n"))
+                parts.append(f"**Output:**\n```\n{truncated}\n```\n")
+
+            if cell.logs:
+                truncated_logs = self._truncate_output(
+                    cell.logs.rstrip("\n"), max_lines=10, max_chars=1000
+                )
+                parts.append(f"**Logs:**\n```\n{truncated_logs}\n```\n")
+
+            if i < len(numbered) - 1:
+                parts.append("---")
+
+        return "\n".join(parts)
+
+    @staticmethod
+    def _truncate_output(text: str, max_lines: int = 20, max_chars: int = 2000) -> str:
+        """Truncate output to max_lines / max_chars, whichever is shorter."""
+        lines = text.split("\n")
+        if len(lines) > max_lines:
+            kept = "\n".join(lines[:max_lines])
+            remaining = len(lines) - max_lines
+            return kept + f"\n... ({remaining} more lines)"
+        if len(text) > max_chars:
+            total = 0
+            kept_lines: list[str] = []
+            for line in lines:
+                if total + len(line) + 1 > max_chars and kept_lines:
+                    break
+                kept_lines.append(line)
+                total += len(line) + 1
+            return "\n".join(kept_lines) + "\n... (truncated)"
+        return text
+
+    def _compact_cells(self) -> bool:
+        """Collapse old cells into a summary cell to reduce context size.
+
+        Keeps the most recent _KEEP_RECENT cells intact. Returns True if
+        compaction actually happened.
+        """
+        if len(self.cells) <= _KEEP_RECENT + 1:
+            return False
+
+        to_compact = self.cells[:-_KEEP_RECENT]
+        recent = self.cells[-_KEEP_RECENT:]
+
+        summary_lines: list[str] = []
+        for i, cell in enumerate(to_compact, 1):
+            status = "error" if cell.error else "ok"
+            desc = cell.description or f"Cell {i}"
+            first_line = ""
+            output = cell.stdout or cell.error or ""
+            if output:
+                first_line = output.strip().split("\n")[0][:120]
+            summary_lines.append(f"  [{status}] {desc}: {first_line}")
+
+        summary_text = (
+            f"# Compacted {len(to_compact)} earlier cells:\n"
+            + "\n".join(summary_lines)
+        )
+        summary_cell = Cell(
+            code="# (compacted — see summary above)",
+            stdout=summary_text,
+            stderr="",
+            error=None,
+            description=f"Summary of cells 1\u2013{len(to_compact)}",
+        )
+        self.cells = [summary_cell] + recent
+        return True
\ No newline at end of file

From 0a61d7a34e5b7bc45303fdd8828ba4b2e9f4bac7 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:07:40 +0200
Subject: [PATCH 069/134] Fix tests

---
 tests/test_scratchpad.py | 42 +++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/tests/test_scratchpad.py b/tests/test_scratchpad.py
index e26f3b2c..eeea042e 100644
--- a/tests/test_scratchpad.py
+++ b/tests/test_scratchpad.py
@@ -5,8 +5,13 @@
 
 import pytest
 
-import anton.scratchpad as scratchpad_module
-from anton.scratchpad import Cell, Scratchpad, ScratchpadManager
+import anton.core.backends.base as backends_base
+from anton.core.backends.base import Cell
+from anton.core.backends.local import LocalScratchpadRuntime
+from anton.core.backends.manager import ScratchpadManager
+
+# Alias for brevity in tests
+Scratchpad = LocalScratchpadRuntime
 
 
 class TestScratchpadBasicExecution:
@@ -110,8 +115,8 @@ async def test_reset_clears_state(self):
 class TestScratchpadEdgeCases:
     async def test_timeout_kills_process(self, monkeypatch):
         """Long-running code triggers timeout."""
-        monkeypatch.setattr(scratchpad_module, "_CELL_TIMEOUT_DEFAULT", 1)
-        monkeypatch.setattr(scratchpad_module, "_CELL_INACTIVITY_TIMEOUT", 1)
+        monkeypatch.setattr(backends_base, "_CELL_TIMEOUT_DEFAULT", 1)
+        monkeypatch.setattr(backends_base, "_CELL_INACTIVITY_TIMEOUT", 1)
         pad = Scratchpad(name="test")
         await pad.start()
         try:
@@ -298,7 +303,7 @@ async def test_render_notebook_hides_stderr_without_error(self):
     async def test_truncate_output_lines(self):
         """Respects line limit."""
         text = "\n".join(f"line {i}" for i in range(50))
-        result = Scratchpad._truncate_output(text, max_lines=10)
+        result = LocalScratchpadRuntime._truncate_output(text, max_lines=10)
         assert "line 0" in result
         assert "line 9" in result
         assert "line 10" not in result
@@ -307,7 +312,7 @@ async def test_truncate_output_lines(self):
     async def test_truncate_output_chars(self):
         """Respects char limit."""
         text = "\n".join("x" * 80 for _ in range(5))
-        result = Scratchpad._truncate_output(text, max_lines=100, max_chars=200)
+        result = LocalScratchpadRuntime._truncate_output(text, max_lines=100, max_chars=200)
         assert "(truncated)" in result
         assert len(result) < len(text)
 
@@ -402,7 +407,7 @@ async def test_env_vars_accessible(self, monkeypatch):
 
     async def test_get_llm_available_when_model_set(self):
         """get_llm() should be injected when ANTON_SCRATCHPAD_MODEL is set."""
-        pad = Scratchpad(name="llm-test", _coding_model="claude-test-model")
+        pad = Scratchpad(name="llm-test", coding_model="claude-test-model")
         await pad.start()
         try:
             cell = await pad.execute("llm = get_llm(); print(llm.model)")
@@ -424,7 +429,7 @@ async def test_get_llm_not_available_without_model(self):
 
     async def test_agentic_loop_available_when_model_set(self):
         """agentic_loop() should be injected alongside get_llm()."""
-        pad = Scratchpad(name="agentic-test", _coding_model="claude-test-model")
+        pad = Scratchpad(name="agentic-test", coding_model="claude-test-model")
         await pad.start()
         try:
             cell = await pad.execute("print(callable(agentic_loop))")
@@ -446,7 +451,7 @@ async def test_agentic_loop_not_available_without_model(self):
 
     async def test_generate_object_available_when_model_set(self):
         """generate_object() should be available on the LLM wrapper."""
-        pad = Scratchpad(name="genobj-test", _coding_model="claude-test-model")
+        pad = Scratchpad(name="genobj-test", coding_model="claude-test-model")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -462,7 +467,7 @@ async def test_api_key_bridged(self, monkeypatch):
         monkeypatch.setenv("ANTON_ANTHROPIC_API_KEY", "sk-ant-test-123")
         # Remove ANTHROPIC_API_KEY if set, to test the bridge
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
-        pad = Scratchpad(name="key-test", _coding_model="test-model")
+        pad = Scratchpad(name="key-test", coding_model="test-model")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -627,10 +632,7 @@ async def test_venv_nuked_on_corruption(self, tmp_path):
     async def test_remove_deletes_persistent_venv(self, tmp_path):
         """ScratchpadManager.remove() fully deletes the persistent venv dir."""
         import shutil
-        venvs_base = tmp_path / "venvs"
         mgr = ScratchpadManager(workspace_path=tmp_path)
-        # Override base to use our tmp dir
-        mgr._venvs_base = venvs_base
         try:
             pad = await mgr.get_or_create("deleteme")
             venv_dir = pad._venv_dir
@@ -639,7 +641,7 @@ async def test_remove_deletes_persistent_venv(self, tmp_path):
             assert not os.path.exists(venv_dir)
         finally:
             await mgr.close_all()
-            shutil.rmtree(venvs_base, ignore_errors=True)
+            shutil.rmtree(tmp_path / ".anton", ignore_errors=True)
 
     async def test_requirements_saved_on_close(self, tmp_path):
         """requirements.txt is written when pad has installed packages."""
@@ -721,8 +723,8 @@ async def test_progress_function_available_in_namespace(self):
 
     async def test_progress_resets_inactivity_timeout(self, monkeypatch):
         """Code that calls progress() frequently should survive even with a short inactivity timeout."""
-        monkeypatch.setattr(scratchpad_module, "_CELL_INACTIVITY_TIMEOUT", 2)
-        monkeypatch.setattr(scratchpad_module, "_CELL_TIMEOUT_DEFAULT", 10)
+        monkeypatch.setattr(backends_base, "_CELL_INACTIVITY_TIMEOUT", 2)
+        monkeypatch.setattr(backends_base, "_CELL_TIMEOUT_DEFAULT", 10)
         pad = Scratchpad(name="progress-keep-alive")
         await pad.start()
         try:
@@ -741,8 +743,8 @@ async def test_progress_resets_inactivity_timeout(self, monkeypatch):
 
     async def test_inactivity_timeout_kills_without_progress(self, monkeypatch):
         """Code that sleeps without progress() calls should be killed by inactivity timeout."""
-        monkeypatch.setattr(scratchpad_module, "_CELL_INACTIVITY_TIMEOUT", 2)
-        monkeypatch.setattr(scratchpad_module, "_CELL_TIMEOUT_DEFAULT", 60)
+        monkeypatch.setattr(backends_base, "_CELL_INACTIVITY_TIMEOUT", 2)
+        monkeypatch.setattr(backends_base, "_CELL_TIMEOUT_DEFAULT", 60)
         pad = Scratchpad(name="no-progress")
         await pad.start()
         try:
@@ -780,14 +782,14 @@ async def test_execute_streaming_yields_progress(self):
 
     async def test_compute_timeouts_no_estimate(self):
         """No estimate should use defaults."""
-        from anton.scratchpad import _compute_timeouts
+        from anton.core.backends.base import _compute_timeouts
         total, inactivity = _compute_timeouts(0)
         assert total == 120.0
         assert inactivity == 30.0
 
     async def test_compute_timeouts_with_estimate(self):
         """Estimate should scale total timeout and inactivity with no hard cap."""
-        from anton.scratchpad import _compute_timeouts
+        from anton.core.backends.base import _compute_timeouts
 
         # Small estimate: max(10*2, 10+30) = max(20, 40) = 40
         total, inactivity = _compute_timeouts(10)

From 468c4bedb56440c752617f84ce24b41d6c230769 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:11:06 +0200
Subject: [PATCH 070/134] Move boot

---
 anton/core/backends/local.py |   2 +-
 anton/scratchpad_boot.py     | 631 -----------------------------------
 2 files changed, 1 insertion(+), 632 deletions(-)
 delete mode 100644 anton/scratchpad_boot.py

diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index d16debb8..78033665 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -25,7 +25,7 @@
     _compute_timeouts,
 )
 
-_BOOT_SCRIPT_PATH = Path(__file__).parent.parent.parent / "scratchpad_boot.py"
+_BOOT_SCRIPT_PATH = Path(__file__).parent / "scratchpad_boot.py"
 _MAX_OUTPUT = 10_000
 
 
diff --git a/anton/scratchpad_boot.py b/anton/scratchpad_boot.py
deleted file mode 100644
index 7f205ea5..00000000
--- a/anton/scratchpad_boot.py
+++ /dev/null
@@ -1,631 +0,0 @@
-import io
-import json
-import os
-import sys
-import traceback
-
-_CELL_DELIM = "__ANTON_CELL_END__"
-_RESULT_START = "__ANTON_RESULT__"
-_RESULT_END = "__ANTON_RESULT_END__"
-
-# Persistent namespace across cells
-namespace = {"__builtins__": __builtins__}
-
-# --- Inject get_llm() for LLM access from scratchpad code ---
-_scratchpad_model = os.environ.get("ANTON_SCRATCHPAD_MODEL", "")
-if _scratchpad_model:
-    try:
-        import asyncio as _llm_asyncio
-
-        _scratchpad_provider_name = os.environ.get("ANTON_SCRATCHPAD_PROVIDER", "anthropic")
-        if _scratchpad_provider_name in ("openai", "openai-compatible"):
-            from anton.core.llm.openai import OpenAIProvider as _ProviderClass
-        else:
-            from anton.core.llm.anthropic import AnthropicProvider as _ProviderClass
-
-        _llm_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
-        if _scratchpad_provider_name in ("openai", "openai-compatible"):
-            # Explicitly pass base_url so Minds/openai-compatible endpoints work.
-            # The OpenAI SDK may or may not pick up OPENAI_BASE_URL from env,
-            # so we pass it directly to be safe.
-            _llm_base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get("ANTON_OPENAI_BASE_URL")
-            _llm_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("ANTON_OPENAI_API_KEY")
-            _llm_provider = _ProviderClass(
-                api_key=_llm_api_key or None,
-                base_url=_llm_base_url or None,
-                ssl_verify=_llm_ssl_verify,
-            )
-        else:
-            _llm_provider = _ProviderClass()  # Anthropic doesn't need ssl_verify
-        _llm_model = _scratchpad_model
-
-        _LLM_HEARTBEAT_INTERVAL = 10  # seconds between heartbeats during LLM calls
-
-        async def _run_with_heartbeat(coro):
-            """Run an async coroutine while emitting progress heartbeats.
-
-            LLM API calls can block for 30s+.  Without heartbeats, the
-            scratchpad inactivity timeout (30s) kills the process.  This
-            wrapper runs a heartbeat task alongside the real work.
-            """
-            async def _heartbeat():
-                elapsed = 0
-                while True:
-                    await _llm_asyncio.sleep(_LLM_HEARTBEAT_INTERVAL)
-                    elapsed += _LLM_HEARTBEAT_INTERVAL
-                    _real_stdout.write(
-                        _PROGRESS_MARKER + f" Waiting for LLM… ({elapsed}s)\n"
-                    )
-                    _real_stdout.flush()
-
-            beat = _llm_asyncio.create_task(_heartbeat())
-            try:
-                return await coro
-            finally:
-                beat.cancel()
-                try:
-                    await beat
-                except _llm_asyncio.CancelledError:
-                    pass
-
-        class _ScratchpadLLM:
-            """Sync LLM wrapper for scratchpad use. Mirrors SkillLLM interface."""
-
-            @property
-            def model(self):
-                return _llm_model
-
-            def complete(self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096):
-                """Call the LLM synchronously. Returns an LLMResponse.
-
-                Automatically emits progress heartbeats every 10s so that
-                long API calls don't trip the scratchpad inactivity timeout.
-                """
-                return _llm_asyncio.run(_run_with_heartbeat(
-                    _llm_provider.complete(
-                        model=_llm_model,
-                        system=system,
-                        messages=messages,
-                        tools=tools,
-                        tool_choice=tool_choice,
-                        max_tokens=max_tokens,
-                    )
-                ))
-
-            async def complete_async(self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096):
-                """Call the LLM asynchronously. Returns an LLMResponse.
-
-                Use this inside async code (e.g. asyncio.gather) for concurrent
-                LLM calls.  Emits heartbeats automatically like complete().
-                """
-                return await _run_with_heartbeat(
-                    _llm_provider.complete(
-                        model=_llm_model,
-                        system=system,
-                        messages=messages,
-                        tools=tools,
-                        tool_choice=tool_choice,
-                        max_tokens=max_tokens,
-                    )
-                )
-
-            def generate_object(self, schema_class, *, system, messages, max_tokens=4096):
-                """Generate a structured object matching a Pydantic model.
-
-                Uses tool_choice to force the LLM to return structured data.
-                Supports single models and list[Model].
-
-                Args:
-                    schema_class: A Pydantic BaseModel subclass, or list[Model].
-                    system: System prompt.
-                    messages: Conversation messages.
-                    max_tokens: Max tokens for the LLM call.
-
-                Returns:
-                    An instance of schema_class (or a list of instances).
-                """
-                from pydantic import BaseModel as _BaseModel
-
-                is_list = hasattr(schema_class, "__origin__") and schema_class.__origin__ is list
-                if is_list:
-                    inner_class = schema_class.__args__[0]
-
-                    class _ArrayWrapper(_BaseModel):
-                        items: list[inner_class]
-
-                    schema = _ArrayWrapper.model_json_schema()
-                    tool_name = f"{inner_class.__name__}_array"
-                else:
-                    schema = schema_class.model_json_schema()
-                    tool_name = schema_class.__name__
-
-                tool = {
-                    "name": tool_name,
-                    "description": f"Generate structured output matching the {tool_name} schema.",
-                    "input_schema": schema,
-                }
-
-                response = self.complete(
-                    system=system,
-                    messages=messages,
-                    tools=[tool],
-                    tool_choice={"type": "tool", "name": tool_name},
-                    max_tokens=max_tokens,
-                )
-
-                if not response.tool_calls:
-                    raise ValueError("LLM did not return structured output.")
-
-                import json as _json
-                raw = response.tool_calls[0].input
-
-                if is_list:
-                    wrapper = _ArrayWrapper.model_validate(raw)
-                    return wrapper.items
-                return schema_class.model_validate(raw)
-
-        _scratchpad_llm_instance = _ScratchpadLLM()
-
-        def get_llm():
-            """Get a pre-configured LLM client. No API keys needed."""
-            return _scratchpad_llm_instance
-
-        def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_tokens=4096):
-            """Run a synchronous LLM tool-call loop.
-
-            The LLM reasons, calls tools via handle_tool(name, inputs) -> str,
-            and iterates until it produces a final text response.
-
-            Args:
-                system: System prompt for the LLM.
-                user_message: Initial user message.
-                tools: Tool definitions (Anthropic tool schema format).
-                handle_tool: Callback (tool_name, tool_input) -> result_string.
-                max_turns: Safety limit on LLM round-trips (default 10).
-                max_tokens: Max tokens per LLM call.
-
-            Returns:
-                The final text response from the LLM.
-            """
-            llm = get_llm()
-            messages = [{"role": "user", "content": user_message}]
-
-            response = None
-            for _ in range(max_turns):
-                response = llm.complete(
-                    system=system,
-                    messages=messages,
-                    tools=tools,
-                    max_tokens=max_tokens,
-                )
-
-                if not response.tool_calls:
-                    return response.content
-
-                # Build assistant message with text + tool_use blocks
-                assistant_content = []
-                if response.content:
-                    assistant_content.append({"type": "text", "text": response.content})
-                for tc in response.tool_calls:
-                    assistant_content.append({
-                        "type": "tool_use",
-                        "id": tc.id,
-                        "name": tc.name,
-                        "input": tc.input,
-                    })
-                messages.append({"role": "assistant", "content": assistant_content})
-
-                # Execute each tool and collect results
-                tool_results = []
-                for tc in response.tool_calls:
-                    try:
-                        result = handle_tool(tc.name, tc.input)
-                    except Exception as exc:
-                        result = f"Error: {exc}"
-                    tool_results.append({
-                        "type": "tool_result",
-                        "tool_use_id": tc.id,
-                        "content": result,
-                    })
-                messages.append({"role": "user", "content": tool_results})
-
-            # Hit max_turns
-            return response.content if response else ""
-
-        namespace["get_llm"] = get_llm
-        namespace["agentic_loop"] = agentic_loop
-    except Exception:
-        pass  # LLM not available — not fatal (e.g. anthropic not installed)
-
-# --- Inject query_minds_data() for Minds datasource access from scratchpad ---
-_minds_datasource = os.environ.get("ANTON_MINDS_DATASOURCE", "")
-_minds_api_key = os.environ.get("ANTON_MINDS_API_KEY", "")
-_minds_url = os.environ.get("ANTON_MINDS_URL", "")
-if _minds_datasource and _minds_api_key and _minds_url:
-    try:
-        import ssl as _minds_ssl
-        import urllib.request as _minds_urllib
-
-        _minds_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
-
-        def query_minds_data(query, datasource=None):
-            """Query a Minds datasource with SQL. Returns dict with type, data, column_names, error_message."""
-            ds = datasource or _minds_datasource
-            url = f"{_minds_url}/api/v1/datasources/{ds}/query"
-            payload = json.dumps({"query": query, "native_query": True}).encode()
-
-            req = _minds_urllib.Request(url, data=payload, method="POST")
-            req.add_header("Authorization", f"Bearer {_minds_api_key}")
-            req.add_header("Content-Type", "application/json")
-            req.add_header("Accept", "application/json")
-            req.add_header("User-Agent", "Mozilla/5.0 (compatible; Anton/1.0; +https://github.com/mindsdb/anton)")
-            req.add_header("Accept-Language", "en-US,en;q=0.9")
-            req.add_header("Accept-Encoding", "identity")
-            req.add_header("Connection", "keep-alive")
-
-            ctx = None
-            if not _minds_ssl_verify:
-                ctx = _minds_ssl.create_default_context()
-                ctx.check_hostname = False
-                ctx.verify_mode = _minds_ssl.CERT_NONE
-
-            try:
-                with _minds_urllib.urlopen(req, context=ctx, timeout=60) as resp:
-                    return json.loads(resp.read().decode())
-            except _minds_urllib.HTTPError as e:
-                body = ""
-                try:
-                    body = e.read().decode()
-                except Exception:
-                    pass
-                return {
-                    "type": "error",
-                    "data": None,
-                    "column_names": None,
-                    "error_message": f"HTTP {e.code}: {body or e.reason}",
-                }
-            except Exception as e:
-                return {
-                    "type": "error",
-                    "data": None,
-                    "column_names": None,
-                    "error_message": str(e),
-                }
-
-        namespace["query_minds_data"] = query_minds_data
-    except Exception:
-        pass  # Minds query not available — not fatal
-
-# Read-execute loop
-_real_stdout = sys.stdout
-_real_stdin = sys.stdin
-
-_PROGRESS_MARKER = "__ANTON_PROGRESS__"
-_MAX_OUTPUT = 10_000
-
-def progress(message=""):
-    """Signal that long-running work is still active. Resets the inactivity timer."""
-    _real_stdout.write(_PROGRESS_MARKER + " " + str(message) + "\n")
-    _real_stdout.flush()
-
-namespace["progress"] = progress
-
-def sample(var, mode="preview", _name=None):
-    """Inspect a variable with type-aware formatting.
-
-    Args:
-        var: The variable to inspect.
-        mode: "preview" (default) — compact summary. "full" — complete dump.
-        _name: Optional label printed as header (auto-detected when possible).
-
-    Prints formatted output to stdout (captured by the cell).
-    """
-    _MAX_PREVIEW = 2000
-    _MAX_FULL = 10000
-    limit = _MAX_PREVIEW if mode == "preview" else _MAX_FULL
-
-    header = f"[sample:{type(var).__name__}]"
-    if _name:
-        header = f"[sample:{_name} ({type(var).__name__})]"
-
-    lines = [header]
-
-    try:
-        import pandas as _pd
-        if isinstance(var, _pd.DataFrame):
-            lines.append(f"Shape: {var.shape[0]} rows x {var.shape[1]} cols")
-            lines.append(f"Columns: {list(var.columns)}")
-            lines.append(f"Dtypes:\n{var.dtypes.to_string()}")
-            if mode == "preview":
-                lines.append(f"\nHead (5 rows):\n{var.head().to_string()}")
-                if var.shape[0] > 5:
-                    lines.append(f"\nTail (3 rows):\n{var.tail(3).to_string()}")
-                nulls = var.isnull().sum()
-                nulls = nulls[nulls > 0]
-                if len(nulls) > 0:
-                    lines.append(f"\nNull counts:\n{nulls.to_string()}")
-            else:
-                lines.append(f"\nDescribe:\n{var.describe(include='all').to_string()}")
-                n = min(50, var.shape[0])
-                lines.append(f"\nFirst {n} rows:\n{var.head(n).to_string()}")
-                nulls = var.isnull().sum()
-                nulls = nulls[nulls > 0]
-                if len(nulls) > 0:
-                    lines.append(f"\nNull counts:\n{nulls.to_string()}")
-            print(_truncate_sample("\n".join(lines), limit))
-            return
-
-        if isinstance(var, _pd.Series):
-            lines.append(f"Length: {len(var)}, Dtype: {var.dtype}, Name: {var.name}")
-            if mode == "preview":
-                lines.append(f"\nHead (10):\n{var.head(10).to_string()}")
-            else:
-                lines.append(f"\nDescribe:\n{var.describe().to_string()}")
-                n = min(50, len(var))
-                lines.append(f"\nFirst {n}:\n{var.head(n).to_string()}")
-            print(_truncate_sample("\n".join(lines), limit))
-            return
-    except ImportError:
-        pass
-
-    try:
-        import numpy as _np
-        if isinstance(var, _np.ndarray):
-            lines.append(f"Shape: {var.shape}, Dtype: {var.dtype}")
-            if mode == "preview":
-                flat = var.flatten()
-                n = min(10, len(flat))
-                lines.append(f"First {n} values: {flat[:n].tolist()}")
-                if len(flat) > 10:
-                    lines.append(f"Last 3 values: {flat[-3:].tolist()}")
-                lines.append(f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}")
-            else:
-                lines.append(f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}, Std: {var.std():.4g}")
-                lines.append(f"\n{repr(var)}")
-            print(_truncate_sample("\n".join(lines), limit))
-            return
-    except ImportError:
-        pass
-
-    if isinstance(var, dict):
-        lines.append(f"Keys ({len(var)}): {list(var.keys())[:20]}")
-        if len(var) > 20:
-            lines[-1] += f" ... (+{len(var) - 20} more)"
-        if mode == "preview":
-            for i, (k, v) in enumerate(var.items()):
-                if i >= 10:
-                    lines.append(f"  ... ({len(var) - 10} more entries)")
-                    break
-                val_repr = repr(v)
-                if len(val_repr) > 120:
-                    val_repr = val_repr[:120] + "..."
-                lines.append(f"  {k!r}: {val_repr}")
-        else:
-            import json as _json
-            try:
-                lines.append(_json.dumps(var, indent=2, default=str))
-            except (TypeError, ValueError):
-                lines.append(repr(var))
-        print(_truncate_sample("\n".join(lines), limit))
-        return
-
-    if isinstance(var, (list, tuple)):
-        kind = type(var).__name__
-        lines.append(f"Length: {len(var)}")
-        if len(var) > 0:
-            lines.append(f"Item types: {type(var[0]).__name__}" +
-                         (f" (mixed)" if len(var) > 1 and type(var[0]) != type(var[-1]) else ""))
-        if mode == "preview":
-            n = min(5, len(var))
-            for i in range(n):
-                val_repr = repr(var[i])
-                if len(val_repr) > 200:
-                    val_repr = val_repr[:200] + "..."
-                lines.append(f"  [{i}] {val_repr}")
-            if len(var) > 5:
-                lines.append(f"  ... ({len(var) - 5} more)")
-                val_repr = repr(var[-1])
-                if len(val_repr) > 200:
-                    val_repr = val_repr[:200] + "..."
-                lines.append(f"  [{len(var) - 1}] {val_repr}")
-        else:
-            for i, item in enumerate(var):
-                val_repr = repr(item)
-                if len(val_repr) > 500:
-                    val_repr = val_repr[:500] + "..."
-                lines.append(f"  [{i}] {val_repr}")
-        print(_truncate_sample("\n".join(lines), limit))
-        return
-
-    if isinstance(var, (set, frozenset)):
-        lines.append(f"Length: {len(var)}")
-        items = sorted(var, key=repr)
-        if mode == "preview":
-            for item in items[:10]:
-                lines.append(f"  {repr(item)}")
-            if len(items) > 10:
-                lines.append(f"  ... ({len(items) - 10} more)")
-        else:
-            for item in items:
-                lines.append(f"  {repr(item)}")
-        print(_truncate_sample("\n".join(lines), limit))
-        return
-
-    if isinstance(var, str):
-        lines.append(f"Length: {len(var)}")
-        if mode == "preview":
-            preview = var[:500]
-            if len(var) > 500:
-                preview += f"\n... ({len(var) - 500} more chars)"
-            lines.append(preview)
-        else:
-            lines.append(var)
-        print(_truncate_sample("\n".join(lines), limit))
-        return
-
-    if isinstance(var, bytes):
-        lines.append(f"Length: {len(var)} bytes")
-        if mode == "preview":
-            lines.append(repr(var[:200]))
-            if len(var) > 200:
-                lines.append(f"... ({len(var) - 200} more bytes)")
-        else:
-            lines.append(repr(var))
-        print(_truncate_sample("\n".join(lines), limit))
-        return
-
-    lines.append(f"Type: {type(var).__module__}.{type(var).__qualname__}")
-    # Show public attributes
-    attrs = [a for a in dir(var) if not a.startswith("_")]
-    if attrs:
-        lines.append(f"Attributes ({len(attrs)}): {attrs[:20]}")
-        if len(attrs) > 20:
-            lines[-1] += f" ... (+{len(attrs) - 20} more)"
-    r = repr(var)
-    if mode == "preview" and len(r) > 500:
-        r = r[:500] + "..."
-    lines.append(f"Repr: {r}")
-    print(_truncate_sample("\n".join(lines), limit))
-
-
-def _truncate_sample(text, max_chars):
-    """Truncate sample output to max_chars."""
-    if len(text) <= max_chars:
-        return text
-    return text[:max_chars] + f"\n... (truncated, {len(text)} chars total)"
-
-
-namespace["sample"] = sample
-
-# --- Logging capture ---
-# Libraries like httpx, urllib3, etc. use Python logging. By default these
-# messages are silently dropped (no handler configured). We set up a handler
-# that writes to a per-cell StringIO so the LLM can see connection info,
-# warnings, and errors from libraries.
-import logging as _logging
-
-class _CellLogHandler(_logging.Handler):
-    """Logging handler that writes to whichever StringIO is current."""
-    def __init__(self):
-        super().__init__(level=_logging.INFO)
-        self.buf = None
-        self.setFormatter(_logging.Formatter("%(name)s: %(message)s"))
-
-    def emit(self, record):
-        if self.buf is not None:
-            try:
-                self.buf.write(self.format(record) + "\n")
-            except Exception:
-                pass
-
-_cell_log_handler = _CellLogHandler()
-_logging.root.addHandler(_cell_log_handler)
-_logging.root.setLevel(_logging.INFO)
-
-while True:
-    lines = []
-    eof = False
-    try:
-        # Use explicit readline() instead of iterating stdin.  On Windows,
-        # Python's file iterator over a pipe uses internal block buffering
-        # (~8 KB) and won't yield lines until the buffer fills or the pipe
-        # closes — causing a deadlock.  readline() returns immediately on \n.
-        while True:
-            line = _real_stdin.readline()
-            if not line:
-                # EOF — parent closed stdin
-                eof = True
-                break
-            stripped = line.rstrip("\r\n")
-            if stripped == _CELL_DELIM:
-                break
-            lines.append(line)
-    except EOFError:
-        eof = True
-    if eof:
-        break
-
-    code = "".join(lines)
-    if not code.strip():
-        result = {"stdout": "", "stderr": "", "logs": "", "error": None}
-        _real_stdout.write(_RESULT_START + "\n")
-        _real_stdout.write(json.dumps(result) + "\n")
-        _real_stdout.write(_RESULT_END + "\n")
-        _real_stdout.flush()
-        continue
-
-    out_buf = io.StringIO()
-    err_buf = io.StringIO()
-    log_buf = io.StringIO()
-    error = None
-    _cell_log_handler.buf = log_buf
-
-    sys.stdout = out_buf
-    sys.stderr = err_buf
-    _auto_installed = []
-    try:
-        compiled = compile(code, "<scratchpad>", "exec")
-        exec(compiled, namespace)
-    except ModuleNotFoundError as _mnf:
-        # Auto-install the missing module and retry the cell once
-        _missing = _mnf.name
-        if _missing:
-            sys.stdout = _real_stdout
-            sys.stderr = sys.__stderr__
-            _cell_log_handler.buf = None
-            _real_stdout.write(_PROGRESS_MARKER + " " + f"Installing {_missing}..." + "\n")
-            _real_stdout.flush()
-            import subprocess as _sp
-            _uv_path = os.environ.get("ANTON_UV_PATH", "")
-            if _uv_path:
-                _pip = _sp.run(
-                    [_uv_path, "pip", "install", "--python", sys.executable, _missing],
-                    capture_output=True, timeout=120,
-                )
-            else:
-                _pip = _sp.run(
-                    [sys.executable, "-m", "pip", "install", _missing],
-                    capture_output=True, timeout=120,
-                )
-            # Reset buffers and retry
-            out_buf = io.StringIO()
-            err_buf = io.StringIO()
-            log_buf = io.StringIO()
-            _cell_log_handler.buf = log_buf
-            sys.stdout = out_buf
-            sys.stderr = err_buf
-            if _pip.returncode == 0:
-                _auto_installed.append(_missing)
-                try:
-                    exec(compiled, namespace)
-                except Exception:
-                    error = traceback.format_exc()
-            else:
-                error = (
-                    f"ModuleNotFoundError: No module named '{_missing}'\n"
-                    f"Auto-install failed:\n{_pip.stderr.decode()}"
-                )
-        else:
-            error = traceback.format_exc()
-    except Exception:
-        error = traceback.format_exc()
-    finally:
-        sys.stdout = _real_stdout
-        sys.stderr = sys.__stderr__
-        _cell_log_handler.buf = None
-
-    stdout_val = out_buf.getvalue()
-    if len(stdout_val) > _MAX_OUTPUT:
-        stdout_val = stdout_val[:_MAX_OUTPUT] + f"\n\n... (truncated, {len(stdout_val)} chars total)"
-    result = {
-        "stdout": stdout_val,
-        "stderr": err_buf.getvalue(),
-        "logs": log_buf.getvalue(),
-        "error": error,
-    }
-    if _auto_installed:
-        result["auto_installed"] = _auto_installed
-    _real_stdout.write(_RESULT_START + "\n")
-    _real_stdout.write(json.dumps(result) + "\n")
-    _real_stdout.write(_RESULT_END + "\n")
-    _real_stdout.flush()

From af882f255dd23e9f62dc95319c5ecb944bdd9d99 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:11:15 +0200
Subject: [PATCH 071/134] Boot scratcpad to core

---
 anton/core/backends/scratchpad_boot.py | 631 +++++++++++++++++++++++++
 1 file changed, 631 insertions(+)
 create mode 100644 anton/core/backends/scratchpad_boot.py

diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
new file mode 100644
index 00000000..7f205ea5
--- /dev/null
+++ b/anton/core/backends/scratchpad_boot.py
@@ -0,0 +1,631 @@
+import io
+import json
+import os
+import sys
+import traceback
+
+_CELL_DELIM = "__ANTON_CELL_END__"
+_RESULT_START = "__ANTON_RESULT__"
+_RESULT_END = "__ANTON_RESULT_END__"
+
+# Persistent namespace across cells
+namespace = {"__builtins__": __builtins__}
+
+# --- Inject get_llm() for LLM access from scratchpad code ---
+_scratchpad_model = os.environ.get("ANTON_SCRATCHPAD_MODEL", "")
+if _scratchpad_model:
+    try:
+        import asyncio as _llm_asyncio
+
+        _scratchpad_provider_name = os.environ.get("ANTON_SCRATCHPAD_PROVIDER", "anthropic")
+        if _scratchpad_provider_name in ("openai", "openai-compatible"):
+            from anton.core.llm.openai import OpenAIProvider as _ProviderClass
+        else:
+            from anton.core.llm.anthropic import AnthropicProvider as _ProviderClass
+
+        _llm_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
+        if _scratchpad_provider_name in ("openai", "openai-compatible"):
+            # Explicitly pass base_url so Minds/openai-compatible endpoints work.
+            # The OpenAI SDK may or may not pick up OPENAI_BASE_URL from env,
+            # so we pass it directly to be safe.
+            _llm_base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get("ANTON_OPENAI_BASE_URL")
+            _llm_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("ANTON_OPENAI_API_KEY")
+            _llm_provider = _ProviderClass(
+                api_key=_llm_api_key or None,
+                base_url=_llm_base_url or None,
+                ssl_verify=_llm_ssl_verify,
+            )
+        else:
+            _llm_provider = _ProviderClass()  # Anthropic doesn't need ssl_verify
+        _llm_model = _scratchpad_model
+
+        _LLM_HEARTBEAT_INTERVAL = 10  # seconds between heartbeats during LLM calls
+
+        async def _run_with_heartbeat(coro):
+            """Run an async coroutine while emitting progress heartbeats.
+
+            LLM API calls can block for 30s+.  Without heartbeats, the
+            scratchpad inactivity timeout (30s) kills the process.  This
+            wrapper runs a heartbeat task alongside the real work.
+            """
+            async def _heartbeat():
+                elapsed = 0
+                while True:
+                    await _llm_asyncio.sleep(_LLM_HEARTBEAT_INTERVAL)
+                    elapsed += _LLM_HEARTBEAT_INTERVAL
+                    _real_stdout.write(
+                        _PROGRESS_MARKER + f" Waiting for LLM… ({elapsed}s)\n"
+                    )
+                    _real_stdout.flush()
+
+            beat = _llm_asyncio.create_task(_heartbeat())
+            try:
+                return await coro
+            finally:
+                beat.cancel()
+                try:
+                    await beat
+                except _llm_asyncio.CancelledError:
+                    pass
+
+        class _ScratchpadLLM:
+            """Sync LLM wrapper for scratchpad use. Mirrors SkillLLM interface."""
+
+            @property
+            def model(self):
+                return _llm_model
+
+            def complete(self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096):
+                """Call the LLM synchronously. Returns an LLMResponse.
+
+                Automatically emits progress heartbeats every 10s so that
+                long API calls don't trip the scratchpad inactivity timeout.
+                """
+                return _llm_asyncio.run(_run_with_heartbeat(
+                    _llm_provider.complete(
+                        model=_llm_model,
+                        system=system,
+                        messages=messages,
+                        tools=tools,
+                        tool_choice=tool_choice,
+                        max_tokens=max_tokens,
+                    )
+                ))
+
+            async def complete_async(self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096):
+                """Call the LLM asynchronously. Returns an LLMResponse.
+
+                Use this inside async code (e.g. asyncio.gather) for concurrent
+                LLM calls.  Emits heartbeats automatically like complete().
+                """
+                return await _run_with_heartbeat(
+                    _llm_provider.complete(
+                        model=_llm_model,
+                        system=system,
+                        messages=messages,
+                        tools=tools,
+                        tool_choice=tool_choice,
+                        max_tokens=max_tokens,
+                    )
+                )
+
+            def generate_object(self, schema_class, *, system, messages, max_tokens=4096):
+                """Generate a structured object matching a Pydantic model.
+
+                Uses tool_choice to force the LLM to return structured data.
+                Supports single models and list[Model].
+
+                Args:
+                    schema_class: A Pydantic BaseModel subclass, or list[Model].
+                    system: System prompt.
+                    messages: Conversation messages.
+                    max_tokens: Max tokens for the LLM call.
+
+                Returns:
+                    An instance of schema_class (or a list of instances).
+                """
+                from pydantic import BaseModel as _BaseModel
+
+                is_list = hasattr(schema_class, "__origin__") and schema_class.__origin__ is list
+                if is_list:
+                    inner_class = schema_class.__args__[0]
+
+                    class _ArrayWrapper(_BaseModel):
+                        items: list[inner_class]
+
+                    schema = _ArrayWrapper.model_json_schema()
+                    tool_name = f"{inner_class.__name__}_array"
+                else:
+                    schema = schema_class.model_json_schema()
+                    tool_name = schema_class.__name__
+
+                tool = {
+                    "name": tool_name,
+                    "description": f"Generate structured output matching the {tool_name} schema.",
+                    "input_schema": schema,
+                }
+
+                response = self.complete(
+                    system=system,
+                    messages=messages,
+                    tools=[tool],
+                    tool_choice={"type": "tool", "name": tool_name},
+                    max_tokens=max_tokens,
+                )
+
+                if not response.tool_calls:
+                    raise ValueError("LLM did not return structured output.")
+
+                import json as _json
+                raw = response.tool_calls[0].input
+
+                if is_list:
+                    wrapper = _ArrayWrapper.model_validate(raw)
+                    return wrapper.items
+                return schema_class.model_validate(raw)
+
+        _scratchpad_llm_instance = _ScratchpadLLM()
+
+        def get_llm():
+            """Get a pre-configured LLM client. No API keys needed."""
+            return _scratchpad_llm_instance
+
+        def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_tokens=4096):
+            """Run a synchronous LLM tool-call loop.
+
+            The LLM reasons, calls tools via handle_tool(name, inputs) -> str,
+            and iterates until it produces a final text response.
+
+            Args:
+                system: System prompt for the LLM.
+                user_message: Initial user message.
+                tools: Tool definitions (Anthropic tool schema format).
+                handle_tool: Callback (tool_name, tool_input) -> result_string.
+                max_turns: Safety limit on LLM round-trips (default 10).
+                max_tokens: Max tokens per LLM call.
+
+            Returns:
+                The final text response from the LLM.
+            """
+            llm = get_llm()
+            messages = [{"role": "user", "content": user_message}]
+
+            response = None
+            for _ in range(max_turns):
+                response = llm.complete(
+                    system=system,
+                    messages=messages,
+                    tools=tools,
+                    max_tokens=max_tokens,
+                )
+
+                if not response.tool_calls:
+                    return response.content
+
+                # Build assistant message with text + tool_use blocks
+                assistant_content = []
+                if response.content:
+                    assistant_content.append({"type": "text", "text": response.content})
+                for tc in response.tool_calls:
+                    assistant_content.append({
+                        "type": "tool_use",
+                        "id": tc.id,
+                        "name": tc.name,
+                        "input": tc.input,
+                    })
+                messages.append({"role": "assistant", "content": assistant_content})
+
+                # Execute each tool and collect results
+                tool_results = []
+                for tc in response.tool_calls:
+                    try:
+                        result = handle_tool(tc.name, tc.input)
+                    except Exception as exc:
+                        result = f"Error: {exc}"
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": tc.id,
+                        "content": result,
+                    })
+                messages.append({"role": "user", "content": tool_results})
+
+            # Hit max_turns
+            return response.content if response else ""
+
+        namespace["get_llm"] = get_llm
+        namespace["agentic_loop"] = agentic_loop
+    except Exception:
+        pass  # LLM not available — not fatal (e.g. anthropic not installed)
+
+# --- Inject query_minds_data() for Minds datasource access from scratchpad ---
+_minds_datasource = os.environ.get("ANTON_MINDS_DATASOURCE", "")
+_minds_api_key = os.environ.get("ANTON_MINDS_API_KEY", "")
+_minds_url = os.environ.get("ANTON_MINDS_URL", "")
+if _minds_datasource and _minds_api_key and _minds_url:
+    try:
+        import ssl as _minds_ssl
+        import urllib.request as _minds_urllib
+
+        _minds_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
+
+        def query_minds_data(query, datasource=None):
+            """Query a Minds datasource with SQL. Returns dict with type, data, column_names, error_message."""
+            ds = datasource or _minds_datasource
+            url = f"{_minds_url}/api/v1/datasources/{ds}/query"
+            payload = json.dumps({"query": query, "native_query": True}).encode()
+
+            req = _minds_urllib.Request(url, data=payload, method="POST")
+            req.add_header("Authorization", f"Bearer {_minds_api_key}")
+            req.add_header("Content-Type", "application/json")
+            req.add_header("Accept", "application/json")
+            req.add_header("User-Agent", "Mozilla/5.0 (compatible; Anton/1.0; +https://github.com/mindsdb/anton)")
+            req.add_header("Accept-Language", "en-US,en;q=0.9")
+            req.add_header("Accept-Encoding", "identity")
+            req.add_header("Connection", "keep-alive")
+
+            ctx = None
+            if not _minds_ssl_verify:
+                ctx = _minds_ssl.create_default_context()
+                ctx.check_hostname = False
+                ctx.verify_mode = _minds_ssl.CERT_NONE
+
+            try:
+                with _minds_urllib.urlopen(req, context=ctx, timeout=60) as resp:
+                    return json.loads(resp.read().decode())
+            except _minds_urllib.HTTPError as e:
+                body = ""
+                try:
+                    body = e.read().decode()
+                except Exception:
+                    pass
+                return {
+                    "type": "error",
+                    "data": None,
+                    "column_names": None,
+                    "error_message": f"HTTP {e.code}: {body or e.reason}",
+                }
+            except Exception as e:
+                return {
+                    "type": "error",
+                    "data": None,
+                    "column_names": None,
+                    "error_message": str(e),
+                }
+
+        namespace["query_minds_data"] = query_minds_data
+    except Exception:
+        pass  # Minds query not available — not fatal
+
+# Read-execute loop
+_real_stdout = sys.stdout
+_real_stdin = sys.stdin
+
+_PROGRESS_MARKER = "__ANTON_PROGRESS__"
+_MAX_OUTPUT = 10_000
+
+def progress(message=""):
+    """Signal that long-running work is still active. Resets the inactivity timer."""
+    _real_stdout.write(_PROGRESS_MARKER + " " + str(message) + "\n")
+    _real_stdout.flush()
+
+namespace["progress"] = progress
+
+def sample(var, mode="preview", _name=None):
+    """Inspect a variable with type-aware formatting.
+
+    Args:
+        var: The variable to inspect.
+        mode: "preview" (default) — compact summary. "full" — complete dump.
+        _name: Optional label printed as header (auto-detected when possible).
+
+    Prints formatted output to stdout (captured by the cell).
+    """
+    _MAX_PREVIEW = 2000
+    _MAX_FULL = 10000
+    limit = _MAX_PREVIEW if mode == "preview" else _MAX_FULL
+
+    header = f"[sample:{type(var).__name__}]"
+    if _name:
+        header = f"[sample:{_name} ({type(var).__name__})]"
+
+    lines = [header]
+
+    try:
+        import pandas as _pd
+        if isinstance(var, _pd.DataFrame):
+            lines.append(f"Shape: {var.shape[0]} rows x {var.shape[1]} cols")
+            lines.append(f"Columns: {list(var.columns)}")
+            lines.append(f"Dtypes:\n{var.dtypes.to_string()}")
+            if mode == "preview":
+                lines.append(f"\nHead (5 rows):\n{var.head().to_string()}")
+                if var.shape[0] > 5:
+                    lines.append(f"\nTail (3 rows):\n{var.tail(3).to_string()}")
+                nulls = var.isnull().sum()
+                nulls = nulls[nulls > 0]
+                if len(nulls) > 0:
+                    lines.append(f"\nNull counts:\n{nulls.to_string()}")
+            else:
+                lines.append(f"\nDescribe:\n{var.describe(include='all').to_string()}")
+                n = min(50, var.shape[0])
+                lines.append(f"\nFirst {n} rows:\n{var.head(n).to_string()}")
+                nulls = var.isnull().sum()
+                nulls = nulls[nulls > 0]
+                if len(nulls) > 0:
+                    lines.append(f"\nNull counts:\n{nulls.to_string()}")
+            print(_truncate_sample("\n".join(lines), limit))
+            return
+
+        if isinstance(var, _pd.Series):
+            lines.append(f"Length: {len(var)}, Dtype: {var.dtype}, Name: {var.name}")
+            if mode == "preview":
+                lines.append(f"\nHead (10):\n{var.head(10).to_string()}")
+            else:
+                lines.append(f"\nDescribe:\n{var.describe().to_string()}")
+                n = min(50, len(var))
+                lines.append(f"\nFirst {n}:\n{var.head(n).to_string()}")
+            print(_truncate_sample("\n".join(lines), limit))
+            return
+    except ImportError:
+        pass
+
+    try:
+        import numpy as _np
+        if isinstance(var, _np.ndarray):
+            lines.append(f"Shape: {var.shape}, Dtype: {var.dtype}")
+            if mode == "preview":
+                flat = var.flatten()
+                n = min(10, len(flat))
+                lines.append(f"First {n} values: {flat[:n].tolist()}")
+                if len(flat) > 10:
+                    lines.append(f"Last 3 values: {flat[-3:].tolist()}")
+                lines.append(f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}")
+            else:
+                lines.append(f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}, Std: {var.std():.4g}")
+                lines.append(f"\n{repr(var)}")
+            print(_truncate_sample("\n".join(lines), limit))
+            return
+    except ImportError:
+        pass
+
+    if isinstance(var, dict):
+        lines.append(f"Keys ({len(var)}): {list(var.keys())[:20]}")
+        if len(var) > 20:
+            lines[-1] += f" ... (+{len(var) - 20} more)"
+        if mode == "preview":
+            for i, (k, v) in enumerate(var.items()):
+                if i >= 10:
+                    lines.append(f"  ... ({len(var) - 10} more entries)")
+                    break
+                val_repr = repr(v)
+                if len(val_repr) > 120:
+                    val_repr = val_repr[:120] + "..."
+                lines.append(f"  {k!r}: {val_repr}")
+        else:
+            import json as _json
+            try:
+                lines.append(_json.dumps(var, indent=2, default=str))
+            except (TypeError, ValueError):
+                lines.append(repr(var))
+        print(_truncate_sample("\n".join(lines), limit))
+        return
+
+    if isinstance(var, (list, tuple)):
+        kind = type(var).__name__
+        lines.append(f"Length: {len(var)}")
+        if len(var) > 0:
+            lines.append(f"Item types: {type(var[0]).__name__}" +
+                         (f" (mixed)" if len(var) > 1 and type(var[0]) != type(var[-1]) else ""))
+        if mode == "preview":
+            n = min(5, len(var))
+            for i in range(n):
+                val_repr = repr(var[i])
+                if len(val_repr) > 200:
+                    val_repr = val_repr[:200] + "..."
+                lines.append(f"  [{i}] {val_repr}")
+            if len(var) > 5:
+                lines.append(f"  ... ({len(var) - 5} more)")
+                val_repr = repr(var[-1])
+                if len(val_repr) > 200:
+                    val_repr = val_repr[:200] + "..."
+                lines.append(f"  [{len(var) - 1}] {val_repr}")
+        else:
+            for i, item in enumerate(var):
+                val_repr = repr(item)
+                if len(val_repr) > 500:
+                    val_repr = val_repr[:500] + "..."
+                lines.append(f"  [{i}] {val_repr}")
+        print(_truncate_sample("\n".join(lines), limit))
+        return
+
+    if isinstance(var, (set, frozenset)):
+        lines.append(f"Length: {len(var)}")
+        items = sorted(var, key=repr)
+        if mode == "preview":
+            for item in items[:10]:
+                lines.append(f"  {repr(item)}")
+            if len(items) > 10:
+                lines.append(f"  ... ({len(items) - 10} more)")
+        else:
+            for item in items:
+                lines.append(f"  {repr(item)}")
+        print(_truncate_sample("\n".join(lines), limit))
+        return
+
+    if isinstance(var, str):
+        lines.append(f"Length: {len(var)}")
+        if mode == "preview":
+            preview = var[:500]
+            if len(var) > 500:
+                preview += f"\n... ({len(var) - 500} more chars)"
+            lines.append(preview)
+        else:
+            lines.append(var)
+        print(_truncate_sample("\n".join(lines), limit))
+        return
+
+    if isinstance(var, bytes):
+        lines.append(f"Length: {len(var)} bytes")
+        if mode == "preview":
+            lines.append(repr(var[:200]))
+            if len(var) > 200:
+                lines.append(f"... ({len(var) - 200} more bytes)")
+        else:
+            lines.append(repr(var))
+        print(_truncate_sample("\n".join(lines), limit))
+        return
+
+    lines.append(f"Type: {type(var).__module__}.{type(var).__qualname__}")
+    # Show public attributes
+    attrs = [a for a in dir(var) if not a.startswith("_")]
+    if attrs:
+        lines.append(f"Attributes ({len(attrs)}): {attrs[:20]}")
+        if len(attrs) > 20:
+            lines[-1] += f" ... (+{len(attrs) - 20} more)"
+    r = repr(var)
+    if mode == "preview" and len(r) > 500:
+        r = r[:500] + "..."
+    lines.append(f"Repr: {r}")
+    print(_truncate_sample("\n".join(lines), limit))
+
+
+def _truncate_sample(text, max_chars):
+    """Truncate sample output to max_chars."""
+    if len(text) <= max_chars:
+        return text
+    return text[:max_chars] + f"\n... (truncated, {len(text)} chars total)"
+
+
+namespace["sample"] = sample
+
+# --- Logging capture ---
+# Libraries like httpx, urllib3, etc. use Python logging. By default these
+# messages are silently dropped (no handler configured). We set up a handler
+# that writes to a per-cell StringIO so the LLM can see connection info,
+# warnings, and errors from libraries.
+import logging as _logging
+
+class _CellLogHandler(_logging.Handler):
+    """Logging handler that writes to whichever StringIO is current."""
+    def __init__(self):
+        super().__init__(level=_logging.INFO)
+        self.buf = None
+        self.setFormatter(_logging.Formatter("%(name)s: %(message)s"))
+
+    def emit(self, record):
+        if self.buf is not None:
+            try:
+                self.buf.write(self.format(record) + "\n")
+            except Exception:
+                pass
+
+_cell_log_handler = _CellLogHandler()
+_logging.root.addHandler(_cell_log_handler)
+_logging.root.setLevel(_logging.INFO)
+
+while True:
+    lines = []
+    eof = False
+    try:
+        # Use explicit readline() instead of iterating stdin.  On Windows,
+        # Python's file iterator over a pipe uses internal block buffering
+        # (~8 KB) and won't yield lines until the buffer fills or the pipe
+        # closes — causing a deadlock.  readline() returns immediately on \n.
+        while True:
+            line = _real_stdin.readline()
+            if not line:
+                # EOF — parent closed stdin
+                eof = True
+                break
+            stripped = line.rstrip("\r\n")
+            if stripped == _CELL_DELIM:
+                break
+            lines.append(line)
+    except EOFError:
+        eof = True
+    if eof:
+        break
+
+    code = "".join(lines)
+    if not code.strip():
+        result = {"stdout": "", "stderr": "", "logs": "", "error": None}
+        _real_stdout.write(_RESULT_START + "\n")
+        _real_stdout.write(json.dumps(result) + "\n")
+        _real_stdout.write(_RESULT_END + "\n")
+        _real_stdout.flush()
+        continue
+
+    out_buf = io.StringIO()
+    err_buf = io.StringIO()
+    log_buf = io.StringIO()
+    error = None
+    _cell_log_handler.buf = log_buf
+
+    sys.stdout = out_buf
+    sys.stderr = err_buf
+    _auto_installed = []
+    try:
+        compiled = compile(code, "<scratchpad>", "exec")
+        exec(compiled, namespace)
+    except ModuleNotFoundError as _mnf:
+        # Auto-install the missing module and retry the cell once
+        _missing = _mnf.name
+        if _missing:
+            sys.stdout = _real_stdout
+            sys.stderr = sys.__stderr__
+            _cell_log_handler.buf = None
+            _real_stdout.write(_PROGRESS_MARKER + " " + f"Installing {_missing}..." + "\n")
+            _real_stdout.flush()
+            import subprocess as _sp
+            _uv_path = os.environ.get("ANTON_UV_PATH", "")
+            if _uv_path:
+                _pip = _sp.run(
+                    [_uv_path, "pip", "install", "--python", sys.executable, _missing],
+                    capture_output=True, timeout=120,
+                )
+            else:
+                _pip = _sp.run(
+                    [sys.executable, "-m", "pip", "install", _missing],
+                    capture_output=True, timeout=120,
+                )
+            # Reset buffers and retry
+            out_buf = io.StringIO()
+            err_buf = io.StringIO()
+            log_buf = io.StringIO()
+            _cell_log_handler.buf = log_buf
+            sys.stdout = out_buf
+            sys.stderr = err_buf
+            if _pip.returncode == 0:
+                _auto_installed.append(_missing)
+                try:
+                    exec(compiled, namespace)
+                except Exception:
+                    error = traceback.format_exc()
+            else:
+                error = (
+                    f"ModuleNotFoundError: No module named '{_missing}'\n"
+                    f"Auto-install failed:\n{_pip.stderr.decode()}"
+                )
+        else:
+            error = traceback.format_exc()
+    except Exception:
+        error = traceback.format_exc()
+    finally:
+        sys.stdout = _real_stdout
+        sys.stderr = sys.__stderr__
+        _cell_log_handler.buf = None
+
+    stdout_val = out_buf.getvalue()
+    if len(stdout_val) > _MAX_OUTPUT:
+        stdout_val = stdout_val[:_MAX_OUTPUT] + f"\n\n... (truncated, {len(stdout_val)} chars total)"
+    result = {
+        "stdout": stdout_val,
+        "stderr": err_buf.getvalue(),
+        "logs": log_buf.getvalue(),
+        "error": error,
+    }
+    if _auto_installed:
+        result["auto_installed"] = _auto_installed
+    _real_stdout.write(_RESULT_START + "\n")
+    _real_stdout.write(json.dumps(result) + "\n")
+    _real_stdout.write(_RESULT_END + "\n")
+    _real_stdout.flush()

From eb66f8d753b901a9ad3878dcc2293dd30a7ee550 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:40:11 +0200
Subject: [PATCH 072/134] Move constants

---
 anton/core/backends/base.py  | 38 +++++------------------
 anton/core/backends/local.py | 59 ++++++++++++++++++++++--------------
 2 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/anton/core/backends/base.py b/anton/core/backends/base.py
index a765e292..cfc7f434 100644
--- a/anton/core/backends/base.py
+++ b/anton/core/backends/base.py
@@ -11,16 +11,6 @@
 from dataclasses import dataclass, field
 from pathlib import Path
 
-_CELL_TIMEOUT_DEFAULT = 120        # Default total timeout when no estimate given
-_CELL_INACTIVITY_TIMEOUT = 30      # Max silence between output lines before killing
-_CELL_INACTIVITY_AFTER_PROGRESS = 60  # Grace window after a progress() call
-_KEEP_RECENT = 5                   # Number of recent cells to keep during compaction
-_INSTALL_TIMEOUT = 120
-
-_CELL_DELIM = "__ANTON_CELL_END__"
-_RESULT_START = "__ANTON_RESULT__"
-_RESULT_END = "__ANTON_RESULT_END__"
-_PROGRESS_MARKER = "__ANTON_PROGRESS__"
 
 
 @dataclass
@@ -35,20 +25,6 @@ class Cell:
     logs: str = ""
 
 
-def _compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
-    """Compute (total_timeout, inactivity_timeout) from an estimated run time.
-
-    - estimate == 0: use defaults (120s total, 30s inactivity).
-    - Otherwise: total = max(estimate * 2, estimate + 30), no hard cap.
-      inactivity = max(estimate * 0.5, 30), scales with estimate.
-    """
-    if estimated_seconds <= 0:
-        return float(_CELL_TIMEOUT_DEFAULT), float(_CELL_INACTIVITY_TIMEOUT)
-    total = max(estimated_seconds * 2, estimated_seconds + 30)
-    inactivity = max(estimated_seconds * 0.5, 30)
-    return float(total), float(inactivity)
-
-
 class ScratchpadRuntime(ABC):
     """Abstract base class for scratchpad execution backends.
 
@@ -223,14 +199,16 @@ def _truncate_output(text: str, max_lines: int = 20, max_chars: int = 2000) -> s
     def _compact_cells(self) -> bool:
         """Collapse old cells into a summary cell to reduce context size.
 
-        Keeps the most recent _KEEP_RECENT cells intact. Returns True if
-        compaction actually happened.
+        Keeps the most recent settings.cell_keep_recent cells intact. Returns
+        True if compaction actually happened.
         """
-        if len(self.cells) <= _KEEP_RECENT + 1:
+        from anton.core.settings import CoreSettings
+        keep = CoreSettings().cell_keep_recent
+        if len(self.cells) <= keep + 1:
             return False
 
-        to_compact = self.cells[:-_KEEP_RECENT]
-        recent = self.cells[-_KEEP_RECENT:]
+        to_compact = self.cells[:-keep]
+        recent = self.cells[-keep:]
 
         summary_lines: list[str] = []
         for i, cell in enumerate(to_compact, 1):
@@ -251,7 +229,7 @@ def _compact_cells(self) -> bool:
             stdout=summary_text,
             stderr="",
             error=None,
-            description=f"Summary of cells 1\u2013{len(to_compact)}",
+            description=f"Summary of cells 1–{len(to_compact)}",
         )
         self.cells = [summary_cell] + recent
         return True
\ No newline at end of file
diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index 78033665..16997bd6 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -11,24 +11,32 @@
 import venv
 from pathlib import Path
 
-from anton.core.backends.base import (
-    Cell,
-    ScratchpadRuntime,
-    _CELL_DELIM,
-    _CELL_INACTIVITY_AFTER_PROGRESS,
-    _CELL_TIMEOUT_DEFAULT,
-    _CELL_INACTIVITY_TIMEOUT,
-    _INSTALL_TIMEOUT,
-    _PROGRESS_MARKER,
-    _RESULT_END,
-    _RESULT_START,
-    _compute_timeouts,
+from anton.core.backends.base import Cell, ScratchpadRuntime
+from anton.core.backends.wire import (
+    CELL_DELIM,
+    PROGRESS_MARKER,
+    RESULT_END,
+    RESULT_START,
 )
+from anton.core.settings import CoreSettings
 
 _BOOT_SCRIPT_PATH = Path(__file__).parent / "scratchpad_boot.py"
 _MAX_OUTPUT = 10_000
 
 
+def _compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
+    """Compute (total_timeout, inactivity_timeout) from an estimated run time.
+
+    Reads defaults from CoreSettings so they're tunable via env vars.
+    """
+    s = CoreSettings()
+    if estimated_seconds <= 0:
+        return float(s.cell_timeout_default), float(s.cell_inactivity_timeout)
+    total = max(estimated_seconds * 2, estimated_seconds + 30)
+    inactivity = max(estimated_seconds * 0.5, 30)
+    return float(total), float(inactivity)
+
+
 class LocalScratchpadRuntime(ScratchpadRuntime):
     """Runs scratchpad cells in a persistent per-named venv subprocess."""
 
@@ -424,7 +432,7 @@ async def execute_streaming(
             )
             return
 
-        payload = code + "\n" + _CELL_DELIM + "\n"
+        payload = code + "\n" + CELL_DELIM + "\n"
         self._proc.stdin.write(payload.encode())  # type: ignore[union-attr]
         await self._proc.stdin.drain()  # type: ignore[union-attr]
 
@@ -507,12 +515,18 @@ async def execute_streaming(
     async def _read_result(
         self,
         *,
-        total_timeout: float = _CELL_TIMEOUT_DEFAULT,
-        inactivity_timeout: float = _CELL_INACTIVITY_TIMEOUT,
+        total_timeout: float | None = None,
+        inactivity_timeout: float | None = None,
     ):
         """Read stdout until result delimiters; yield progress strings then dict."""
         import time as _time
 
+        s = CoreSettings()
+        if total_timeout is None:
+            total_timeout = float(s.cell_timeout_default)
+        if inactivity_timeout is None:
+            inactivity_timeout = float(s.cell_inactivity_timeout)
+
         lines: list[str] = []
         in_result = False
         start = _time.monotonic()
@@ -549,18 +563,18 @@ async def _read_result(
 
             line = raw.decode().rstrip("\r\n")
 
-            if line.startswith(_PROGRESS_MARKER):
+            if line.startswith(PROGRESS_MARKER):
                 current_inactivity = max(
-                    current_inactivity, _CELL_INACTIVITY_AFTER_PROGRESS
+                    current_inactivity, float(s.cell_inactivity_after_progress)
                 )
-                message = line[len(_PROGRESS_MARKER):].strip()
+                message = line[len(PROGRESS_MARKER):].strip()
                 yield message
                 continue
 
-            if line == _RESULT_START:
+            if line == RESULT_START:
                 in_result = True
                 continue
-            if line == _RESULT_END:
+            if line == RESULT_END:
                 break
             if in_result:
                 lines.append(line)
@@ -596,6 +610,7 @@ async def install_packages(self, packages: list[str]) -> str:
         else:
             cmd = [self._venv_python, "-m", "pip", "install", "--no-input", *needed]
 
+        _install_timeout = CoreSettings().cell_install_timeout
         proc = await asyncio.create_subprocess_exec(
             *cmd,
             stdout=asyncio.subprocess.PIPE,
@@ -603,12 +618,12 @@ async def install_packages(self, packages: list[str]) -> str:
         )
         try:
             stdout, _ = await asyncio.wait_for(
-                proc.communicate(), timeout=_INSTALL_TIMEOUT
+                proc.communicate(), timeout=_install_timeout
             )
         except asyncio.TimeoutError:
             proc.kill()
             await proc.wait()
-            return f"Install timed out after {_INSTALL_TIMEOUT}s."
+            return f"Install timed out after {_install_timeout}s."
         output = stdout.decode()
         if proc.returncode != 0:
             return f"Install failed (exit {proc.returncode}):\n{output}"

From 2133a7b2ed6741167c66b2394f4e42871189028f Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:40:16 +0200
Subject: [PATCH 073/134] Move constants

---
 anton/core/backends/scratchpad_boot.py | 27 ++++++++++++++------------
 anton/core/settings.py                 |  9 ++++++++-
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
index 7f205ea5..814a5501 100644
--- a/anton/core/backends/scratchpad_boot.py
+++ b/anton/core/backends/scratchpad_boot.py
@@ -4,9 +4,11 @@
 import sys
 import traceback
 
-_CELL_DELIM = "__ANTON_CELL_END__"
-_RESULT_START = "__ANTON_RESULT__"
-_RESULT_END = "__ANTON_RESULT_END__"
+from anton.core.backends.wire import (
+    CELL_DELIM,
+    RESULT_START,
+    RESULT_END,
+)
 
 # Persistent namespace across cells
 namespace = {"__builtins__": __builtins__}
@@ -54,7 +56,7 @@ async def _heartbeat():
                     await _llm_asyncio.sleep(_LLM_HEARTBEAT_INTERVAL)
                     elapsed += _LLM_HEARTBEAT_INTERVAL
                     _real_stdout.write(
-                        _PROGRESS_MARKER + f" Waiting for LLM… ({elapsed}s)\n"
+                        PROGRESS_MARKER + f" Waiting for LLM… ({elapsed}s)\n"
                     )
                     _real_stdout.flush()
 
@@ -300,12 +302,13 @@ def query_minds_data(query, datasource=None):
 _real_stdout = sys.stdout
 _real_stdin = sys.stdin
 
-_PROGRESS_MARKER = "__ANTON_PROGRESS__"
+from anton.core.backends.wire import PROGRESS_MARKER
+
 _MAX_OUTPUT = 10_000
 
 def progress(message=""):
     """Signal that long-running work is still active. Resets the inactivity timer."""
-    _real_stdout.write(_PROGRESS_MARKER + " " + str(message) + "\n")
+    _real_stdout.write(PROGRESS_MARKER + " " + str(message) + "\n")
     _real_stdout.flush()
 
 namespace["progress"] = progress
@@ -537,7 +540,7 @@ def emit(self, record):
                 eof = True
                 break
             stripped = line.rstrip("\r\n")
-            if stripped == _CELL_DELIM:
+            if stripped == CELL_DELIM:
                 break
             lines.append(line)
     except EOFError:
@@ -548,9 +551,9 @@ def emit(self, record):
     code = "".join(lines)
     if not code.strip():
         result = {"stdout": "", "stderr": "", "logs": "", "error": None}
-        _real_stdout.write(_RESULT_START + "\n")
+        _real_stdout.write(RESULT_START + "\n")
         _real_stdout.write(json.dumps(result) + "\n")
-        _real_stdout.write(_RESULT_END + "\n")
+        _real_stdout.write(RESULT_END + "\n")
         _real_stdout.flush()
         continue
 
@@ -573,7 +576,7 @@ def emit(self, record):
             sys.stdout = _real_stdout
             sys.stderr = sys.__stderr__
             _cell_log_handler.buf = None
-            _real_stdout.write(_PROGRESS_MARKER + " " + f"Installing {_missing}..." + "\n")
+            _real_stdout.write(PROGRESS_MARKER + " " + f"Installing {_missing}..." + "\n")
             _real_stdout.flush()
             import subprocess as _sp
             _uv_path = os.environ.get("ANTON_UV_PATH", "")
@@ -625,7 +628,7 @@ def emit(self, record):
     }
     if _auto_installed:
         result["auto_installed"] = _auto_installed
-    _real_stdout.write(_RESULT_START + "\n")
+    _real_stdout.write(RESULT_START + "\n")
     _real_stdout.write(json.dumps(result) + "\n")
-    _real_stdout.write(_RESULT_END + "\n")
+    _real_stdout.write(RESULT_END + "\n")
     _real_stdout.flush()
diff --git a/anton/core/settings.py b/anton/core/settings.py
index 4cf1b9c6..79e62a00 100644
--- a/anton/core/settings.py
+++ b/anton/core/settings.py
@@ -1,6 +1,6 @@
 from pydantic_settings import BaseSettings
 
-
+#
 class CoreSettings(BaseSettings):
     model_config = {"env_prefix": "ANTON_", "extra": "ignore"}
 
@@ -11,3 +11,10 @@ class CoreSettings(BaseSettings):
     max_consecutive_errors: int = 5
     resilience_nudge_at: int = 2
     token_status_cache_ttl: float = 60.0
+
+    # Scratchpad execution tuning
+    cell_timeout_default: int = 120       # Total timeout when no estimate given (s)
+    cell_inactivity_timeout: int = 30     # Max silence between output lines (s)
+    cell_inactivity_after_progress: int = 60  # Grace window after progress() call (s)
+    cell_install_timeout: int = 120       # pip/uv install timeout (s)
+    cell_keep_recent: int = 5             # Recent cells preserved during compaction

From 483b32c724d0c02f34e926f6bfbc908a1f517bf0 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 15:40:27 +0200
Subject: [PATCH 074/134] Protocol consts

---
 anton/core/backends/wire.py | 10 ++++++++++
 tests/test_scratchpad.py    | 19 +++++++++----------
 2 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 anton/core/backends/wire.py

diff --git a/anton/core/backends/wire.py b/anton/core/backends/wire.py
new file mode 100644
index 00000000..46e16e6d
--- /dev/null
+++ b/anton/core/backends/wire.py
@@ -0,0 +1,10 @@
+"""Wire protocol constants shared between LocalScratchpadRuntime and scratchpad_boot.py.
+
+These delimiter strings must be identical on both sides of the subprocess pipe.
+Neither side should redefine them — import from here.
+"""
+
+CELL_DELIM = "__ANTON_CELL_END__"
+RESULT_START = "__ANTON_RESULT__"
+RESULT_END = "__ANTON_RESULT_END__"
+PROGRESS_MARKER = "__ANTON_PROGRESS__"
diff --git a/tests/test_scratchpad.py b/tests/test_scratchpad.py
index eeea042e..0b443bf5 100644
--- a/tests/test_scratchpad.py
+++ b/tests/test_scratchpad.py
@@ -5,9 +5,8 @@
 
 import pytest
 
-import anton.core.backends.base as backends_base
 from anton.core.backends.base import Cell
-from anton.core.backends.local import LocalScratchpadRuntime
+from anton.core.backends.local import LocalScratchpadRuntime, _compute_timeouts
 from anton.core.backends.manager import ScratchpadManager
 
 # Alias for brevity in tests
@@ -115,8 +114,8 @@ async def test_reset_clears_state(self):
 class TestScratchpadEdgeCases:
     async def test_timeout_kills_process(self, monkeypatch):
         """Long-running code triggers timeout."""
-        monkeypatch.setattr(backends_base, "_CELL_TIMEOUT_DEFAULT", 1)
-        monkeypatch.setattr(backends_base, "_CELL_INACTIVITY_TIMEOUT", 1)
+        monkeypatch.setenv("ANTON_CELL_TIMEOUT_DEFAULT", "1")
+        monkeypatch.setenv("ANTON_CELL_INACTIVITY_TIMEOUT", "1")
         pad = Scratchpad(name="test")
         await pad.start()
         try:
@@ -723,8 +722,8 @@ async def test_progress_function_available_in_namespace(self):
 
     async def test_progress_resets_inactivity_timeout(self, monkeypatch):
         """Code that calls progress() frequently should survive even with a short inactivity timeout."""
-        monkeypatch.setattr(backends_base, "_CELL_INACTIVITY_TIMEOUT", 2)
-        monkeypatch.setattr(backends_base, "_CELL_TIMEOUT_DEFAULT", 10)
+        monkeypatch.setenv("ANTON_CELL_INACTIVITY_TIMEOUT", "2")
+        monkeypatch.setenv("ANTON_CELL_TIMEOUT_DEFAULT", "10")
         pad = Scratchpad(name="progress-keep-alive")
         await pad.start()
         try:
@@ -743,8 +742,8 @@ async def test_progress_resets_inactivity_timeout(self, monkeypatch):
 
     async def test_inactivity_timeout_kills_without_progress(self, monkeypatch):
         """Code that sleeps without progress() calls should be killed by inactivity timeout."""
-        monkeypatch.setattr(backends_base, "_CELL_INACTIVITY_TIMEOUT", 2)
-        monkeypatch.setattr(backends_base, "_CELL_TIMEOUT_DEFAULT", 60)
+        monkeypatch.setenv("ANTON_CELL_INACTIVITY_TIMEOUT", "2")
+        monkeypatch.setenv("ANTON_CELL_TIMEOUT_DEFAULT", "60")
         pad = Scratchpad(name="no-progress")
         await pad.start()
         try:
@@ -782,14 +781,14 @@ async def test_execute_streaming_yields_progress(self):
 
     async def test_compute_timeouts_no_estimate(self):
         """No estimate should use defaults."""
-        from anton.core.backends.base import _compute_timeouts
+        from anton.core.backends.local import _compute_timeouts
         total, inactivity = _compute_timeouts(0)
         assert total == 120.0
         assert inactivity == 30.0
 
     async def test_compute_timeouts_with_estimate(self):
         """Estimate should scale total timeout and inactivity with no hard cap."""
-        from anton.core.backends.base import _compute_timeouts
+        from anton.core.backends.local import _compute_timeouts
 
         # Small estimate: max(10*2, 10+30) = max(20, 40) = 40
         total, inactivity = _compute_timeouts(10)

From e84b78d129eaf9a7725d4fa08dcd051f812be3d6 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Thu, 9 Apr 2026 08:01:16 -0700
Subject: [PATCH 075/134] better data connection experience, use llm on all non
 credential fields to see how far it can get

---
 anton/commands/datasource.py    | 191 ++++++++++----
 anton/connect_collector.py      | 255 +++++++++++++++++++
 anton/tools.py                  |  76 ++++--
 tests/test_connect_collector.py | 438 ++++++++++++++++++++++++++++++++
 tests/test_datasource.py        |  47 ++--
 5 files changed, 913 insertions(+), 94 deletions(-)
 create mode 100644 anton/connect_collector.py
 create mode 100644 tests/test_connect_collector.py

diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index c1de837c..3f57e6f0 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -13,8 +13,14 @@
 from rich.markdown import Markdown
 from rich.padding import Padding
 
+from anton.connect_collector import ConnectionCollector, extract_variables
 from anton.data_vault import DataVault
-from anton.datasource_registry import DatasourceEngine, DatasourceField, DatasourceRegistry
+from anton.datasource_registry import (
+    AuthMethod,
+    DatasourceEngine,
+    DatasourceField,
+    DatasourceRegistry,
+)
 from anton.utils.datasources import (
     register_secret_vars,
     remove_engine_block,
@@ -549,15 +555,56 @@ async def _reconnect_to_saved(
     return session
 
 
+def _record_redirect(
+    session: "ChatSession",
+    collector: ConnectionCollector,
+    user_message: str,
+    target_engine: str | None = None,
+) -> None:
+    """Record a mid-flow redirect so the main agent can pick up where we left off.
+
+    Appends a structured assistant message to history with the variables
+    collected so far and the user's last message, so the LLM can decide
+    whether to re-call connect_new_datasource with the new engine and
+    pre-fill the already-known variables.
+    """
+    collector.redirect_message = user_message.strip()
+    payload = collector.to_redirect_result()
+    parts = [
+        f"REDIRECT during {payload['engine_display']} connection setup.",
+        f"Collected so far: {json.dumps(payload['collected_variables'])}.",
+    ]
+    if payload["missing_required"]:
+        parts.append(
+            f"Still missing: {', '.join(payload['missing_required'])}."
+        )
+    if target_engine:
+        parts.append(f"User wants to switch to: {target_engine}.")
+    parts.append(f'User said: "{collector.redirect_message}".')
+    parts.append(
+        "Decide what to do next — you may call connect_new_datasource "
+        "again with the correct engine and pass known_variables to "
+        "pre-fill what's already collected."
+    )
+    session._history.append(
+        {"role": "assistant", "content": " ".join(parts)}
+    )
+
+
 async def handle_connect_datasource(
     console: Console,
     scratchpads: ScratchpadManager,
     session: "ChatSession",
     datasource_name: str | None = None,
     prefill: str | None = None,
+    known_variables: dict[str, str] | None = None,
 ) -> "ChatSession":
     """
     Connect a data source by entering credentials, either for a new name or re-entering for an existing one.
+
+    `known_variables` may pre-fill credential fields (e.g. when called as a
+    tool by the LLM, which may have already extracted host/port/etc. from
+    the conversation).
     """
 
     vault = DataVault()
@@ -959,6 +1006,7 @@ async def get_create_new_answer() -> str | None:
 
     assert engine_def is not None  # custom_source path always returns before this line
     active_fields = engine_def.fields
+    chosen_method: "AuthMethod | None" = None
     if engine_def.auth_method == "choice" and engine_def.auth_methods:
         console.print()
         console.print(
@@ -1020,63 +1068,110 @@ async def get_create_new_answer() -> str | None:
             console, session, engine_def.display_name, None, active_fields,
         )
 
-    while True:
-        mode_answer = await prompt_or_cancel(
-            "(anton) Do you have these available?",
-            choices_display="y/n/list params", default="y",
+    # ── Smart credential collection ────────────────────────────────────
+    # Track filled vs. missing fields as a puzzle. Each user response is
+    # parsed via the LLM to extract any variables mentioned, so users can
+    # fill multiple fields at once, paste a connection string, or change
+    # direction mid-flow.
+    collector = ConnectionCollector(
+        engine_def=engine_def,
+        auth_method=chosen_method,
+    )
+    if known_variables:
+        accepted = collector.fill_many(known_variables)
+        if accepted:
+            console.print(
+                f"[anton.muted]        Pre-filled from context: "
+                f"{', '.join(accepted)}[/]"
+            )
+            console.print()
+
+    known_engine_slugs = [e.engine for e in registry.all_engines()]
+    partial = False
+
+    while not collector.is_complete:
+        collector.format_status(console)
+        console.print()
+
+        next_field = collector.next_field
+        # When only one required field remains, ask for it directly with
+        # the matching prompt style (password masking, default value,
+        # etc.). No LLM extraction needed — the answer IS the value.
+        only_one_required = (
+            next_field is not None
+            and next_field.required
+            and len(collector.missing_required) == 1
         )
-        if mode_answer is None:
-            return session
-        mode_answer = mode_answer.strip().lower()
 
-        if mode_answer in ("y", "n"):
-            break
+        if only_one_required and next_field is not None:
+            label = f"(anton) {next_field.name}"
+            if next_field.secret:
+                value = await prompt_or_cancel(label, password=True)
+            elif next_field.default:
+                value = await prompt_or_cancel(label, default=next_field.default)
+            else:
+                value = await prompt_or_cancel(label)
+            if value is None:
+                return session
+            if not value:
+                # Empty answer for the only missing required field —
+                # treat as a partial save signal.
+                partial = True
+                break
+            collector.fill(next_field.name, value)
+            continue
 
-        # Check if user gave valid comma-separated param names
-        requested = {n.strip().lower() for n in mode_answer.split(",")}
-        matched = [f for f in active_fields if f.name.lower() in requested]
-        if matched:
+        # Multiple fields remain — open prompt that accepts bulk input
+        missing_names = ", ".join(f.name for f in collector.missing_required)
+        prompt_label = (
+            f"(anton) Provide values for {missing_names} "
+            f"(one at a time, or 'key=value key2=value2', or 'skip')"
+        )
+        value = await prompt_or_cancel(prompt_label)
+        if value is None:
+            return session
+        if value.strip().lower() == "skip":
+            partial = True
             break
+        if not value.strip():
+            continue
 
-        console.print(
-            "[anton.warning]        Please enter y, n, or a comma-separated list of parameter names "
-            f"({', '.join(f.name for f in active_fields)}).[/]"
+        extracted = await extract_variables(
+            value,
+            expected_fields=collector.active_fields,
+            current_engine=engine_def.engine,
+            current_engine_display=engine_def.display_name,
+            known_engine_slugs=known_engine_slugs,
+            session=session,
         )
-        console.print()
 
-    if mode_answer == "n":
-        console.print()
-        console.print(
-            "[anton.cyan](anton)[/] No problem. Which parameters do you have? "
-            "I'll save a partial connection now, and you can fill in the rest later "
-            "with [bold]/edit[/]."
-        )
-        console.print()
-        console.print("       Provide what you have (press enter to skip any field):")
-        console.print()
-        fields_to_collect = active_fields
-        partial = True
-    elif mode_answer == "y":
-        fields_to_collect = active_fields
-        partial = False
-    else:
-        fields_to_collect = matched
-        partial = False
+        if extracted.is_redirect:
+            _record_redirect(
+                session, collector, value, extracted.redirect_engine
+            )
+            return session
 
-    console.print()
-    credentials: dict[str, str] = {}
+        if extracted.variables:
+            filled = collector.fill_many(extracted.variables)
+            if filled:
+                console.print(
+                    f"[anton.muted]        Got: {', '.join(filled)}[/]"
+                )
+                console.print()
+                continue
 
-    for f in fields_to_collect:
-        if f.secret:
-            value = await prompt_or_cancel(f"(anton) {f.name}", password=True)
-        elif f.default:
-            value = await prompt_or_cancel(f"(anton) {f.name}", default=f.default)
+        # LLM returned nothing structured — fall back to treating the
+        # input as the value for the next missing required field.
+        if next_field is not None:
+            collector.fill(next_field.name, value.strip())
         else:
-            value = await prompt_or_cancel(f"(anton) {f.name}")
-        if value is None:
-            return session
-        if value:
-            credentials[f.name] = value
+            console.print(
+                "[anton.warning]        Couldn't parse that. "
+                "Try 'key=value' or one value at a time.[/]"
+            )
+            console.print()
+
+    credentials: dict[str, str] = dict(collector.collected)
 
     if partial:
         auto_name = uuid.uuid4().hex[:8]
diff --git a/anton/connect_collector.py b/anton/connect_collector.py
new file mode 100644
index 00000000..a7133752
--- /dev/null
+++ b/anton/connect_collector.py
@@ -0,0 +1,255 @@
+"""Smart variable collection for the /connect flow.
+
+Provides:
+- `ConnectionCollector` — a state machine that tracks which credential
+  fields have been filled vs. are still missing for a specific engine.
+- `extract_variables()` — an LLM-driven parser that reads free-form user
+  input and returns (a) the structured variables detected and (b) whether
+  the user is redirecting (changing datasource, cancelling, etc).
+
+The LLM handles all the messy cases naturally: natural language
+("my host is db.example.com"), connection strings
+(`postgres://u:p@host:5432/db`), aliases (pwd→password, hostname→host),
+comma-separated lists, and redirect phrasing ("actually it's mysql").
+
+This mirrors the LLM-returns-JSON pattern already used by
+`handle_add_custom_datasource()` in anton/commands/datasource.py.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+from anton.datasource_registry import AuthMethod, DatasourceEngine, DatasourceField
+
+if TYPE_CHECKING:
+    from rich.console import Console
+
+    from anton.core.session import ChatSession
+
+
+@dataclass
+class ExtractedData:
+    """Result of running extract_variables() on a user response."""
+
+    variables: dict[str, str] = field(default_factory=dict)
+    is_redirect: bool = False
+    redirect_engine: str | None = None
+    redirect_reason: str = ""
+
+
+@dataclass
+class ConnectionCollector:
+    """Tracks the puzzle state of a single connection attempt.
+
+    Holds the engine definition and which fields have been filled in so
+    far. Use `fill_many()` to apply extracted variables and the
+    `missing_*` / `is_complete` / `next_field` properties to drive the
+    smart prompt loop.
+    """
+
+    engine_def: DatasourceEngine
+    auth_method: AuthMethod | None = None
+    collected: dict[str, str] = field(default_factory=dict)
+    redirect_message: str = ""
+
+    @property
+    def active_fields(self) -> list[DatasourceField]:
+        if self.auth_method is not None:
+            return self.auth_method.fields
+        return self.engine_def.fields
+
+    @property
+    def field_names(self) -> set[str]:
+        return {f.name for f in self.active_fields}
+
+    @property
+    def missing_required(self) -> list[DatasourceField]:
+        return [
+            f for f in self.active_fields
+            if f.required and not self.collected.get(f.name)
+        ]
+
+    @property
+    def missing_optional(self) -> list[DatasourceField]:
+        return [
+            f for f in self.active_fields
+            if not f.required and not self.collected.get(f.name)
+        ]
+
+    @property
+    def is_complete(self) -> bool:
+        return not self.missing_required
+
+    @property
+    def next_field(self) -> DatasourceField | None:
+        """The next field to ask about — first missing required, else first missing optional."""
+        if self.missing_required:
+            return self.missing_required[0]
+        if self.missing_optional:
+            return self.missing_optional[0]
+        return None
+
+    def fill(self, key: str, value: str) -> bool:
+        """Store value for a field. Returns True if accepted, False if unknown field."""
+        if key not in self.field_names:
+            return False
+        if value:
+            self.collected[key] = value
+        return True
+
+    def fill_many(self, pairs: dict[str, str]) -> list[str]:
+        """Bulk-fill from a dict. Returns list of keys actually accepted."""
+        accepted: list[str] = []
+        for k, v in pairs.items():
+            if self.fill(k, v):
+                accepted.append(k)
+        return accepted
+
+    def format_status(self, console: "Console") -> None:
+        """Print a Rich-formatted summary of what's filled vs. missing."""
+        filled_active = [
+            f.name for f in self.active_fields if self.collected.get(f.name)
+        ]
+        if filled_active:
+            console.print(
+                "        [anton.muted]Filled:[/] " + ", ".join(filled_active)
+            )
+        if self.missing_required:
+            console.print(
+                "        [anton.muted]Still needed:[/] "
+                + ", ".join(f.name for f in self.missing_required)
+            )
+
+    def to_redirect_result(self) -> dict:
+        """Serializable summary for the main agent when the user changes direction."""
+        return {
+            "status": "redirect",
+            "engine": self.engine_def.engine,
+            "engine_display": self.engine_def.display_name,
+            "collected_variables": dict(self.collected),
+            "missing_required": [f.name for f in self.missing_required],
+            "redirect_message": self.redirect_message,
+        }
+
+
+_SYSTEM_PROMPT = (
+    "You extract structured connection credentials from user messages. "
+    "You are helping fill out a form for a specific datasource. "
+    "Return ONLY valid JSON — no commentary, no markdown fences."
+)
+
+
+async def extract_variables(
+    raw_input: str,
+    *,
+    expected_fields: list[DatasourceField],
+    current_engine: str,
+    current_engine_display: str,
+    known_engine_slugs: list[str],
+    session: "ChatSession",
+) -> ExtractedData:
+    """Use the LLM to parse free-form user input into connection variables.
+
+    Returns an `ExtractedData` with:
+      - `variables`: field name → value for any credentials detected
+      - `is_redirect`: True if the user is changing direction
+      - `redirect_engine`: the new engine slug if they named one
+      - `redirect_reason`: a short description of the redirect
+
+    Trusts the LLM to handle aliases (hostname→host, pwd→password),
+    connection strings (postgres://user:pass@host:5432/db), natural
+    language ("my host is db.example.com"), and free-form redirect
+    phrasing ("actually let's do mysql instead"). Falls back to an empty
+    result on any parse error — callers should treat an empty result as
+    "treat the raw input as the next field's value".
+    """
+    result = ExtractedData()
+    text = (raw_input or "").strip()
+    if not text:
+        return result
+
+    field_lines = "\n".join(
+        f"  - {f.name}{' (secret)' if f.secret else ''}: "
+        f"{f.description or '(no description)'}"
+        for f in expected_fields
+    )
+    other_engines = ", ".join(s for s in known_engine_slugs if s != current_engine)
+
+    user_prompt = (
+        f"Current datasource: {current_engine_display} (slug: {current_engine})\n"
+        f"Expected fields for this datasource:\n{field_lines}\n\n"
+        f"Other known datasource slugs: {other_engines}\n\n"
+        f"The user was asked to provide credentials and wrote:\n"
+        f"{text!r}\n\n"
+        "Return ONLY valid JSON with this exact shape:\n"
+        '{\n'
+        '  "variables": {"<field_name>": "<value>", ...},\n'
+        '  "is_redirect": true or false,\n'
+        '  "redirect_engine": "<slug or empty string>",\n'
+        '  "redirect_reason": "<short phrase or empty string>"\n'
+        '}\n\n'
+        "Rules:\n"
+        "- Only include fields from the expected list above. Use the exact "
+        "field names (snake_case).\n"
+        "- Recognize common aliases (hostname→host, pwd→password, "
+        "db→database, username→user, etc.) and map to the canonical name.\n"
+        "- If the user pasted a connection string (e.g. "
+        "postgres://u:p@host:5432/db), extract host/port/user/password/"
+        "database from it.\n"
+        "- Set `is_redirect` to true ONLY if the user is clearly trying to "
+        "cancel or switch to a DIFFERENT datasource (e.g. \"actually it's "
+        "mysql\", \"never mind\", \"cancel\"). Providing credentials is NOT "
+        "a redirect.\n"
+        "- If they mention a different datasource by name (from the other "
+        "known slugs list), set `redirect_engine` to that slug.\n"
+        "- If the user just provided a plain value for one field (e.g. "
+        "typed \"localhost\" when asked for host), and did NOT mention a "
+        "field name, leave `variables` empty — the caller will treat the "
+        "raw text as the next field's value.\n"
+        "- Never invent values. Only extract what the user explicitly wrote."
+    )
+
+    try:
+        response = await session._llm.plan(
+            system=_SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": user_prompt}],
+            max_tokens=512,
+        )
+        content = (response.content or "").strip()
+        # Strip optional markdown fences, same pattern as
+        # handle_add_custom_datasource().
+        content = re.sub(
+            r"^```[^\n]*\n|```\s*$", "", content, flags=re.MULTILINE
+        ).strip()
+        data = json.loads(content)
+    except Exception:
+        return result
+
+    if not isinstance(data, dict):
+        return result
+
+    raw_vars = data.get("variables") or {}
+    if isinstance(raw_vars, dict):
+        valid_names = {f.name for f in expected_fields}
+        for k, v in raw_vars.items():
+            if not isinstance(v, (str, int, float)):
+                continue
+            key = str(k).strip()
+            if key in valid_names:
+                value = str(v).strip()
+                if value:
+                    result.variables[key] = value
+
+    result.is_redirect = bool(data.get("is_redirect"))
+    redirect_engine = data.get("redirect_engine")
+    if isinstance(redirect_engine, str) and redirect_engine.strip():
+        result.redirect_engine = redirect_engine.strip()
+    redirect_reason = data.get("redirect_reason")
+    if isinstance(redirect_reason, str):
+        result.redirect_reason = redirect_reason.strip()
+
+    return result
diff --git a/anton/tools.py b/anton/tools.py
index 04ea780a..42cff5d3 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -15,6 +15,12 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     if not engine:
         return "Engine name is required."
 
+    raw_known = tc_input.get("known_variables") or {}
+    known_variables: dict[str, str] = (
+        {str(k): str(v) for k, v in raw_known.items() if v is not None and v != ""}
+        if isinstance(raw_known, dict) else {}
+    )
+
     console = session._console
     if console is None:
         return "Cannot connect datasource — no console available."
@@ -36,6 +42,7 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
         session._scratchpads,
         session,
         prefill=engine,
+        known_variables=known_variables or None,
     )
 
     # Check if a new connection was actually added
@@ -48,30 +55,42 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
             f"Successfully connected '{slug}'. The datasource is now available. "
             f"Continue helping the user with their original request using this data source."
         )
-    else:
-        # User cancelled or connection failed — show briefly with spinner
-        # so user knows the agent is picking back up
-        from rich.live import Live
-        from rich.spinner import Spinner
-        from rich.text import Text
-        import asyncio
 
-        console.print()
-        console.print("[anton.muted]  No worries, let's continue where we left off.[/]")
-        with Live(
-            Spinner("dots", text=Text("", style="anton.muted"), style="anton.cyan"),
-            console=console,
-            refresh_per_second=10,
-            transient=True,
+    # Did the flow record a mid-flow redirect? If so, the last history
+    # entry starts with "REDIRECT" — pass it through instead of treating
+    # it as a cancellation.
+    if session._history and isinstance(session._history[-1], dict):
+        last = session._history[-1]
+        if (
+            last.get("role") == "assistant"
+            and isinstance(last.get("content"), str)
+            and last["content"].startswith("REDIRECT")
         ):
-            await asyncio.sleep(1.5)
-        console.print()
-        return (
-            f"CANCELLED: The user pressed Escape and cancelled the '{engine}' connection. "
-            f"STOP — do NOT call connect_new_datasource again. Do NOT retry. "
-            f"Acknowledge the cancellation briefly and ask the user what they'd like to do instead. "
-            f"Respond with TEXT ONLY — no tool calls."
-        )
+            return last["content"]
+
+    # User cancelled or connection failed — show briefly with spinner
+    # so user knows the agent is picking back up
+    from rich.live import Live
+    from rich.spinner import Spinner
+    from rich.text import Text
+    import asyncio
+
+    console.print()
+    console.print("[anton.muted]  No worries, let's continue where we left off.[/]")
+    with Live(
+        Spinner("dots", text=Text("", style="anton.muted"), style="anton.cyan"),
+        console=console,
+        refresh_per_second=10,
+        transient=True,
+    ):
+        await asyncio.sleep(1.5)
+    console.print()
+    return (
+        f"CANCELLED: The user pressed Escape and cancelled the '{engine}' connection. "
+        f"STOP — do NOT call connect_new_datasource again. Do NOT retry. "
+        f"Acknowledge the cancellation briefly and ask the user what they'd like to do instead. "
+        f"Respond with TEXT ONLY — no tool calls."
+    )
 
 
 CONNECT_DATASOURCE_TOOL = ToolDef(
@@ -83,6 +102,9 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
         "where the user enters their credentials.\n\n"
         "Pass the datasource type/name (e.g. 'gmail', 'postgres', 'salesforce', 'hubspot'). "
         "Anton will match it to the right connector and guide the user through setup.\n\n"
+        "If the user has ALREADY mentioned credential values in the conversation "
+        "(e.g. 'connect to dynamodb, my access key is AKIA... and region is us-east-1'), "
+        "pass them as `known_variables` so the user is not asked again.\n\n"
         "Do NOT print any message before calling this tool — it handles the user-facing output."
     ),
     input_schema = {
@@ -96,6 +118,16 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
                 "type": "string",
                 "description": "Brief explanation of why this datasource is needed",
             },
+            "known_variables": {
+                "type": "object",
+                "description": (
+                    "Pre-extracted credential field values from the conversation. "
+                    "Use snake_case field names (e.g. {\"host\": \"db.example.com\", "
+                    "\"port\": \"5432\", \"user\": \"admin\"}). Only pass fields the "
+                    "user actually mentioned — never invent values."
+                ),
+                "additionalProperties": {"type": "string"},
+            },
         },
         "required": ["engine"],
     },
diff --git a/tests/test_connect_collector.py b/tests/test_connect_collector.py
new file mode 100644
index 00000000..f62bca64
--- /dev/null
+++ b/tests/test_connect_collector.py
@@ -0,0 +1,438 @@
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from anton.connect_collector import (
+    ConnectionCollector,
+    ExtractedData,
+    extract_variables,
+)
+from anton.datasource_registry import AuthMethod, DatasourceEngine, DatasourceField
+
+
+def _postgres_engine() -> DatasourceEngine:
+    return DatasourceEngine(
+        engine="postgres",
+        display_name="PostgreSQL",
+        fields=[
+            DatasourceField(name="host", required=True, description="hostname"),
+            DatasourceField(
+                name="port", required=True, default="5432", description="port"
+            ),
+            DatasourceField(name="database", required=True, description="db name"),
+            DatasourceField(name="user", required=True, description="username"),
+            DatasourceField(
+                name="password", required=True, secret=True, description="pwd"
+            ),
+            DatasourceField(name="schema", required=False, description="schema"),
+        ],
+    )
+
+
+def _hubspot_choice_engine() -> DatasourceEngine:
+    return DatasourceEngine(
+        engine="hubspot",
+        display_name="HubSpot",
+        auth_method="choice",
+        auth_methods=[
+            AuthMethod(
+                name="pat",
+                display="Personal Access Token",
+                fields=[
+                    DatasourceField(name="access_token", required=True, secret=True)
+                ],
+            ),
+            AuthMethod(
+                name="oauth",
+                display="OAuth2",
+                fields=[
+                    DatasourceField(name="client_id", required=True),
+                    DatasourceField(
+                        name="client_secret", required=True, secret=True
+                    ),
+                ],
+            ),
+        ],
+    )
+
+
+def _mock_session_with_plan_response(content: str) -> MagicMock:
+    """Build a session mock whose `_llm.plan()` returns the given JSON content."""
+    session = MagicMock()
+    plan_response = MagicMock()
+    plan_response.content = content
+    session._llm = MagicMock()
+    session._llm.plan = AsyncMock(return_value=plan_response)
+    return session
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# ConnectionCollector
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestConnectionCollector:
+    def test_initial_state_all_required_missing(self):
+        c = ConnectionCollector(_postgres_engine())
+        assert not c.is_complete
+        assert len(c.missing_required) == 5
+        assert c.next_field is not None
+        assert c.next_field.name == "host"
+
+    def test_fill_simple(self):
+        c = ConnectionCollector(_postgres_engine())
+        assert c.fill("host", "localhost") is True
+        assert c.collected["host"] == "localhost"
+        assert len(c.missing_required) == 4
+        assert c.next_field is not None
+        assert c.next_field.name == "port"
+
+    def test_fill_unknown_field_rejected(self):
+        c = ConnectionCollector(_postgres_engine())
+        assert c.fill("made_up_field", "x") is False
+        assert "made_up_field" not in c.collected
+
+    def test_fill_empty_value_does_nothing(self):
+        c = ConnectionCollector(_postgres_engine())
+        assert c.fill("host", "") is True  # accepted (known field)
+        assert "host" not in c.collected  # but not stored
+
+    def test_fill_many(self):
+        c = ConnectionCollector(_postgres_engine())
+        accepted = c.fill_many(
+            {
+                "host": "db.x",
+                "port": "5432",
+                "user": "admin",
+                "password": "secret",
+                "database": "mydb",
+                "unknown_garbage": "ignored",
+            }
+        )
+        assert set(accepted) == {"host", "port", "user", "password", "database"}
+        assert "unknown_garbage" not in c.collected
+        assert c.is_complete
+
+    def test_complete_when_all_required_filled(self):
+        c = ConnectionCollector(_postgres_engine())
+        c.fill_many(
+            {
+                "host": "x",
+                "port": "5432",
+                "database": "d",
+                "user": "u",
+                "password": "p",
+            }
+        )
+        assert c.is_complete
+        assert c.missing_required == []
+
+    def test_optional_does_not_block_completion(self):
+        c = ConnectionCollector(_postgres_engine())
+        c.fill_many(
+            {
+                "host": "x",
+                "port": "5432",
+                "database": "d",
+                "user": "u",
+                "password": "p",
+            }
+        )
+        assert c.is_complete
+        # schema is optional and unfilled
+        assert any(f.name == "schema" for f in c.missing_optional)
+
+    def test_next_field_falls_back_to_optional_when_required_done(self):
+        c = ConnectionCollector(_postgres_engine())
+        c.fill_many(
+            {"host": "x", "port": "5", "database": "d", "user": "u", "password": "p"}
+        )
+        # All required filled — next_field is the first optional
+        assert c.next_field is not None
+        assert c.next_field.name == "schema"
+
+    def test_auth_method_active_fields(self):
+        engine = _hubspot_choice_engine()
+        pat = engine.auth_methods[0]
+        c = ConnectionCollector(engine, auth_method=pat)
+        # PAT method has only access_token, not client_id/client_secret
+        assert {f.name for f in c.active_fields} == {"access_token"}
+        c.fill("access_token", "abc123")
+        assert c.is_complete
+
+    def test_to_redirect_result(self):
+        c = ConnectionCollector(_postgres_engine())
+        c.fill_many({"host": "db.x", "user": "admin"})
+        c.redirect_message = "actually it's mysql"
+        result = c.to_redirect_result()
+        assert result["status"] == "redirect"
+        assert result["engine"] == "postgres"
+        assert result["engine_display"] == "PostgreSQL"
+        assert result["collected_variables"] == {"host": "db.x", "user": "admin"}
+        assert "port" in result["missing_required"]
+        assert "password" in result["missing_required"]
+        assert result["redirect_message"] == "actually it's mysql"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# extract_variables (LLM-driven)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestExtractVariables:
+    @pytest.mark.asyncio
+    async def test_empty_input_returns_empty(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response("{}")
+        result = await extract_variables(
+            "",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres", "mysql"],
+            session=session,
+        )
+        assert result.variables == {}
+        assert not result.is_redirect
+        # Empty input shouldn't even call the LLM
+        session._llm.plan.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_llm_extracts_variables_from_bulk_input(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            json.dumps(
+                {
+                    "variables": {
+                        "host": "db.example.com",
+                        "port": "5432",
+                        "user": "admin",
+                    },
+                    "is_redirect": False,
+                    "redirect_engine": "",
+                    "redirect_reason": "",
+                }
+            )
+        )
+        result = await extract_variables(
+            "host=db.example.com port=5432 user=admin",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres", "mysql"],
+            session=session,
+        )
+        assert result.variables == {
+            "host": "db.example.com",
+            "port": "5432",
+            "user": "admin",
+        }
+        assert not result.is_redirect
+
+    @pytest.mark.asyncio
+    async def test_llm_parses_connection_string(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            json.dumps(
+                {
+                    "variables": {
+                        "host": "db.example.com",
+                        "port": "5432",
+                        "user": "admin",
+                        "password": "secret",
+                        "database": "mydb",
+                    },
+                    "is_redirect": False,
+                    "redirect_engine": "",
+                    "redirect_reason": "",
+                }
+            )
+        )
+        result = await extract_variables(
+            "postgres://admin:secret@db.example.com:5432/mydb",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        assert result.variables["host"] == "db.example.com"
+        assert result.variables["user"] == "admin"
+        assert result.variables["password"] == "secret"
+        assert result.variables["database"] == "mydb"
+
+    @pytest.mark.asyncio
+    async def test_llm_resolves_aliases(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            json.dumps(
+                {
+                    "variables": {
+                        "host": "db.x",
+                        "user": "admin",
+                        "password": "secret",
+                    },
+                    "is_redirect": False,
+                    "redirect_engine": "",
+                    "redirect_reason": "",
+                }
+            )
+        )
+        result = await extract_variables(
+            "hostname=db.x username=admin pwd=secret",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        assert result.variables == {
+            "host": "db.x",
+            "user": "admin",
+            "password": "secret",
+        }
+
+    @pytest.mark.asyncio
+    async def test_llm_detects_redirect(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            json.dumps(
+                {
+                    "variables": {},
+                    "is_redirect": True,
+                    "redirect_engine": "mysql",
+                    "redirect_reason": "user wants mysql instead",
+                }
+            )
+        )
+        result = await extract_variables(
+            "actually let's use mysql instead",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres", "mysql"],
+            session=session,
+        )
+        assert result.is_redirect
+        assert result.redirect_engine == "mysql"
+        assert "mysql" in result.redirect_reason
+
+    @pytest.mark.asyncio
+    async def test_llm_ignores_fields_not_in_expected_list(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            json.dumps(
+                {
+                    "variables": {
+                        "host": "db.x",
+                        "bogus_field": "should be dropped",
+                    },
+                    "is_redirect": False,
+                    "redirect_engine": "",
+                    "redirect_reason": "",
+                }
+            )
+        )
+        result = await extract_variables(
+            "host=db.x bogus=y",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        assert result.variables == {"host": "db.x"}
+        assert "bogus_field" not in result.variables
+
+    @pytest.mark.asyncio
+    async def test_llm_strips_markdown_fences(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            '```json\n{"variables": {"host": "db.x"}, '
+            '"is_redirect": false, "redirect_engine": "", '
+            '"redirect_reason": ""}\n```'
+        )
+        result = await extract_variables(
+            "host=db.x",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        assert result.variables == {"host": "db.x"}
+
+    @pytest.mark.asyncio
+    async def test_invalid_json_returns_empty_result(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response("this is not JSON at all")
+        result = await extract_variables(
+            "some input",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        # Invalid JSON is caught → empty result, caller will fall back
+        # to treating the raw text as the next field's value
+        assert result.variables == {}
+        assert not result.is_redirect
+
+    @pytest.mark.asyncio
+    async def test_llm_exception_returns_empty_result(self):
+        engine = _postgres_engine()
+        session = MagicMock()
+        session._llm = MagicMock()
+        session._llm.plan = AsyncMock(side_effect=RuntimeError("network error"))
+        result = await extract_variables(
+            "host=db.x",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        # Exception is caught → empty result, never crashes the flow
+        assert result.variables == {}
+        assert not result.is_redirect
+
+    @pytest.mark.asyncio
+    async def test_coerces_numeric_values_to_strings(self):
+        engine = _postgres_engine()
+        session = _mock_session_with_plan_response(
+            json.dumps(
+                {
+                    "variables": {"port": 5432},  # LLM returned int
+                    "is_redirect": False,
+                    "redirect_engine": "",
+                    "redirect_reason": "",
+                }
+            )
+        )
+        result = await extract_variables(
+            "port is 5432",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        assert result.variables == {"port": "5432"}
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# ExtractedData dataclass sanity
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestExtractedData:
+    def test_default_values(self):
+        e = ExtractedData()
+        assert e.variables == {}
+        assert not e.is_redirect
+        assert e.redirect_engine is None
+        assert e.redirect_reason == ""
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 2326513b..74ab2c1e 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -585,12 +585,12 @@ async def test_unknown_engine_returns_early(
         assert DataVault(vault_dir=vault_dir).list_connections() == []
 
     @pytest.mark.asyncio
-    async def test_partial_save_on_n_answer(self, registry, vault_dir, make_session):
-        """Answering 'n' saves partial credentials and returns without testing."""
+    async def test_partial_save_on_skip(self, registry, vault_dir, make_session):
+        """Answering 'skip' at the bulk prompt saves partial credentials and returns without testing."""
         session = make_session()
         console = MagicMock()
         vault = DataVault(vault_dir=vault_dir)
-        responses = iter(["PostgreSQL", "n", "n", "db.example.com", "", "", "", "", ""])
+        responses = iter(["PostgreSQL", "n", "skip"])
 
         with (
             patch("anton.commands.datasource.DataVault", return_value=vault),
@@ -627,13 +627,11 @@ async def test_successful_connection_saves_and_injects_history(
             [
                 "PostgreSQL",
                 "n",
-                "y",
                 "db.example.com",
                 "5432",
                 "prod_db",
                 "alice",
                 "s3cr3t",
-                "",
             ]
         )
 
@@ -679,13 +677,11 @@ async def test_failed_test_offers_retry(
             [
                 "PostgreSQL",
                 "n",
-                "y",
                 "db.example.com",
                 "5432",
                 "prod_db",
                 "alice",
                 "wrongpassword",
-                "",
                 "y",
                 "correctpassword",
             ]
@@ -723,13 +719,11 @@ async def test_failed_test_no_retry_returns_without_saving(
             [
                 "PostgreSQL",
                 "n",
-                "y",
                 "db.example.com",
                 "5432",
                 "prod_db",
                 "alice",
                 "badpass",
-                "",
                 "n",
             ]
         )
@@ -765,13 +759,11 @@ async def test_ds_env_injected_after_successful_connect(
             [
                 "PostgreSQL",
                 "n",
-                "y",
                 "db.example.com",
                 "5432",
                 "prod_db",
                 "alice",
                 "s3cr3t",
-                "",
             ]
         )
 
@@ -800,7 +792,7 @@ async def test_auth_method_choice_selects_fields(
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
 
-        responses = iter(["HubSpot", "1", "n", "y", "pat-na1-abc123"])
+        responses = iter(["HubSpot", "1", "n", "pat-na1-abc123"])
 
         with (
             patch("anton.commands.datasource.DataVault", return_value=vault),
@@ -822,14 +814,24 @@ async def test_auth_method_choice_selects_fields(
         assert "client_secret" not in saved
 
     @pytest.mark.asyncio
-    async def test_selective_field_collection(
+    async def test_bulk_key_value_extraction(
         self, registry, vault_dir, make_session, make_cell, make_pad
     ):
-        """Typing 'host,user,password' collects only those three fields."""
+        """A single bulk response with key=value pairs fills multiple fields at once."""
         session = make_session()
         console = MagicMock()
         vault = DataVault(vault_dir=vault_dir)
 
+        # Mock the LLM to return a structured JSON extraction when it sees
+        # the bulk key=value string.
+        bulk_response = MagicMock()
+        bulk_response.content = (
+            '{"variables": {"host": "db.example.com", "port": "5432", '
+            '"database": "prod_db", "user": "alice"}, '
+            '"is_redirect": false, "redirect_engine": "", "redirect_reason": ""}'
+        )
+        session._llm.plan = AsyncMock(return_value=bulk_response)
+
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
 
@@ -837,10 +839,8 @@ async def test_selective_field_collection(
             [
                 "PostgreSQL",
                 "n",
-                "host,user,password",
-                "db.example.com",
-                "alice",
-                "s3cr3t",
+                "host=db.example.com port=5432 database=prod_db user=alice",
+                "s3cr3t",  # only password remains → single-field prompt
             ]
         )
 
@@ -858,7 +858,11 @@ async def test_selective_field_collection(
         assert len(conns) == 1
         saved = vault.load("postgresql", conns[0]["name"])
         assert saved is not None
-        assert set(saved.keys()) == {"host", "user", "password"}
+        assert saved["host"] == "db.example.com"
+        assert saved["port"] == "5432"
+        assert saved["database"] == "prod_db"
+        assert saved["user"] == "alice"
+        assert saved["password"] == "s3cr3t"
 
 
 class TestCredentialScrubbing:
@@ -926,13 +930,11 @@ async def test_register_and_scrub_on_connect(
             [
                 "PostgreSQL",
                 "n",
-                "y",
                 "db.host.com",
                 "5432",
                 "mydb",
                 "alice",
                 secret_pw,
-                "public",
             ]
         )
 
@@ -1483,13 +1485,11 @@ async def test_connect_clears_previous_ds_vars(
             [
                 "PostgreSQL",
                 "n",
-                "y",
                 "db.example.com",
                 "5432",
                 "prod_db",
                 "alice",
                 "s3cr3t",
-                "",
             ]
         )
 
@@ -2569,7 +2569,6 @@ def _capture(label, **kw):
             "(reconnect/cancel)",
             "(anton) Would you like to re-enter your credentials? (y/n)",
             "(anton) Use this datasource? (y/n)",
-            "(anton) Do you have these available? (y/n/<list params>)",
             "(anton) (reconnect/cancel)",
         ],
     )

From 294d0739612c7b44dea70cfc9331cddbb88c6457 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 18:43:22 +0200
Subject: [PATCH 076/134] Lint

---
 anton/chat.py                          |   8 +-
 anton/chat_session.py                  |   7 +-
 anton/cli.py                           |   5 +-
 anton/core/backends/base.py            |  10 +-
 anton/core/backends/local.py           |  45 +++++---
 anton/core/backends/scratchpad_boot.py | 137 ++++++++++++++++++-------
 anton/core/llm/anthropic.py            |  29 +++++-
 anton/core/llm/client.py               |  12 ++-
 anton/core/llm/openai.py               |  94 +++++++++++------
 anton/core/llm/prompt_builder.py       |   9 +-
 anton/core/llm/provider.py             |   4 +
 anton/core/settings.py                 |   9 +-
 anton/core/tools/tool_defs.py          |  41 +++++---
 anton/core/tools/tool_handlers.py      |  18 ++--
 anton/core/utils/scratchpad.py         |   2 +-
 tests/test_chat.py                     |  13 +--
 tests/test_chat_context.py             |  39 +++----
 tests/test_chat_scratchpad.py          |  20 ++--
 tests/test_datasource.py               |   3 +-
 19 files changed, 325 insertions(+), 180 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 7f28a398..4316d04b 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -17,7 +17,7 @@
     parse_dropped_paths as _parse_dropped_paths,
     save_clipboard_image,
 )
-from anton.core.session import ChatSession
+from anton.core.session import ChatSession, ChatSessionConfig
 from anton.core.llm.provider import (
     TokenLimitExceeded,
     StreamComplete,
@@ -984,8 +984,8 @@ async def _chat_loop(
         if settings.coding_provider == "anthropic"
         else settings.openai_api_key
     ) or ""
-    session = ChatSession(
-        state["llm_client"],
+    session = ChatSession(ChatSessionConfig(
+        llm_client=state["llm_client"],
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
@@ -999,7 +999,7 @@ async def _chat_loop(
         session_id=current_session_id,
         proactive_dashboards=settings.proactive_dashboards,
         tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL],
-    )
+    ))
 
     # Handle --resume flag at startup
     if resume:
diff --git a/anton/chat_session.py b/anton/chat_session.py
index b06a64c9..9c8b5e5d 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -67,6 +67,7 @@ def rebuild_session(
     """Rebuild LLMClient + ChatSession after settings change."""
     from anton.core.llm.client import LLMClient
     from anton.chat import ChatSession
+    from anton.core.session import ChatSessionConfig
 
     state["llm_client"] = LLMClient.from_settings(settings)
 
@@ -84,8 +85,8 @@ def rebuild_session(
         if settings.coding_provider == "anthropic"
         else settings.openai_api_key
     ) or ""
-    return ChatSession(
-        state["llm_client"],
+    return ChatSession(ChatSessionConfig(
+        llm_client=state["llm_client"],
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
@@ -99,4 +100,4 @@ def rebuild_session(
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
         output_dir=settings.output_dir,
-    )
+    ))
diff --git a/anton/cli.py b/anton/cli.py
index 940bf01a..d484eaa6 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -24,6 +24,7 @@
 from anton.core.llm.openai import build_chat_completion_kwargs
 
 from anton.chat import ChatSession
+from anton.core.session import ChatSessionConfig
 from anton.core.llm.client import LLMClient
 from anton.core.backends.manager import ScratchpadManager
 
@@ -1170,7 +1171,7 @@ def connect_data_source(
         )
         or "",
     )
-    session = ChatSession(llm_client)
+    session = ChatSession(ChatSessionConfig(llm_client=llm_client))
 
     async def _run() -> None:
         await handle_connect_datasource(
@@ -1214,7 +1215,7 @@ def edit_data_source(
         )
         or "",
     )
-    session = ChatSession(llm_client)
+    session = ChatSession(ChatSessionConfig(llm_client=llm_client))
 
     async def _run() -> None:
         await handle_connect_datasource(
diff --git a/anton/core/backends/base.py b/anton/core/backends/base.py
index cfc7f434..51a267de 100644
--- a/anton/core/backends/base.py
+++ b/anton/core/backends/base.py
@@ -12,10 +12,10 @@
 from pathlib import Path
 
 
-
 @dataclass
 class Cell:
     """A single scratchpad execution unit — code in, outputs out."""
+
     code: str
     stdout: str
     stderr: str
@@ -203,6 +203,7 @@ def _compact_cells(self) -> bool:
         True if compaction actually happened.
         """
         from anton.core.settings import CoreSettings
+
         keep = CoreSettings().cell_keep_recent
         if len(self.cells) <= keep + 1:
             return False
@@ -220,9 +221,8 @@ def _compact_cells(self) -> bool:
                 first_line = output.strip().split("\n")[0][:120]
             summary_lines.append(f"  [{status}] {desc}: {first_line}")
 
-        summary_text = (
-            f"# Compacted {len(to_compact)} earlier cells:\n"
-            + "\n".join(summary_lines)
+        summary_text = f"# Compacted {len(to_compact)} earlier cells:\n" + "\n".join(
+            summary_lines
         )
         summary_cell = Cell(
             code="# (compacted — see summary above)",
@@ -232,4 +232,4 @@ def _compact_cells(self) -> bool:
             description=f"Summary of cells 1–{len(to_compact)}",
         )
         self.cells = [summary_cell] + recent
-        return True
\ No newline at end of file
+        return True
diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index 16997bd6..bed8801e 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -74,8 +74,6 @@ def __init__(
         else:
             self._venvs_base = Path("~/.anton/scratchpad-venvs").expanduser()
 
-    # ── venv management ────────────────────────────────────────────────────
-
     def _ensure_venv(self) -> None:
         if self._venv_dir is not None and self._verify_venv_python():
             return
@@ -138,9 +136,14 @@ def _create_venv(self) -> None:
         if uv:
             _sp.run(
                 [
-                    uv, "venv", self._venv_dir,
-                    "--python", sys.executable,
-                    "--system-site-packages", "--seed", "--quiet",
+                    uv,
+                    "venv",
+                    self._venv_dir,
+                    "--python",
+                    sys.executable,
+                    "--system-site-packages",
+                    "--seed",
+                    "--quiet",
                 ],
                 check=True,
                 capture_output=True,
@@ -169,6 +172,7 @@ def _verify_venv_python(self) -> bool:
             return False
         try:
             import subprocess
+
             result = subprocess.run(
                 [self._venv_python, "-c", "print('ok')"],
                 capture_output=True,
@@ -191,12 +195,19 @@ def _add_windows_firewall_rule(self) -> None:
         if self._venv_python is None or not os.path.isfile(self._venv_python):
             return
         import subprocess as _sp
+
         rule_name = f"Anton Scratchpad - {self.name}"
         try:
             _sp.run(
                 [
-                    "netsh", "advfirewall", "firewall", "add", "rule",
-                    f"name={rule_name}", "dir=out", "action=allow",
+                    "netsh",
+                    "advfirewall",
+                    "firewall",
+                    "add",
+                    "rule",
+                    f"name={rule_name}",
+                    "dir=out",
+                    "action=allow",
                     f"program={self._venv_python}",
                 ],
                 capture_output=True,
@@ -209,6 +220,7 @@ def _add_windows_firewall_rule(self) -> None:
     def _setup_parent_site_packages(self) -> None:
         if sys.prefix != sys.base_prefix:
             import site as _site
+
             parent_site = _site.getsitepackages()
             child_site = None
             for dirpath, dirnames, _ in os.walk(self._venv_dir):
@@ -287,8 +299,6 @@ def _check_python_version(self) -> bool:
         except FileNotFoundError:
             return False
 
-    # ── Lifecycle ──────────────────────────────────────────────────────────
-
     async def start(self) -> None:
         """Write the boot script to a temp file and launch the subprocess."""
         self._ensure_venv()
@@ -347,8 +357,8 @@ async def start(self) -> None:
         _anton_root = str(Path(__file__).resolve().parent.parent.parent.parent)
         python_path = env.get("PYTHONPATH", "")
         if _anton_root not in python_path:
-            env["PYTHONPATH"] = (
-                _anton_root + (os.pathsep + python_path if python_path else "")
+            env["PYTHONPATH"] = _anton_root + (
+                os.pathsep + python_path if python_path else ""
             )
 
         try:
@@ -410,8 +420,6 @@ async def cleanup(self) -> None:
         await self._stop_process()
         self._nuke_venv()
 
-    # ── Execution ──────────────────────────────────────────────────────────
-
     async def execute_streaming(
         self,
         code: str,
@@ -558,7 +566,11 @@ async def _read_result(
                 ) from None
 
             if not raw:
-                yield {"stdout": "", "stderr": "", "error": "Process exited unexpectedly."}
+                yield {
+                    "stdout": "",
+                    "stderr": "",
+                    "error": "Process exited unexpectedly.",
+                }
                 return
 
             line = raw.decode().rstrip("\r\n")
@@ -567,7 +579,7 @@ async def _read_result(
                 current_inactivity = max(
                     current_inactivity, float(s.cell_inactivity_after_progress)
                 )
-                message = line[len(PROGRESS_MARKER):].strip()
+                message = line[len(PROGRESS_MARKER) :].strip()
                 yield message
                 continue
 
@@ -631,8 +643,6 @@ async def install_packages(self, packages: list[str]) -> str:
             self._installed_packages.add(p.lower())
         return output
 
-    # ── Internal helpers ───────────────────────────────────────────────────
-
     async def _stop_process(self) -> None:
         if self._proc is not None and self._proc.returncode is None:
             try:
@@ -662,6 +672,7 @@ def _kill_tree(self) -> None:
         pid = self._proc.pid
         if sys.platform != "win32":
             import signal
+
             try:
                 os.killpg(pid, signal.SIGKILL)
             except (ProcessLookupError, PermissionError):
diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
index 814a5501..c627a5f7 100644
--- a/anton/core/backends/scratchpad_boot.py
+++ b/anton/core/backends/scratchpad_boot.py
@@ -19,19 +19,27 @@
     try:
         import asyncio as _llm_asyncio
 
-        _scratchpad_provider_name = os.environ.get("ANTON_SCRATCHPAD_PROVIDER", "anthropic")
+        _scratchpad_provider_name = os.environ.get(
+            "ANTON_SCRATCHPAD_PROVIDER", "anthropic"
+        )
         if _scratchpad_provider_name in ("openai", "openai-compatible"):
             from anton.core.llm.openai import OpenAIProvider as _ProviderClass
         else:
             from anton.core.llm.anthropic import AnthropicProvider as _ProviderClass
 
-        _llm_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
+        _llm_ssl_verify = (
+            os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
+        )
         if _scratchpad_provider_name in ("openai", "openai-compatible"):
             # Explicitly pass base_url so Minds/openai-compatible endpoints work.
             # The OpenAI SDK may or may not pick up OPENAI_BASE_URL from env,
             # so we pass it directly to be safe.
-            _llm_base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get("ANTON_OPENAI_BASE_URL")
-            _llm_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("ANTON_OPENAI_API_KEY")
+            _llm_base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get(
+                "ANTON_OPENAI_BASE_URL"
+            )
+            _llm_api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get(
+                "ANTON_OPENAI_API_KEY"
+            )
             _llm_provider = _ProviderClass(
                 api_key=_llm_api_key or None,
                 base_url=_llm_base_url or None,
@@ -50,6 +58,7 @@ async def _run_with_heartbeat(coro):
             scratchpad inactivity timeout (30s) kills the process.  This
             wrapper runs a heartbeat task alongside the real work.
             """
+
             async def _heartbeat():
                 elapsed = 0
                 while True:
@@ -77,24 +86,30 @@ class _ScratchpadLLM:
             def model(self):
                 return _llm_model
 
-            def complete(self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096):
+            def complete(
+                self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096
+            ):
                 """Call the LLM synchronously. Returns an LLMResponse.
 
                 Automatically emits progress heartbeats every 10s so that
                 long API calls don't trip the scratchpad inactivity timeout.
                 """
-                return _llm_asyncio.run(_run_with_heartbeat(
-                    _llm_provider.complete(
-                        model=_llm_model,
-                        system=system,
-                        messages=messages,
-                        tools=tools,
-                        tool_choice=tool_choice,
-                        max_tokens=max_tokens,
+                return _llm_asyncio.run(
+                    _run_with_heartbeat(
+                        _llm_provider.complete(
+                            model=_llm_model,
+                            system=system,
+                            messages=messages,
+                            tools=tools,
+                            tool_choice=tool_choice,
+                            max_tokens=max_tokens,
+                        )
                     )
-                ))
+                )
 
-            async def complete_async(self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096):
+            async def complete_async(
+                self, *, system, messages, tools=None, tool_choice=None, max_tokens=4096
+            ):
                 """Call the LLM asynchronously. Returns an LLMResponse.
 
                 Use this inside async code (e.g. asyncio.gather) for concurrent
@@ -111,7 +126,9 @@ async def complete_async(self, *, system, messages, tools=None, tool_choice=None
                     )
                 )
 
-            def generate_object(self, schema_class, *, system, messages, max_tokens=4096):
+            def generate_object(
+                self, schema_class, *, system, messages, max_tokens=4096
+            ):
                 """Generate a structured object matching a Pydantic model.
 
                 Uses tool_choice to force the LLM to return structured data.
@@ -128,7 +145,10 @@ def generate_object(self, schema_class, *, system, messages, max_tokens=4096):
                 """
                 from pydantic import BaseModel as _BaseModel
 
-                is_list = hasattr(schema_class, "__origin__") and schema_class.__origin__ is list
+                is_list = (
+                    hasattr(schema_class, "__origin__")
+                    and schema_class.__origin__ is list
+                )
                 if is_list:
                     inner_class = schema_class.__args__[0]
 
@@ -159,6 +179,7 @@ class _ArrayWrapper(_BaseModel):
                     raise ValueError("LLM did not return structured output.")
 
                 import json as _json
+
                 raw = response.tool_calls[0].input
 
                 if is_list:
@@ -172,7 +193,9 @@ def get_llm():
             """Get a pre-configured LLM client. No API keys needed."""
             return _scratchpad_llm_instance
 
-        def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_tokens=4096):
+        def agentic_loop(
+            *, system, user_message, tools, handle_tool, max_turns=10, max_tokens=4096
+        ):
             """Run a synchronous LLM tool-call loop.
 
             The LLM reasons, calls tools via handle_tool(name, inputs) -> str,
@@ -209,12 +232,14 @@ def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_
                 if response.content:
                     assistant_content.append({"type": "text", "text": response.content})
                 for tc in response.tool_calls:
-                    assistant_content.append({
-                        "type": "tool_use",
-                        "id": tc.id,
-                        "name": tc.name,
-                        "input": tc.input,
-                    })
+                    assistant_content.append(
+                        {
+                            "type": "tool_use",
+                            "id": tc.id,
+                            "name": tc.name,
+                            "input": tc.input,
+                        }
+                    )
                 messages.append({"role": "assistant", "content": assistant_content})
 
                 # Execute each tool and collect results
@@ -224,11 +249,13 @@ def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_
                         result = handle_tool(tc.name, tc.input)
                     except Exception as exc:
                         result = f"Error: {exc}"
-                    tool_results.append({
-                        "type": "tool_result",
-                        "tool_use_id": tc.id,
-                        "content": result,
-                    })
+                    tool_results.append(
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": tc.id,
+                            "content": result,
+                        }
+                    )
                 messages.append({"role": "user", "content": tool_results})
 
             # Hit max_turns
@@ -248,7 +275,9 @@ def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_
         import ssl as _minds_ssl
         import urllib.request as _minds_urllib
 
-        _minds_ssl_verify = os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
+        _minds_ssl_verify = (
+            os.environ.get("ANTON_MINDS_SSL_VERIFY", "true").lower() != "false"
+        )
 
         def query_minds_data(query, datasource=None):
             """Query a Minds datasource with SQL. Returns dict with type, data, column_names, error_message."""
@@ -260,7 +289,10 @@ def query_minds_data(query, datasource=None):
             req.add_header("Authorization", f"Bearer {_minds_api_key}")
             req.add_header("Content-Type", "application/json")
             req.add_header("Accept", "application/json")
-            req.add_header("User-Agent", "Mozilla/5.0 (compatible; Anton/1.0; +https://github.com/mindsdb/anton)")
+            req.add_header(
+                "User-Agent",
+                "Mozilla/5.0 (compatible; Anton/1.0; +https://github.com/mindsdb/anton)",
+            )
             req.add_header("Accept-Language", "en-US,en;q=0.9")
             req.add_header("Accept-Encoding", "identity")
             req.add_header("Connection", "keep-alive")
@@ -306,13 +338,16 @@ def query_minds_data(query, datasource=None):
 
 _MAX_OUTPUT = 10_000
 
+
 def progress(message=""):
     """Signal that long-running work is still active. Resets the inactivity timer."""
     _real_stdout.write(PROGRESS_MARKER + " " + str(message) + "\n")
     _real_stdout.flush()
 
+
 namespace["progress"] = progress
 
+
 def sample(var, mode="preview", _name=None):
     """Inspect a variable with type-aware formatting.
 
@@ -335,6 +370,7 @@ def sample(var, mode="preview", _name=None):
 
     try:
         import pandas as _pd
+
         if isinstance(var, _pd.DataFrame):
             lines.append(f"Shape: {var.shape[0]} rows x {var.shape[1]} cols")
             lines.append(f"Columns: {list(var.columns)}")
@@ -373,6 +409,7 @@ def sample(var, mode="preview", _name=None):
 
     try:
         import numpy as _np
+
         if isinstance(var, _np.ndarray):
             lines.append(f"Shape: {var.shape}, Dtype: {var.dtype}")
             if mode == "preview":
@@ -381,9 +418,13 @@ def sample(var, mode="preview", _name=None):
                 lines.append(f"First {n} values: {flat[:n].tolist()}")
                 if len(flat) > 10:
                     lines.append(f"Last 3 values: {flat[-3:].tolist()}")
-                lines.append(f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}")
+                lines.append(
+                    f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}"
+                )
             else:
-                lines.append(f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}, Std: {var.std():.4g}")
+                lines.append(
+                    f"Min: {var.min()}, Max: {var.max()}, Mean: {var.mean():.4g}, Std: {var.std():.4g}"
+                )
                 lines.append(f"\n{repr(var)}")
             print(_truncate_sample("\n".join(lines), limit))
             return
@@ -405,6 +446,7 @@ def sample(var, mode="preview", _name=None):
                 lines.append(f"  {k!r}: {val_repr}")
         else:
             import json as _json
+
             try:
                 lines.append(_json.dumps(var, indent=2, default=str))
             except (TypeError, ValueError):
@@ -416,8 +458,14 @@ def sample(var, mode="preview", _name=None):
         kind = type(var).__name__
         lines.append(f"Length: {len(var)}")
         if len(var) > 0:
-            lines.append(f"Item types: {type(var[0]).__name__}" +
-                         (f" (mixed)" if len(var) > 1 and type(var[0]) != type(var[-1]) else ""))
+            lines.append(
+                f"Item types: {type(var[0]).__name__}"
+                + (
+                    f" (mixed)"
+                    if len(var) > 1 and type(var[0]) != type(var[-1])
+                    else ""
+                )
+            )
         if mode == "preview":
             n = min(5, len(var))
             for i in range(n):
@@ -507,8 +555,10 @@ def _truncate_sample(text, max_chars):
 # warnings, and errors from libraries.
 import logging as _logging
 
+
 class _CellLogHandler(_logging.Handler):
     """Logging handler that writes to whichever StringIO is current."""
+
     def __init__(self):
         super().__init__(level=_logging.INFO)
         self.buf = None
@@ -521,6 +571,7 @@ def emit(self, record):
             except Exception:
                 pass
 
+
 _cell_log_handler = _CellLogHandler()
 _logging.root.addHandler(_cell_log_handler)
 _logging.root.setLevel(_logging.INFO)
@@ -576,19 +627,24 @@ def emit(self, record):
             sys.stdout = _real_stdout
             sys.stderr = sys.__stderr__
             _cell_log_handler.buf = None
-            _real_stdout.write(PROGRESS_MARKER + " " + f"Installing {_missing}..." + "\n")
+            _real_stdout.write(
+                PROGRESS_MARKER + " " + f"Installing {_missing}..." + "\n"
+            )
             _real_stdout.flush()
             import subprocess as _sp
+
             _uv_path = os.environ.get("ANTON_UV_PATH", "")
             if _uv_path:
                 _pip = _sp.run(
                     [_uv_path, "pip", "install", "--python", sys.executable, _missing],
-                    capture_output=True, timeout=120,
+                    capture_output=True,
+                    timeout=120,
                 )
             else:
                 _pip = _sp.run(
                     [sys.executable, "-m", "pip", "install", _missing],
-                    capture_output=True, timeout=120,
+                    capture_output=True,
+                    timeout=120,
                 )
             # Reset buffers and retry
             out_buf = io.StringIO()
@@ -619,7 +675,10 @@ def emit(self, record):
 
     stdout_val = out_buf.getvalue()
     if len(stdout_val) > _MAX_OUTPUT:
-        stdout_val = stdout_val[:_MAX_OUTPUT] + f"\n\n... (truncated, {len(stdout_val)} chars total)"
+        stdout_val = (
+            stdout_val[:_MAX_OUTPUT]
+            + f"\n\n... (truncated, {len(stdout_val)} chars total)"
+        )
     result = {
         "stdout": stdout_val,
         "stderr": err_buf.getvalue(),
diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
index 7d61f132..264e4f1f 100644
--- a/anton/core/llm/anthropic.py
+++ b/anton/core/llm/anthropic.py
@@ -57,10 +57,15 @@ async def complete(
                 raise ContextOverflowError(str(exc)) from exc
             raise
         except anthropic.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+            if (
+                exc.status_code == 429
+                and isinstance(exc.body, dict)
+                and exc.body.get("detail")
+            ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
+
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
@@ -132,7 +137,12 @@ async def stream(
                         idx = event.index
                         block = event.content_block
                         if block.type == "tool_use":
-                            blocks[idx] = {"type": "tool_use", "id": block.id, "name": block.name, "json_parts": []}
+                            blocks[idx] = {
+                                "type": "tool_use",
+                                "id": block.id,
+                                "name": block.name,
+                                "json_parts": [],
+                            }
                             yield StreamToolUseStart(id=block.id, name=block.name)
                         else:
                             blocks[idx] = {"type": "text"}
@@ -147,7 +157,9 @@ async def stream(
                             info = blocks.get(idx, {})
                             if info.get("type") == "tool_use":
                                 info["json_parts"].append(delta.partial_json)
-                                yield StreamToolUseDelta(id=info["id"], json_delta=delta.partial_json)
+                                yield StreamToolUseDelta(
+                                    id=info["id"], json_delta=delta.partial_json
+                                )
 
                     elif event.type == "content_block_stop":
                         idx = event.index
@@ -156,7 +168,9 @@ async def stream(
                             raw_json = "".join(info["json_parts"])
                             parsed_input = json.loads(raw_json) if raw_json else {}
                             tool_calls.append(
-                                ToolCall(id=info["id"], name=info["name"], input=parsed_input)
+                                ToolCall(
+                                    id=info["id"], name=info["name"], input=parsed_input
+                                )
                             )
                             yield StreamToolUseEnd(id=info["id"])
 
@@ -169,10 +183,15 @@ async def stream(
                 raise ContextOverflowError(str(exc)) from exc
             raise
         except anthropic.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+            if (
+                exc.status_code == 429
+                and isinstance(exc.body, dict)
+                and exc.body.get("detail")
+            ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
+
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py
index a96c8ae7..df368ffb 100644
--- a/anton/core/llm/client.py
+++ b/anton/core/llm/client.py
@@ -91,8 +91,16 @@ def from_settings(cls, settings: AntonSettings) -> LLMClient:
 
         providers = {
             "anthropic": lambda: AnthropicProvider(api_key=settings.anthropic_api_key),
-            "openai": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify),
-            "openai-compatible": lambda: OpenAIProvider(api_key=settings.openai_api_key, base_url=settings.openai_base_url, ssl_verify=settings.minds_ssl_verify),
+            "openai": lambda: OpenAIProvider(
+                api_key=settings.openai_api_key,
+                base_url=settings.openai_base_url,
+                ssl_verify=settings.minds_ssl_verify,
+            ),
+            "openai-compatible": lambda: OpenAIProvider(
+                api_key=settings.openai_api_key,
+                base_url=settings.openai_base_url,
+                ssl_verify=settings.minds_ssl_verify,
+            ),
         }
 
         planning_factory = providers.get(settings.planning_provider)
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
index 95440393..c9425a3f 100644
--- a/anton/core/llm/openai.py
+++ b/anton/core/llm/openai.py
@@ -25,14 +25,16 @@ def _translate_tools(tools: list[dict]) -> list[dict]:
     """Anthropic tool format -> OpenAI function-calling format."""
     result = []
     for tool in tools:
-        result.append({
-            "type": "function",
-            "function": {
-                "name": tool["name"],
-                "description": tool.get("description", ""),
-                "parameters": tool.get("input_schema", {}),
-            },
-        })
+        result.append(
+            {
+                "type": "function",
+                "function": {
+                    "name": tool["name"],
+                    "description": tool.get("description", ""),
+                    "parameters": tool.get("input_schema", {}),
+                },
+            }
+        )
     return result
 
 
@@ -99,14 +101,16 @@ def _translate_assistant_blocks(blocks: list[dict]) -> list[dict]:
         if block.get("type") == "text":
             text_parts.append(block["text"])
         elif block.get("type") == "tool_use":
-            tool_calls.append({
-                "id": block["id"],
-                "type": "function",
-                "function": {
-                    "name": block["name"],
-                    "arguments": json.dumps(block.get("input", {})),
-                },
-            })
+            tool_calls.append(
+                {
+                    "id": block["id"],
+                    "type": "function",
+                    "function": {
+                        "name": block["name"],
+                        "arguments": json.dumps(block.get("input", {})),
+                    },
+                }
+            )
 
     msg: dict = {"role": "assistant"}
     content = "\n".join(text_parts) if text_parts else None
@@ -133,11 +137,13 @@ def _translate_user_blocks(blocks: list[dict]) -> list[dict]:
                 content = "\n".join(
                     b.get("text", "") for b in content if b.get("type") == "text"
                 )
-            result.append({
-                "role": "tool",
-                "tool_call_id": block["tool_use_id"],
-                "content": str(content),
-            })
+            result.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": block["tool_use_id"],
+                    "content": str(content),
+                }
+            )
         elif block.get("type") == "text":
             content_parts.append({"type": "text", "text": block.get("text", "")})
         elif block.get("type") == "image":
@@ -146,18 +152,22 @@ def _translate_user_blocks(blocks: list[dict]) -> list[dict]:
             if source.get("type") == "base64":
                 media_type = source.get("media_type", "image/png")
                 data = source.get("data", "")
-                content_parts.append({
-                    "type": "image_url",
-                    "image_url": {"url": f"data:{media_type};base64,{data}"},
-                })
+                content_parts.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:{media_type};base64,{data}"},
+                    }
+                )
 
     if content_parts:
         # If only text parts, flatten to a simple string for compatibility
         if all(p.get("type") == "text" for p in content_parts):
-            result.append({
-                "role": "user",
-                "content": "\n".join(p["text"] for p in content_parts),
-            })
+            result.append(
+                {
+                    "role": "user",
+                    "content": "\n".join(p["text"] for p in content_parts),
+                }
+            )
         else:
             result.append({"role": "user", "content": content_parts})
 
@@ -231,10 +241,15 @@ async def complete(
                 raise ContextOverflowError(str(exc)) from exc
             raise
         except openai.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+            if (
+                exc.status_code == 429
+                and isinstance(exc.body, dict)
+                and exc.body.get("detail")
+            ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
+
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
@@ -256,7 +271,9 @@ async def complete(
                     ToolCall(
                         id=tc.id,
                         name=tc.function.name,
-                        input=json.loads(tc.function.arguments) if tc.function.arguments else {},
+                        input=json.loads(tc.function.arguments)
+                        if tc.function.arguments
+                        else {},
                     )
                 )
 
@@ -331,7 +348,9 @@ async def stream(
                             # New tool call
                             tc_state[idx] = {
                                 "id": tc_delta.id or "",
-                                "name": tc_delta.function.name if tc_delta.function and tc_delta.function.name else "",
+                                "name": tc_delta.function.name
+                                if tc_delta.function and tc_delta.function.name
+                                else "",
                                 "args_parts": [],
                             }
                             if tc_state[idx]["id"] and tc_state[idx]["name"]:
@@ -348,7 +367,9 @@ async def stream(
 
                         # Accumulate argument fragments
                         if tc_delta.function and tc_delta.function.arguments:
-                            tc_state[idx]["args_parts"].append(tc_delta.function.arguments)
+                            tc_state[idx]["args_parts"].append(
+                                tc_delta.function.arguments
+                            )
                             yield StreamToolUseDelta(
                                 id=tc_state[idx]["id"],
                                 json_delta=tc_delta.function.arguments,
@@ -359,10 +380,15 @@ async def stream(
                 raise ContextOverflowError(str(exc)) from exc
             raise
         except openai.APIStatusError as exc:
-            if exc.status_code == 429 and isinstance(exc.body, dict) and exc.body.get("detail"):
+            if (
+                exc.status_code == 429
+                and isinstance(exc.body, dict)
+                and exc.body.get("detail")
+            ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
                 msg += " Visit https://mdb.ai to upgrade or top up your tokens."
                 from .provider import TokenLimitExceeded
+
                 raise TokenLimitExceeded(msg) from exc
             else:
                 msg = f"Server returned {exc.status_code} — the LLM endpoint may be temporarily unavailable. Try again in a moment."
diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index 867e9e23..960a8b61 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -4,8 +4,8 @@
 from typing import TYPE_CHECKING
 
 from .prompts import (
-    BASE_VISUALIZATIONS_PROMPT, 
-    CHAT_SYSTEM_PROMPT, 
+    BASE_VISUALIZATIONS_PROMPT,
+    CHAT_SYSTEM_PROMPT,
     VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT,
     VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT,
 )
@@ -54,7 +54,9 @@ def _build_visualizations_section(
             else VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT
         )
         # The output-format prompt can reference `{output_path}`.
-        output_format = visualizations_output_format_prompt.format(output_path=output_path)
+        output_format = visualizations_output_format_prompt.format(
+            output_path=output_path
+        )
         return BASE_VISUALIZATIONS_PROMPT.format(output_format=output_format)
 
     def build(
@@ -100,4 +102,3 @@ def build(
 
 
 __all__ = ["ChatSystemPromptBuilder"]
-
diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py
index ceae40bf..02bf93de 100644
--- a/anton/core/llm/provider.py
+++ b/anton/core/llm/provider.py
@@ -27,6 +27,7 @@ class LLMResponse:
     usage: Usage = field(default_factory=Usage)
     stop_reason: str | None = None
 
+
 @dataclass
 class StreamTextDelta:
     text: str
@@ -57,6 +58,7 @@ class StreamComplete:
 @dataclass
 class StreamTaskProgress:
     """Progress event from agent task execution (planning, building, executing)."""
+
     phase: str
     message: str
     eta_seconds: float | None = None
@@ -65,12 +67,14 @@ class StreamTaskProgress:
 @dataclass
 class StreamToolResult:
     """Tool result that should be displayed to the user (e.g. scratchpad dump)."""
+
     content: str
 
 
 @dataclass
 class StreamContextCompacted:
     """Notification that context was compacted to free up space."""
+
     message: str
 
 
diff --git a/anton/core/settings.py b/anton/core/settings.py
index 79e62a00..ff5186ee 100644
--- a/anton/core/settings.py
+++ b/anton/core/settings.py
@@ -1,5 +1,6 @@
 from pydantic_settings import BaseSettings
 
+
 #
 class CoreSettings(BaseSettings):
     model_config = {"env_prefix": "ANTON_", "extra": "ignore"}
@@ -13,8 +14,8 @@ class CoreSettings(BaseSettings):
     token_status_cache_ttl: float = 60.0
 
     # Scratchpad execution tuning
-    cell_timeout_default: int = 120       # Total timeout when no estimate given (s)
-    cell_inactivity_timeout: int = 30     # Max silence between output lines (s)
+    cell_timeout_default: int = 120  # Total timeout when no estimate given (s)
+    cell_inactivity_timeout: int = 30  # Max silence between output lines (s)
     cell_inactivity_after_progress: int = 60  # Grace window after progress() call (s)
-    cell_install_timeout: int = 120       # pip/uv install timeout (s)
-    cell_keep_recent: int = 5             # Recent cells preserved during compaction
+    cell_install_timeout: int = 120  # pip/uv install timeout (s)
+    cell_keep_recent: int = 5  # Recent cells preserved during compaction
diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py
index fb3ba4c9..d4319619 100644
--- a/anton/core/tools/tool_defs.py
+++ b/anton/core/tools/tool_defs.py
@@ -1,4 +1,8 @@
-from anton.core.tools.tool_handlers import handle_scratchpad, handle_memorize, handle_recall
+from anton.core.tools.tool_handlers import (
+    handle_scratchpad,
+    handle_memorize,
+    handle_recall,
+)
 
 from dataclasses import dataclass
 from typing import Callable, Optional
@@ -10,12 +14,14 @@ class ToolDef:
     description: str
     input_schema: dict
     handler: Callable  # async (session, tc_input) -> str
-    prompt: Optional[str] = None  # Optional prompt relevant to the tool to be injected into the system prompt.
+    prompt: Optional[str] = (
+        None  # Optional prompt relevant to the tool to be injected into the system prompt.
+    )
 
 
 SCRATCHPAD_TOOL = ToolDef(
-    name = "scratchpad",
-    description = (
+    name="scratchpad",
+    description=(
         "Run Python code in a persistent scratchpad. Use this whenever you need to "
         "count characters, do math, parse data, transform text, or any task that "
         "benefits from precise computation rather than guessing. Variables, imports, "
@@ -51,10 +57,13 @@ class ToolDef:
         "for every exec call. For very long operations, provide a realistic estimate "
         "and use progress() to keep the cell alive."
     ),
-        input_schema = {
+    input_schema={
         "type": "object",
         "properties": {
-            "action": {"type": "string", "enum": ["exec", "view", "reset", "remove", "dump", "install"]},
+            "action": {
+                "type": "string",
+                "enum": ["exec", "view", "reset", "remove", "dump", "install"],
+            },
             "name": {"type": "string", "description": "Scratchpad name"},
             "code": {
                 "type": "string",
@@ -78,13 +87,13 @@ class ToolDef:
         },
         "required": ["action", "name"],
     },
-    handler = handle_scratchpad,
+    handler=handle_scratchpad,
 )
 
 
 MEMORIZE_TOOL = ToolDef(
-    name = "memorize",
-    description = (
+    name="memorize",
+    description=(
         "Encode a rule or lesson into long-term memory for future sessions. "
         "Use this when you learn something important, discover a useful pattern, "
         "or the user asks you to remember something.\n\n"
@@ -95,7 +104,7 @@ class ToolDef:
         "- lesson: Factual knowledge ('CoinGecko rate-limits at 50/min')\n"
         "- profile: Fact about the user ('Name: Jorge', 'Prefers dark mode')"
     ),
-    input_schema = {
+    input_schema={
         "type": "object",
         "properties": {
             "entries": {
@@ -126,13 +135,13 @@ class ToolDef:
         },
         "required": ["entries"],
     },
-    handler = handle_memorize,
+    handler=handle_memorize,
 )
 
 
 RECALL_TOOL = ToolDef(
-    name = "recall",
-    description = (
+    name="recall",
+    description=(
         "Search your episodic memory — an archive of past conversations. "
         "ONLY use this when the user explicitly asks about a previous conversation "
         "or session (e.g. 'what did we talk about last time?', 'remember when we...', "
@@ -141,7 +150,7 @@ class ToolDef:
         "Returns timestamped episodes matching the query (newest first). "
         "A single call is enough — do not call multiple times with different queries."
     ),
-    input_schema = {
+    input_schema={
         "type": "object",
         "properties": {
             "query": {
@@ -159,5 +168,5 @@ class ToolDef:
         },
         "required": ["query"],
     },
-    handler = handle_recall,
-)
\ No newline at end of file
+    handler=handle_recall,
+)
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index d94d724a..80334fb4 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -58,14 +58,16 @@ async def handle_memorize(session: ChatSession, tc_input: dict) -> str:
             scope = "project"
 
         # User-sourced memories (via explicit tool call) get high confidence
-        engrams.append(Engram(
-            text=entry["text"],
-            kind=kind,
-            scope=scope,
-            confidence="high",
-            topic=entry.get("topic", ""),
-            source="user",
-        ))
+        engrams.append(
+            Engram(
+                text=entry["text"],
+                kind=kind,
+                scope=scope,
+                confidence="high",
+                topic=entry.get("topic", ""),
+                source="user",
+            )
+        )
 
     if not engrams:
         return "No valid entries provided."
diff --git a/anton/core/utils/scratchpad.py b/anton/core/utils/scratchpad.py
index 18d7efd9..da518ff4 100644
--- a/anton/core/utils/scratchpad.py
+++ b/anton/core/utils/scratchpad.py
@@ -63,4 +63,4 @@ def format_cell_result(cell) -> str:
         parts.append(f"[error]\n{cell.error}")
     if not parts:
         return "Code executed successfully (no output)."
-    return "\n".join(parts)
\ No newline at end of file
+    return "\n".join(parts)
diff --git a/tests/test_chat.py b/tests/test_chat.py
index d0f465fd..82a11276 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -5,6 +5,7 @@
 import pytest
 
 from anton.chat import ChatSession
+from anton.core.session import ChatSessionConfig
 from anton.core.llm.provider import (
     ContextOverflowError,
     LLMResponse,
@@ -31,7 +32,7 @@ async def test_conversational_turn(self):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hey! How can I help?"))
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         reply = await session.turn("hi")
 
         assert reply == "Hey! How can I help?"
@@ -48,7 +49,7 @@ async def test_history_grows_across_turns(self):
             ]
         )
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         await session.turn("hello")
         await session.turn("can you check something")
         await session.turn("the anton repo")
@@ -86,7 +87,7 @@ async def _stream(**kwargs):
 
         mock_llm.plan_stream = _stream
 
-        session = ChatSession(mock_llm)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         events = []
         async for event in session.turn_stream("hi"):
             events.append(event)
@@ -123,7 +124,7 @@ async def _plan_stream(**kwargs):
                     )
                 )
 
-        session = ChatSession(AsyncMock())
+        session = ChatSession(ChatSessionConfig(llm_client=AsyncMock()))
         session._llm.plan_stream = _plan_stream
         session._llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — done"))
         session._summarize_history = AsyncMock()
@@ -144,7 +145,7 @@ async def _plan_stream(**kwargs):
                 )
             )
 
-        session = ChatSession(AsyncMock())
+        session = ChatSession(ChatSessionConfig(llm_client=AsyncMock()))
         session._llm.plan_stream = _plan_stream
         session._llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — done"))
         session._summarize_history = AsyncMock()
@@ -165,7 +166,7 @@ async def _plan_stream(**kwargs):
                 )
             )
 
-        session = ChatSession(AsyncMock())
+        session = ChatSession(ChatSessionConfig(llm_client=AsyncMock()))
         session._llm.plan_stream = _plan_stream
         session._llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — done"))
         session._summarize_history = AsyncMock()
diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py
index 52719aaf..84909d25 100644
--- a/tests/test_chat_context.py
+++ b/tests/test_chat_context.py
@@ -9,6 +9,7 @@
 import pytest
 
 from anton.chat import ChatSession, _handle_connect
+from anton.core.session import ChatSessionConfig
 from anton.minds_client import describe_minds_connection_error
 from anton.config.settings import AntonSettings
 from anton.core.tools.tool_defs import MEMORIZE_TOOL
@@ -98,7 +99,7 @@ async def test_memorize_creates_rule(self, cortex, memory_dirs):
             ]
         )
 
-        session = ChatSession(mock_llm, cortex=cortex)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
         reply = await session.turn("always use httpx instead of requests")
         await asyncio.sleep(0)  # Let fire-and-forget encode task run
 
@@ -124,7 +125,7 @@ async def test_memorize_creates_lesson(self, cortex, memory_dirs):
             ]
         )
 
-        session = ChatSession(mock_llm, cortex=cortex)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
         await session.turn("coingecko rate limits at 50 per minute")
         await asyncio.sleep(0)  # Let fire-and-forget encode task run
 
@@ -145,7 +146,7 @@ async def test_memory_injected_into_system_prompt(self, cortex, memory_dirs):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(mock_llm, cortex=cortex)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args
@@ -158,7 +159,7 @@ async def test_no_cortex_excludes_memorize_tool(self):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
-        session = ChatSession(mock_llm, self_awareness=None, cortex=None)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, self_awareness=None, cortex=None))
         await session.turn("hello")
 
         call_kwargs = mock_llm.plan.call_args
@@ -172,7 +173,7 @@ async def test_cortex_includes_memorize_tool(self, cortex):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
-        session = ChatSession(mock_llm, cortex=cortex)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
         await session.turn("hello")
 
         call_kwargs = mock_llm.plan.call_args
@@ -194,7 +195,7 @@ async def test_tool_result_in_history(self, cortex, memory_dirs):
             ]
         )
 
-        session = ChatSession(mock_llm, cortex=cortex)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
         await session.turn("note this")
 
         tool_result_msgs = [
@@ -214,11 +215,11 @@ async def test_anton_md_injected_into_system_prompt(self, ws, cortex):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(
-            mock_llm,
+        session = ChatSession(ChatSessionConfig(
+            llm_client=mock_llm,
             cortex=cortex,
             workspace=ws,
-        )
+        ))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args
@@ -233,11 +234,11 @@ async def test_empty_anton_md_no_section(self, ws, cortex):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(
-            mock_llm,
+        session = ChatSession(ChatSessionConfig(
+            llm_client=mock_llm,
             cortex=cortex,
             workspace=ws,
-        )
+        ))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args
@@ -251,10 +252,10 @@ async def test_runtime_context_injected_into_system_prompt(self):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(
-            mock_llm,
+        session = ChatSession(ChatSessionConfig(
+            llm_client=mock_llm,
             runtime_context="- Provider: anthropic\n- Planning model: claude-sonnet-4-6\n- Coding model: claude-opus-4-6",
-        )
+        ))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args
@@ -268,10 +269,10 @@ async def test_system_prompt_warns_not_to_ask_about_llm(self):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(
-            mock_llm,
+        session = ChatSession(ChatSessionConfig(
+            llm_client=mock_llm,
             runtime_context="- Provider: anthropic",
-        )
+        ))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args
@@ -283,7 +284,7 @@ async def test_conversation_discipline_in_prompt(self):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(mock_llm, runtime_context="")
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, runtime_context=""))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args
diff --git a/tests/test_chat_scratchpad.py b/tests/test_chat_scratchpad.py
index c00773b2..aec784f7 100644
--- a/tests/test_chat_scratchpad.py
+++ b/tests/test_chat_scratchpad.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from anton.core.session import ChatSession
+from anton.core.session import ChatSession, ChatSessionConfig
 from anton.core.tools.tool_defs import SCRATCHPAD_TOOL
 from anton.commands.session import handle_resume
 from anton.core.llm.provider import LLMResponse, StreamComplete, StreamToolResult, ToolCall, Usage
@@ -72,7 +72,7 @@ async def test_scratchpad_tool_in_tools(self, workspace):
         mock_llm = AsyncMock()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             await session.turn("hello")
 
@@ -95,7 +95,7 @@ async def test_scratchpad_exec_via_chat(self, workspace):
             ]
         )
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             reply = await session.turn("what is 7 * 6?")
 
@@ -122,7 +122,7 @@ async def test_scratchpad_view_via_chat(self, workspace):
             ]
         )
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             await session.turn("run and show")
 
@@ -151,7 +151,7 @@ async def test_scratchpad_remove_via_chat(self, workspace):
             ]
         )
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             await session.turn("create and remove")
 
@@ -180,7 +180,7 @@ async def test_scratchpad_dump_via_chat(self, workspace):
             ]
         )
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             await session.turn("show me my work")
 
@@ -242,7 +242,7 @@ def fake_plan_stream(**kwargs):
 
         mock_llm.plan_stream = fake_plan_stream
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             events = []
             async for event in session.turn_stream("show work"):
@@ -285,7 +285,7 @@ def fake_plan_stream(**kwargs):
 
         mock_llm.plan_stream = fake_plan_stream
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             events = []
             async for event in session.turn_stream("compute 99"):
@@ -317,7 +317,7 @@ async def test_install_action_dispatch(self, workspace):
             ]
         )
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             reply = await session.turn("install cowsay")
 
@@ -341,7 +341,7 @@ async def test_install_empty_packages_via_chat(self, workspace):
             ]
         )
 
-        session = ChatSession(mock_llm, workspace=workspace)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
         try:
             await session.turn("install nothing")
 
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 74ab2c1e..3041c4ef 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -12,6 +12,7 @@
 from anton.chat import (
     ChatSession,
 )
+from anton.core.session import ChatSessionConfig
 from anton.commands.datasource import (
     _PROMPT_RECONNECT_CANCEL,
     handle_add_custom_datasource,
@@ -149,7 +150,7 @@ def _factory():
         plan_response = MagicMock()
         plan_response.content = "UNKNOWN"
         mock_llm.plan = AsyncMock(return_value=plan_response)
-        session = ChatSession(mock_llm)
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         session._scratchpads = AsyncMock()
         return session
 

From c8be9f3492594d28bf04d04059d64125a534d255 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Thu, 9 Apr 2026 18:43:39 +0200
Subject: [PATCH 077/134] Add chat sesion config

---
 anton/core/session.py | 126 ++++++++++++++++++++++++++----------------
 1 file changed, 78 insertions(+), 48 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index a920346d..bdb39f8a 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -2,6 +2,7 @@
 
 import asyncio
 from collections.abc import AsyncIterator
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
 from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
@@ -14,11 +15,16 @@
     StreamTaskProgress,
     StreamTextDelta,
     StreamToolResult,
-    TokenLimitExceeded
+    TokenLimitExceeded,
 )
 from anton.core.backends.manager import ScratchpadManager
 from anton.core.tools.registry import ToolRegistry
-from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef
+from anton.core.tools.tool_defs import (
+    SCRATCHPAD_TOOL,
+    MEMORIZE_TOOL,
+    RECALL_TOOL,
+    ToolDef,
+)
 from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
 
 from anton.utils.datasources import (
@@ -39,67 +45,75 @@
     from anton.workspace import Workspace
 
 
+@dataclass
+class ChatSessionConfig:
+    """All construction parameters for a ChatSession.
+
+    Separates configuration assembly (the host app's job) from session
+    orchestration (the core's job). Hosts build this object and pass it
+    to ChatSession — the session never needs to know where values came from.
+    """
+
+    llm_client: LLMClient
+    settings: CoreSettings | None = None
+    self_awareness: SelfAwarenessContext | None = None
+    cortex: Cortex | None = None
+    episodic: EpisodicMemory | None = None
+    runtime_context: str = ""
+    workspace: Workspace | None = None
+    console: Console | None = None
+    coding_provider: str = "anthropic"
+    coding_api_key: str = ""
+    coding_base_url: str = ""
+    initial_history: list[dict] | None = None
+    history_store: HistoryStore | None = None
+    session_id: str | None = None
+    proactive_dashboards: bool = False
+    output_dir: str = ""
+    tools: list[ToolDef] = field(default_factory=list)
 
 
 class ChatSession:
     """Manages a multi-turn conversation with tool-call delegation."""
 
-    def __init__(
-        self,
-        llm_client: LLMClient,
-        *,
-        settings: CoreSettings | None = None,
-        self_awareness: SelfAwarenessContext | None = None,
-        cortex: Cortex | None = None,
-        episodic: EpisodicMemory | None = None,
-        runtime_context: str = "",
-        workspace: Workspace | None = None,
-        console: Console | None = None,
-        coding_provider: str = "anthropic",
-        coding_api_key: str = "",
-        coding_base_url: str = "",
-        initial_history: list[dict] | None = None,
-        history_store: HistoryStore | None = None,
-        session_id: str | None = None,
-        proactive_dashboards: bool = False,
-        output_dir: str = "",
-        tools: list[ToolDef] | None = None,
-    ) -> None:
-        s = settings or CoreSettings()
+    def __init__(self, config: ChatSessionConfig) -> None:
+        s = config.settings or CoreSettings()
         self._max_tool_rounds = s.max_tool_rounds
         self._max_continuations = s.max_continuations
         self._context_pressure_threshold = s.context_pressure_threshold
         self._max_consecutive_errors = s.max_consecutive_errors
         self._resilience_nudge_at = s.resilience_nudge_at
         self._token_status_cache_ttl = s.token_status_cache_ttl
-        self._llm = llm_client
-        self._self_awareness = self_awareness
-        self._cortex = cortex
-        self._episodic = episodic
-        self._runtime_context = runtime_context
-        self._proactive_dashboards = proactive_dashboards
-        self._extra_tools = tools or []
-        self._output_dir = output_dir
-        self._workspace = workspace
-        self._console = console
-        self._history: list[dict] = list(initial_history) if initial_history else []
+        self._llm = config.llm_client
+        self._self_awareness = config.self_awareness
+        self._cortex = config.cortex
+        self._episodic = config.episodic
+        self._runtime_context = config.runtime_context
+        self._proactive_dashboards = config.proactive_dashboards
+        self._extra_tools = config.tools
+        self._output_dir = config.output_dir
+        self._workspace = config.workspace
+        self._console = config.console
+        self._history: list[dict] = (
+            list(config.initial_history) if config.initial_history else []
+        )
         self._pending_memory_confirmations: list = []
         self._turn_count = (
             sum(1 for m in self._history if m.get("role") == "user")
-            if initial_history
+            if config.initial_history
             else 0
         )
-        self._history_store = history_store
-        self._session_id = session_id
+        self._history_store = config.history_store
+        self._session_id = config.session_id
         self._cancel_event = asyncio.Event()
         self._escape_watcher: EscapeWatcher | None = None
         self._active_datasource: str | None = None
         self._scratchpads = ScratchpadManager(
-            coding_provider=coding_provider,
-            coding_model=getattr(llm_client, "coding_model", ""),
-            coding_api_key=coding_api_key,
-            coding_base_url=coding_base_url,
-            workspace_path=workspace.base if workspace else None,
+            coding_provider=config.coding_provider,
+            coding_model=getattr(config.llm_client, "coding_model", ""),
+            coding_api_key=config.coding_api_key,
+            coding_base_url=config.coding_base_url,
+            workspace_path=config.workspace.base if config.workspace else None,
         )
         self.tool_registry = ToolRegistry()
 
@@ -117,7 +131,13 @@ def _apply_error_tracking(
         """Track consecutive errors per tool and append nudge/circuit-breaker messages."""
         is_error = any(
             marker in result_text
-            for marker in ("[error]", "Task failed:", "failed", "timed out", "Rejected:")
+            for marker in (
+                "[error]",
+                "Task failed:",
+                "failed",
+                "timed out",
+                "Rejected:",
+            )
         )
         if is_error:
             error_streak[tool_name] = error_streak.get(tool_name, 0) + 1
@@ -184,6 +204,7 @@ def _persist_history(self) -> None:
 
     async def _build_system_prompt(self, user_message: str = "") -> str:
         import datetime as _dt
+
         _now = _dt.datetime.now()
         _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p")
 
@@ -297,7 +318,9 @@ def _build_core_tools(self) -> None:
         if self._cortex is not None:
             wisdom = self._cortex.get_scratchpad_context()
             if wisdom:
-                scratchpad_tool.description += f"\n\nLessons from past sessions:\n{wisdom}"
+                scratchpad_tool.description += (
+                    f"\n\nLessons from past sessions:\n{wisdom}"
+                )
 
         self.tool_registry.register_tool(scratchpad_tool)
 
@@ -694,7 +717,10 @@ async def _stream_and_handle_tools(
 
         # Detect max_tokens truncation — the LLM was cut off mid-response.
         # Inject a continuation prompt so it can finish what it was doing.
-        if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls:
+        if (
+            llm_response.stop_reason in ("max_tokens", "length")
+            and not llm_response.tool_calls
+        ):
             self._history.append(
                 {"role": "assistant", "content": llm_response.content or ""}
             )
@@ -877,7 +903,8 @@ async def _stream_and_handle_tools(
                                         description=description,
                                     )
                         elif tc.name == "connect_new_datasource" or (
-                            tc.name == "publish_or_preview" and tc.input.get("action") == "publish"
+                            tc.name == "publish_or_preview"
+                            and tc.input.get("action") == "publish"
                         ):
                             # Interactive tool — pause spinner AND escape watcher
                             yield StreamTaskProgress(
@@ -971,7 +998,10 @@ async def _stream_and_handle_tools(
                 llm_response = response.response
 
                 # Detect max_tokens truncation inside tool loop
-                if llm_response.stop_reason in ("max_tokens", "length") and not llm_response.tool_calls:
+                if (
+                    llm_response.stop_reason in ("max_tokens", "length")
+                    and not llm_response.tool_calls
+                ):
                     self._history.append(
                         {"role": "assistant", "content": llm_response.content or ""}
                     )

From bd3a32b90c6381ce68fbb0b747380cc8e069ff36 Mon Sep 17 00:00:00 2001
From: pnewsam <paul@mindsdb.com>
Date: Wed, 8 Apr 2026 15:49:31 -0700
Subject: [PATCH 078/134] Explainability v1

---
 anton/chat.py                          |   5 +-
 anton/commands/ui.py                   |  65 +++++-
 anton/core/backends/scratchpad_boot.py |  28 ++-
 anton/core/session.py                  | 175 ++++++++++------
 anton/core/tools/tool_handlers.py      |   4 +
 anton/explainability.py                | 271 +++++++++++++++++++++++++
 tests/test_explainability.py           | 137 +++++++++++++
 7 files changed, 623 insertions(+), 62 deletions(-)
 create mode 100644 anton/explainability.py
 create mode 100644 tests/test_explainability.py

diff --git a/anton/chat.py b/anton/chat.py
index 7f28a398..0164cd07 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -35,7 +35,7 @@
     handle_setup,
     handle_setup_models,
 )
-from anton.commands.ui import handle_theme, print_slash_help
+from anton.commands.ui import handle_explain, handle_theme, print_slash_help
 from anton.utils.clipboard import (
     ensure_clipboard,
     format_clipboard_image_message,
@@ -1274,6 +1274,9 @@ def _bottom_toolbar():
                 elif cmd == "/unpublish":
                     await _handle_unpublish(console, settings, workspace)
                     continue
+                elif cmd == "/explain":
+                    handle_explain(console, settings.workspace_path)
+                    continue
                 elif cmd == "/help":
                     print_slash_help(console)
                     continue
diff --git a/anton/commands/ui.py b/anton/commands/ui.py
index f5bcabf6..c07a62b7 100644
--- a/anton/commands/ui.py
+++ b/anton/commands/ui.py
@@ -1,9 +1,11 @@
-"""Slash-command handlers for /theme and /help."""
+"""Slash-command handlers for /theme, /explain, and /help."""
 
 from __future__ import annotations
 
 from rich.console import Console
 
+from anton.explainability import ExplainabilityStore
+
 
 def handle_theme(console: Console, arg: str) -> None:
     """Switch the color theme (light/dark)."""
@@ -17,7 +19,9 @@ def handle_theme(console: Console, arg: str) -> None:
     elif arg in ("light", "dark"):
         new_mode = arg
     else:
-        console.print(f"[anton.warning]Unknown theme '{arg}'. Use: /theme light | /theme dark[/]")
+        console.print(
+            f"[anton.warning]Unknown theme '{arg}'. Use: /theme light | /theme dark[/]"
+        )
         console.print()
         return
 
@@ -37,7 +41,9 @@ def print_slash_help(console: Console) -> None:
     console.print("  [bold]/llm[/]      — Change LLM provider or API key")
 
     console.print("\n[bold]Data Connections[/]")
-    console.print("  [bold]/connect[/]   — Connect a database or API to your Local Vault")
+    console.print(
+        "  [bold]/connect[/]   — Connect a database or API to your Local Vault"
+    )
     console.print("  [bold]/list[/]      — List all saved connections")
     console.print("  [bold]/edit[/]      — Edit credentials for an existing connection")
     console.print("  [bold]/remove[/]    — Remove a saved connection")
@@ -53,9 +59,62 @@ def print_slash_help(console: Console) -> None:
     console.print("  [bold]/resume[/]    — Continue a previous session")
     console.print("  [bold]/publish[/]   — Publish an HTML report to the web")
     console.print("  [bold]/unpublish[/] — Remove a published report")
+    console.print(
+        "  [bold]/explain[/]   — Show explainability details for the latest answer"
+    )
 
     console.print("\n[bold]General[/]")
     console.print("  [bold]/help[/]      — Show this help menu")
     console.print("  [bold]exit[/]       — Exit the chat")
 
     console.print()
+
+
+def handle_explain(console: Console, workspace_path) -> None:
+    """Print explainability details for the latest answer in the workspace."""
+    store = ExplainabilityStore(workspace_path)
+    record = store.load_latest()
+    if record is None:
+        console.print(
+            "[anton.warning]No explainability record found yet for this workspace.[/]"
+        )
+        console.print()
+        return
+
+    console.print()
+    console.print("[anton.cyan]Explain This Answer[/]")
+    console.print(f"[anton.muted]Turn {record.turn} • {record.created_at}[/]")
+    console.print()
+
+    console.print("[bold]Summary[/]")
+    console.print(record.summary or "No summary available.")
+    console.print()
+
+    console.print("[bold]Data Sources Used[/]")
+    if record.data_sources:
+        for source in record.data_sources:
+            engine = source.get("engine")
+            if engine:
+                console.print(f"  - {source.get('name', 'Unknown')} ({engine})")
+            else:
+                console.print(f"  - {source.get('name', 'Unknown')}")
+    else:
+        console.print("  - None captured")
+    console.print()
+
+    console.print("[bold]Generated SQL[/]")
+    if record.sql_queries:
+        for i, query in enumerate(record.sql_queries, 1):
+            header = f"  Query {i}: {query.get('datasource', 'Unknown datasource')}"
+            if query.get("engine"):
+                header += f" ({query['engine']})"
+            console.print(header)
+            console.print("```sql")
+            console.print(query.get("sql", ""))
+            console.print("```")
+            if query.get("status") == "error" and query.get("error_message"):
+                console.print(f"[anton.warning]{query['error_message']}[/]")
+            console.print()
+    else:
+        console.print("  - No SQL generated")
+        console.print()
diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
index 814a5501..adf8647d 100644
--- a/anton/core/backends/scratchpad_boot.py
+++ b/anton/core/backends/scratchpad_boot.py
@@ -12,6 +12,7 @@
 
 # Persistent namespace across cells
 namespace = {"__builtins__": __builtins__}
+namespace["_anton_explainability_queries"] = []
 
 # --- Inject get_llm() for LLM access from scratchpad code ---
 _scratchpad_model = os.environ.get("ANTON_SCRATCHPAD_MODEL", "")
@@ -243,6 +244,7 @@ def agentic_loop(*, system, user_message, tools, handle_tool, max_turns=10, max_
 _minds_datasource = os.environ.get("ANTON_MINDS_DATASOURCE", "")
 _minds_api_key = os.environ.get("ANTON_MINDS_API_KEY", "")
 _minds_url = os.environ.get("ANTON_MINDS_URL", "")
+_minds_engine = os.environ.get("ANTON_MINDS_DATASOURCE_ENGINE", "")
 if _minds_datasource and _minds_api_key and _minds_url:
     try:
         import ssl as _minds_ssl
@@ -273,13 +275,28 @@ def query_minds_data(query, datasource=None):
 
             try:
                 with _minds_urllib.urlopen(req, context=ctx, timeout=60) as resp:
-                    return json.loads(resp.read().decode())
+                    parsed = json.loads(resp.read().decode())
+                    namespace.setdefault("_anton_explainability_queries", []).append({
+                        "datasource": ds,
+                        "sql": query,
+                        "engine": _minds_engine or None,
+                        "status": "ok",
+                        "error_message": None,
+                    })
+                    return parsed
             except _minds_urllib.HTTPError as e:
                 body = ""
                 try:
                     body = e.read().decode()
                 except Exception:
                     pass
+                namespace.setdefault("_anton_explainability_queries", []).append({
+                    "datasource": ds,
+                    "sql": query,
+                    "engine": _minds_engine or None,
+                    "status": "error",
+                    "error_message": f"HTTP {e.code}: {body or e.reason}",
+                })
                 return {
                     "type": "error",
                     "data": None,
@@ -287,6 +304,13 @@ def query_minds_data(query, datasource=None):
                     "error_message": f"HTTP {e.code}: {body or e.reason}",
                 }
             except Exception as e:
+                namespace.setdefault("_anton_explainability_queries", []).append({
+                    "datasource": ds,
+                    "sql": query,
+                    "engine": _minds_engine or None,
+                    "status": "error",
+                    "error_message": str(e),
+                })
                 return {
                     "type": "error",
                     "data": None,
@@ -561,6 +585,7 @@ def emit(self, record):
     err_buf = io.StringIO()
     log_buf = io.StringIO()
     error = None
+    namespace["_anton_explainability_queries"] = []
     _cell_log_handler.buf = log_buf
 
     sys.stdout = out_buf
@@ -625,6 +650,7 @@ def emit(self, record):
         "stderr": err_buf.getvalue(),
         "logs": log_buf.getvalue(),
         "error": error,
+        "explainability_queries": list(namespace.get("_anton_explainability_queries", [])),
     }
     if _auto_installed:
         result["auto_installed"] = _auto_installed
diff --git a/anton/core/session.py b/anton/core/session.py
index a920346d..cdb8ca59 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -21,6 +21,8 @@
 from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef
 from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
 
+from anton.explainability import ExplainabilityCollector, ExplainabilityStore
+
 from anton.utils.datasources import (
     build_datasource_context,
     scrub_credentials,
@@ -102,6 +104,10 @@ def __init__(
             workspace_path=workspace.base if workspace else None,
         )
         self.tool_registry = ToolRegistry()
+        self._explainability_store = (
+            ExplainabilityStore(workspace.base) if workspace is not None else None
+        )
+        self._active_explainability: ExplainabilityCollector | None = None
 
     @property
     def history(self) -> list[dict]:
@@ -182,6 +188,44 @@ def _persist_history(self) -> None:
         if self._history_store and self._session_id:
             self._history_store.save(self._session_id, self._history)
 
+    def _record_cell_explainability(
+        self, *, pad_name: str, description: str, cell
+    ) -> None:
+        if self._active_explainability is None:
+            return
+        if description:
+            self._active_explainability.add_scratchpad_step(description)
+        elif pad_name:
+            self._active_explainability.add_scratchpad_step(
+                f"work in scratchpad {pad_name}"
+            )
+        for query in getattr(cell, "explainability_queries", []) or []:
+            if not isinstance(query, dict):
+                continue
+            self._active_explainability.add_query(
+                datasource=str(query.get("datasource", "")),
+                sql=str(query.get("sql", "")),
+                engine=(
+                    str(query.get("engine"))
+                    if query.get("engine") is not None
+                    else None
+                ),
+                status=str(query.get("status", "ok")),
+                error_message=(
+                    str(query.get("error_message"))
+                    if query.get("error_message") is not None
+                    else None
+                ),
+            )
+        self._active_explainability.add_sources_from_text(
+            getattr(cell, "code", ""),
+            getattr(cell, "stdout", ""),
+            getattr(cell, "logs", ""),
+        )
+        self._active_explainability.add_inferred_queries_from_code(
+            getattr(cell, "code", "")
+        )
+
     async def _build_system_prompt(self, user_message: str = "") -> str:
         import datetime as _dt
         _now = _dt.datetime.now()
@@ -572,64 +616,75 @@ async def turn_stream(
         assistant_text_parts: list[str] = []
         _max_auto_retries = 2
         _retry_count = 0
+        self._active_explainability = ExplainabilityCollector(
+            self._explainability_store,
+            turn=self._turn_count + 1,
+            user_message=user_msg_str,
+        )
 
-        while True:
-            try:
-                async for event in self._stream_and_handle_tools(user_msg_str):
-                    if isinstance(event, StreamTextDelta):
-                        assistant_text_parts.append(event.text)
-                    yield event
-                break  # completed successfully
-            except Exception as _agent_exc:
-                # Token/billing limit — don't retry, let the chat loop handle it
-                if isinstance(_agent_exc, TokenLimitExceeded):
-                    raise
-                _retry_count += 1
-                if _retry_count <= _max_auto_retries:
-                    # Inject the error into history and let the LLM try to recover
-                    self._history.append(
-                        {
-                            "role": "user",
-                            "content": (
-                                f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n"
-                                "If you can diagnose and fix the issue, continue working on the task. "
-                                "Adjust your approach to avoid the same error. "
-                                "If this is unrecoverable, summarize what you accomplished and suggest next steps."
-                            ),
-                        }
-                    )
-                    # Continue the while loop — _stream_and_handle_tools will be called
-                    # again with the error context now in history
-                    continue
-                else:
-                    # Exhausted retries — stop and summarize for the user
-                    self._history.append(
-                        {
-                            "role": "user",
-                            "content": (
-                                f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n"
-                                "Stop retrying. Please:\n"
-                                "1. Summarize what you accomplished so far.\n"
-                                "2. Explain what went wrong in plain language.\n"
-                                "3. Suggest next steps — what the user can try (e.g. rephrase, "
-                                "simplify the request, or ask you to continue from where you left off).\n"
-                                "Be concise and helpful."
-                            ),
-                        }
-                    )
-                    try:
-                        async for event in self._llm.plan_stream(
-                            system=await self._build_system_prompt(user_msg_str),
-                            messages=self._history,
-                        ):
-                            if isinstance(event, StreamTextDelta):
-                                assistant_text_parts.append(event.text)
-                            yield event
-                    except Exception:
-                        fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request."
-                        assistant_text_parts.append(fallback)
-                        yield StreamTextDelta(text=fallback)
-                    break
+        try:
+            while True:
+                try:
+                    async for event in self._stream_and_handle_tools(user_msg_str):
+                        if isinstance(event, StreamTextDelta):
+                            assistant_text_parts.append(event.text)
+                        yield event
+                    break  # completed successfully
+                except Exception as _agent_exc:
+                    # Token/billing limit — don't retry, let the chat loop handle it
+                    if isinstance(_agent_exc, TokenLimitExceeded):
+                        raise
+                    _retry_count += 1
+                    if _retry_count <= _max_auto_retries:
+                        # Inject the error into history and let the LLM try to recover
+                        self._history.append(
+                            {
+                                "role": "user",
+                                "content": (
+                                    f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n"
+                                    "If you can diagnose and fix the issue, continue working on the task. "
+                                    "Adjust your approach to avoid the same error. "
+                                    "If this is unrecoverable, summarize what you accomplished and suggest next steps."
+                                ),
+                            }
+                        )
+                        # Continue the while loop — _stream_and_handle_tools will be called
+                        # again with the error context now in history
+                        continue
+                    else:
+                        # Exhausted retries — stop and summarize for the user
+                        self._history.append(
+                            {
+                                "role": "user",
+                                "content": (
+                                    f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n"
+                                    "Stop retrying. Please:\n"
+                                    "1. Summarize what you accomplished so far.\n"
+                                    "2. Explain what went wrong in plain language.\n"
+                                    "3. Suggest next steps — what the user can try (e.g. rephrase, "
+                                    "simplify the request, or ask you to continue from where you left off).\n"
+                                    "Be concise and helpful."
+                                ),
+                            }
+                        )
+                        try:
+                            async for event in self._llm.plan_stream(
+                                system=await self._build_system_prompt(user_msg_str),
+                                messages=self._history,
+                            ):
+                                if isinstance(event, StreamTextDelta):
+                                    assistant_text_parts.append(event.text)
+                                yield event
+                        except Exception:
+                            fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request."
+                            assistant_text_parts.append(fallback)
+                            yield StreamTextDelta(text=fallback)
+                        break
+        finally:
+            if self._active_explainability is not None:
+                self._active_explainability.finalize(
+                    "".join(assistant_text_parts)[:2000]
+                )
 
         # Log assistant response to episodic memory
         if self._episodic is not None and assistant_text_parts:
@@ -869,6 +924,12 @@ async def _stream_and_handle_tools(
                                     if cell
                                     else "No result produced."
                                 )
+                                if cell is not None:
+                                    self._record_cell_explainability(
+                                        pad_name=tc.input.get("name", ""),
+                                        description=description,
+                                        cell=cell,
+                                    )
                                 if self._episodic is not None and cell is not None:
                                     self._episodic.log_turn(
                                         self._turn_count + 1,
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index d94d724a..c9ba531f 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -106,6 +106,10 @@ async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
             estimated_time=estimated_time,
             estimated_seconds=estimated_seconds,
         )
+        if cell is not None:
+            session._record_cell_explainability(
+                pad_name=name, description=description, cell=cell,
+            )
         return format_cell_result(cell)
 
     elif action == "view":
diff --git a/anton/explainability.py b/anton/explainability.py
new file mode 100644
index 00000000..627e9b88
--- /dev/null
+++ b/anton/explainability.py
@@ -0,0 +1,271 @@
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.parse import urlparse
+
+
+def _utc_now() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+@dataclass
+class ExplainabilityQuery:
+    datasource: str
+    sql: str
+    engine: str | None = None
+    status: str = "ok"
+    error_message: str | None = None
+
+    def to_dict(self) -> dict:
+        return {
+            "datasource": self.datasource,
+            "sql": self.sql,
+            "engine": self.engine,
+            "status": self.status,
+            "error_message": self.error_message,
+        }
+
+
+@dataclass
+class ExplainabilityRecord:
+    turn: int
+    created_at: str
+    user_message: str
+    answer_text: str
+    summary: str
+    data_sources: list[dict] = field(default_factory=list)
+    sql_queries: list[dict] = field(default_factory=list)
+    scratchpad_steps: list[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        return {
+            "turn": self.turn,
+            "created_at": self.created_at,
+            "user_message": self.user_message,
+            "answer_text": self.answer_text,
+            "summary": self.summary,
+            "data_sources": self.data_sources,
+            "sql_queries": self.sql_queries,
+            "scratchpad_steps": self.scratchpad_steps,
+        }
+
+
+class ExplainabilityStore:
+    def __init__(self, workspace_path: Path) -> None:
+        self._dir = workspace_path / ".anton" / "explainability"
+
+    def save(self, record: ExplainabilityRecord) -> None:
+        self._dir.mkdir(parents=True, exist_ok=True)
+        payload = json.dumps(record.to_dict(), ensure_ascii=False, indent=2) + "\n"
+        latest = self._dir / "latest.json"
+        latest.write_text(payload, encoding="utf-8")
+        turn_file = self._dir / f"turn-{record.turn:04d}.json"
+        turn_file.write_text(payload, encoding="utf-8")
+
+    def load_latest(self) -> ExplainabilityRecord | None:
+        latest = self._dir / "latest.json"
+        if not latest.is_file():
+            return None
+        try:
+            payload = json.loads(latest.read_text(encoding="utf-8"))
+        except Exception:
+            return None
+        try:
+            return ExplainabilityRecord(
+                turn=int(payload.get("turn", 0)),
+                created_at=str(payload.get("created_at", "")),
+                user_message=str(payload.get("user_message", "")),
+                answer_text=str(payload.get("answer_text", "")),
+                summary=str(payload.get("summary", "")),
+                data_sources=list(payload.get("data_sources", [])),
+                sql_queries=list(payload.get("sql_queries", [])),
+                scratchpad_steps=list(payload.get("scratchpad_steps", [])),
+            )
+        except Exception:
+            return None
+
+
+class ExplainabilityCollector:
+    def __init__(self, store: ExplainabilityStore, *, turn: int, user_message: str) -> None:
+        self._store = store
+        self._turn = turn
+        self._user_message = user_message
+        self._created_at = _utc_now()
+        self._scratchpad_steps: list[str] = []
+        self._queries: list[ExplainabilityQuery] = []
+        self._sources: list[dict[str, str | None]] = []
+
+    def add_scratchpad_step(self, description: str) -> None:
+        cleaned = (description or "").strip()
+        if cleaned and cleaned not in self._scratchpad_steps:
+            self._scratchpad_steps.append(cleaned)
+
+    def add_query(
+        self,
+        *,
+        datasource: str,
+        sql: str,
+        engine: str | None = None,
+        status: str = "ok",
+        error_message: str | None = None,
+    ) -> None:
+        cleaned_sql = (sql or "").strip()
+        cleaned_ds = (datasource or "").strip() or "Unknown datasource"
+        if not cleaned_sql:
+            return
+        entry = ExplainabilityQuery(
+            datasource=cleaned_ds,
+            sql=cleaned_sql,
+            engine=(engine or "").strip() or None,
+            status=status,
+            error_message=(error_message or "").strip() or None,
+        )
+        if any(
+            existing.datasource == entry.datasource
+            and existing.sql == entry.sql
+            and existing.status == entry.status
+            for existing in self._queries
+        ):
+            return
+        self._queries.append(entry)
+        self.add_source(name=cleaned_ds, engine=(engine or "").strip() or None)
+
+    def add_source(self, *, name: str, engine: str | None = None) -> None:
+        cleaned_name = (name or "").strip()
+        if not cleaned_name:
+            return
+        entry = {"name": cleaned_name, "engine": (engine or "").strip() or None}
+        if entry not in self._sources:
+            self._sources.append(entry)
+
+    def add_sources_from_text(self, *texts: str) -> None:
+        for text in texts:
+            if not text:
+                continue
+            for source in _extract_sources_from_text(text):
+                self.add_source(name=source, engine=None)
+
+    def add_inferred_queries_from_code(self, code: str) -> None:
+        if self._queries:
+            return
+        sql_statements = _extract_sql_from_code(code)
+        datasource_names = _extract_datasource_names_from_code(code)
+        datasource = datasource_names[0] if datasource_names else "connected datasource"
+        for sql in sql_statements:
+            self.add_query(
+                datasource=datasource,
+                sql=sql,
+                engine=None,
+                status="ok",
+                error_message=None,
+            )
+
+    def finalize(self, answer_text: str) -> ExplainabilityRecord:
+        data_sources: list[dict] = []
+        seen_sources: set[tuple[str, str | None]] = set()
+        for source in self._sources:
+            key = (str(source.get("name", "")), source.get("engine"))
+            if key in seen_sources:
+                continue
+            seen_sources.add(key)
+            data_sources.append({"name": key[0], "engine": key[1]})
+
+        summary = self._build_summary(answer_text, data_sources)
+        record = ExplainabilityRecord(
+            turn=self._turn,
+            created_at=self._created_at,
+            user_message=self._user_message,
+            answer_text=answer_text.strip(),
+            summary=summary,
+            data_sources=data_sources,
+            sql_queries=[query.to_dict() for query in self._queries],
+            scratchpad_steps=list(self._scratchpad_steps),
+        )
+        if self._store is not None:
+            self._store.save(record)
+        return record
+
+    def _build_summary(self, answer_text: str, data_sources: list[dict]) -> str:
+        if self._queries:
+            source_names = ", ".join(source["name"] for source in data_sources[:3])
+            query_count = len(self._queries)
+            step_text = ""
+            if self._scratchpad_steps:
+                lead = self._scratchpad_steps[0].rstrip(".")
+                step_text = f" I used the scratchpad to {lead.lower()}."
+            return (
+                f"I queried {source_names} with {query_count} SQL "
+                f"{'statement' if query_count == 1 else 'statements'} to gather the data behind this answer."
+                f"{step_text}"
+            )
+        if data_sources:
+            source_names = ", ".join(source["name"] for source in data_sources[:3])
+            if self._scratchpad_steps:
+                lead = self._scratchpad_steps[0].rstrip(".").lower()
+                return (
+                    f"I gathered information from {source_names} and used the scratchpad to "
+                    f"{lead} before drafting the answer."
+                )
+            return f"I gathered information from {source_names} before drafting the answer."
+        if self._scratchpad_steps:
+            primary_step = self._scratchpad_steps[0].rstrip(".").lower()
+            return f"I used the scratchpad to {primary_step} before drafting the answer."
+        if answer_text.strip():
+            return "I answered directly from the conversation context without querying a datasource or generating SQL."
+        return "No explainability details were captured for this answer."
+
+
+_URL_RE = re.compile(r"https?://[^\s)\"'>]+")
+_SQL_LITERAL_RE = re.compile(
+    r"(?P<quote>'''|\"\"\"|'|\")(?P<body>.*?)(?P=quote)",
+    re.DOTALL,
+)
+_DS_PREFIX_RE = re.compile(r"\b(DS_[A-Z0-9_]+)__[A-Z0-9_]+\b")
+
+
+def _extract_sources_from_text(text: str) -> list[str]:
+    sources: list[str] = []
+    for match in _URL_RE.findall(text):
+        parsed = urlparse(match)
+        host = (parsed.hostname or "").lower()
+        host = host.removeprefix("www.")
+        if not host:
+            continue
+        if host not in sources:
+            sources.append(host)
+    return sources
+
+
+def _looks_like_sql(text: str) -> bool:
+    normalized = " ".join(text.strip().split()).upper()
+    if len(normalized) < 12:
+        return False
+    starters = ("SELECT ", "WITH ", "INSERT ", "UPDATE ", "DELETE ", "SHOW ", "DESCRIBE ")
+    if not normalized.startswith(starters):
+        return False
+    return any(keyword in normalized for keyword in (" FROM ", " JOIN ", " INTO ", " TABLE ", "SELECT "))
+
+
+def _extract_sql_from_code(code: str) -> list[str]:
+    sql_statements: list[str] = []
+    for match in _SQL_LITERAL_RE.finditer(code or ""):
+        body = match.group("body").strip()
+        if not _looks_like_sql(body):
+            continue
+        cleaned = "\n".join(line.rstrip() for line in body.splitlines()).strip()
+        if cleaned and cleaned not in sql_statements:
+            sql_statements.append(cleaned)
+    return sql_statements
+
+
+def _extract_datasource_names_from_code(code: str) -> list[str]:
+    names: list[str] = []
+    for prefix in _DS_PREFIX_RE.findall(code or ""):
+        slug = prefix.removeprefix("DS_").lower().replace("_", "-")
+        if slug not in names:
+            names.append(slug)
+    return names
diff --git a/tests/test_explainability.py b/tests/test_explainability.py
new file mode 100644
index 00000000..1b835cc6
--- /dev/null
+++ b/tests/test_explainability.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock
+
+from anton.commands.ui import handle_explain
+from anton.explainability import ExplainabilityCollector, ExplainabilityStore
+
+
+def test_explainability_store_persists_latest_and_turn_file(tmp_path):
+    store = ExplainabilityStore(tmp_path)
+    collector = ExplainabilityCollector(store, turn=3, user_message="How did revenue change?")
+    collector.add_scratchpad_step("Query monthly revenue")
+    collector.add_query(
+        datasource="warehouse.orders",
+        sql="SELECT month, revenue FROM revenue_by_month",
+        engine="postgres",
+    )
+
+    record = collector.finalize("Revenue increased 12% month over month.")
+
+    latest = tmp_path / ".anton" / "explainability" / "latest.json"
+    turn_file = tmp_path / ".anton" / "explainability" / "turn-0003.json"
+
+    assert latest.is_file()
+    assert turn_file.is_file()
+
+    latest_payload = json.loads(latest.read_text())
+    assert latest_payload["turn"] == 3
+    assert latest_payload["sql_queries"][0]["datasource"] == "warehouse.orders"
+    assert "queried warehouse.orders" in latest_payload["summary"].lower()
+    assert record.summary == latest_payload["summary"]
+
+
+def test_explainability_sql_shape_includes_datasources_and_queries(tmp_path):
+    store = ExplainabilityStore(tmp_path)
+    collector = ExplainabilityCollector(store, turn=4, user_message="What was monthly revenue?")
+    collector.add_scratchpad_step("Query monthly revenue")
+    collector.add_query(
+        datasource="finance.monthly_revenue",
+        sql="SELECT month, revenue FROM monthly_revenue ORDER BY month DESC",
+        engine="snowflake",
+    )
+
+    record = collector.finalize("Revenue rose in March.")
+
+    assert record.data_sources == [{"name": "finance.monthly_revenue", "engine": "snowflake"}]
+    assert record.sql_queries == [
+        {
+            "datasource": "finance.monthly_revenue",
+            "sql": "SELECT month, revenue FROM monthly_revenue ORDER BY month DESC",
+            "engine": "snowflake",
+            "status": "ok",
+            "error_message": None,
+        }
+    ]
+    assert "sql statement" in record.summary.lower()
+
+
+def test_explainability_summary_without_queries_is_direct_answer(tmp_path):
+    store = ExplainabilityStore(tmp_path)
+    collector = ExplainabilityCollector(store, turn=1, user_message="What is Anton?")
+
+    record = collector.finalize("Anton is MindsDB's autonomous AI coworker.")
+
+    assert record.sql_queries == []
+    assert (
+        record.summary
+        == "I answered directly from the conversation context without querying a datasource or generating SQL."
+    )
+
+
+def test_explainability_extracts_non_sql_sources_from_text(tmp_path):
+    store = ExplainabilityStore(tmp_path)
+    collector = ExplainabilityCollector(store, turn=2, user_message="Compare green coffee prices")
+    collector.add_scratchpad_step("Fetch green coffee bean prices and compute roasting cost comparison")
+    collector.add_sources_from_text(
+        'See https://www.happymugcoffee.com/collections/green-coffee and https://burmancoffee.com/'
+    )
+
+    record = collector.finalize("Home roasting is much cheaper.")
+
+    source_names = [source["name"] for source in record.data_sources]
+    assert "happymugcoffee.com" in source_names
+    assert "burmancoffee.com" in source_names
+    assert "gathered information from" in record.summary.lower()
+
+
+def test_handle_explain_prints_sections_for_latest_record(tmp_path):
+    store = ExplainabilityStore(tmp_path)
+    collector = ExplainabilityCollector(store, turn=5, user_message="What was revenue?")
+    collector.add_scratchpad_step("Query monthly revenue")
+    collector.add_query(
+        datasource="finance.monthly_revenue",
+        sql="SELECT month, revenue FROM monthly_revenue",
+        engine="postgres",
+    )
+    collector.finalize("Revenue rose.")
+
+    console = MagicMock()
+    handle_explain(console, tmp_path)
+
+    rendered = "\n".join(
+        str(call.args[0]) for call in console.print.call_args_list if call.args
+    )
+    assert "Explain This Answer" in rendered
+    assert "Summary" in rendered
+    assert "Data Sources Used" in rendered
+    assert "Generated SQL" in rendered
+    assert "finance.monthly_revenue" in rendered
+    assert "SELECT month, revenue FROM monthly_revenue" in rendered
+
+
+def test_explainability_infers_sql_and_datasource_from_scratchpad_code(tmp_path):
+    store = ExplainabilityStore(tmp_path)
+    collector = ExplainabilityCollector(store, turn=6, user_message="Average revenue")
+    collector.add_scratchpad_step("Average annual revenue over last 10 years in the dataset")
+    collector.add_inferred_queries_from_code(
+        """
+import os
+sql = \"\"\"
+SELECT EXTRACT(YEAR FROM sale_date) AS year, AVG(revenue) AS avg_revenue
+FROM sales
+GROUP BY 1
+ORDER BY 1
+\"\"\"
+host = os.environ["DS_POSTGRES_PROD_DB__HOST"]
+cur.execute(sql)
+"""
+    )
+
+    record = collector.finalize("Average revenue is stable.")
+
+    assert record.data_sources == [{"name": "postgres-prod-db", "engine": None}]
+    assert len(record.sql_queries) == 1
+    assert "SELECT EXTRACT(YEAR FROM sale_date)" in record.sql_queries[0]["sql"]
+    assert record.sql_queries[0]["datasource"] == "postgres-prod-db"

From 851bfb0a587824c3d24d6c47971c0eccc9babe63 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Thu, 9 Apr 2026 09:57:36 -0700
Subject: [PATCH 079/134] mergeres into v.2

---
 anton/commands/datasource.py | 205 ++++++++++++++++++++++-------------
 anton/tools.py               |  25 +++--
 tests/test_datasource.py     | 129 ++++++++++++++++++++++
 3 files changed, 275 insertions(+), 84 deletions(-)

diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index 3f57e6f0..f25ecb62 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -528,6 +528,8 @@ async def _reconnect_to_saved(
     registry: "DatasourceRegistry",
     slug: str,
     conn: dict,
+    *,
+    from_tool_call: bool = False,
 ) -> "ChatSession":
     """Inject env for a saved connection and mark it as the active datasource."""
     restore_namespaced_env(vault)
@@ -543,30 +545,34 @@ async def _reconnect_to_saved(
         f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
     )
     console.print()
-    session._history.append(
-        {
-            "role": "assistant",
-            "content": (
-                f'I\'ve reconnected to the {engine_label} connection "{slug}" '
-                f"in the Local Vault. I can now query this data source when needed."
-            ),
-        }
-    )
+    if not from_tool_call:
+        # When invoked via the LLM tool call, we must not append to
+        # session._history here — it would land between a tool_use and
+        # its tool_result. The tool wrapper returns a fresh message.
+        session._history.append(
+            {
+                "role": "assistant",
+                "content": (
+                    f'I\'ve reconnected to the {engine_label} connection "{slug}" '
+                    f"in the Local Vault. I can now query this data source when needed."
+                ),
+            }
+        )
     return session
 
 
-def _record_redirect(
-    session: "ChatSession",
+def _build_redirect_message(
     collector: ConnectionCollector,
     user_message: str,
     target_engine: str | None = None,
-) -> None:
-    """Record a mid-flow redirect so the main agent can pick up where we left off.
-
-    Appends a structured assistant message to history with the variables
-    collected so far and the user's last message, so the LLM can decide
-    whether to re-call connect_new_datasource with the new engine and
-    pre-fill the already-known variables.
+) -> str:
+    """Build a structured REDIRECT message for the main agent.
+
+    Returns a string describing what was collected so far, what's still
+    missing, and what the user said. The caller decides where to put it
+    (session history for slash-command path, or tool-result return for
+    the LLM tool-call path — never both, to keep tool_use/tool_result
+    ordering intact).
     """
     collector.redirect_message = user_message.strip()
     payload = collector.to_redirect_result()
@@ -586,9 +592,7 @@ def _record_redirect(
         "again with the correct engine and pass known_variables to "
         "pre-fill what's already collected."
     )
-    session._history.append(
-        {"role": "assistant", "content": " ".join(parts)}
-    )
+    return " ".join(parts)
 
 
 async def handle_connect_datasource(
@@ -598,6 +602,7 @@ async def handle_connect_datasource(
     datasource_name: str | None = None,
     prefill: str | None = None,
     known_variables: dict[str, str] | None = None,
+    from_tool_call: bool = False,
 ) -> "ChatSession":
     """
     Connect a data source by entering credentials, either for a new name or re-entering for an existing one.
@@ -605,6 +610,13 @@ async def handle_connect_datasource(
     `known_variables` may pre-fill credential fields (e.g. when called as a
     tool by the LLM, which may have already extracted host/port/etc. from
     the conversation).
+
+    `from_tool_call=True` when invoked via the LLM `connect_new_datasource`
+    tool. In that case we must NOT append assistant messages to
+    `session._history` — we are sitting between a `tool_use` block and its
+    `tool_result` block, and appending messages there violates the
+    Anthropic API invariant. The tool wrapper builds its own return
+    message from the vault diff instead.
     """
 
     vault = DataVault()
@@ -716,15 +728,16 @@ async def handle_connect_datasource(
             "[anton.muted]        You can now ask me questions about your data.[/]"
         )
         console.print()
-        session._history.append(
-            {
-                "role": "assistant",
-                "content": (
-                    f"I've updated the credentials for the {engine_def.display_name} connection "
-                    f'"{datasource_name}" in the Local Vault.'
-                ),
-            }
-        )
+        if not from_tool_call:
+            session._history.append(
+                {
+                    "role": "assistant",
+                    "content": (
+                        f"I've updated the credentials for the {engine_def.display_name} connection "
+                        f'"{datasource_name}" in the Local Vault.'
+                    ),
+                }
+            )
         return session
 
     console.print()
@@ -835,7 +848,8 @@ async def get_create_new_answer() -> str | None:
             picked_conn = saved_connections[int(pick) - 1]
             picked_slug = f"{picked_conn['engine']}-{picked_conn['name']}"
             return await _reconnect_to_saved(
-                console, session, vault, registry, picked_slug, picked_conn
+                console, session, vault, registry, picked_slug, picked_conn,
+                from_tool_call=from_tool_call,
             )
 
         # top_choice == "2": create new connection
@@ -854,7 +868,8 @@ async def get_create_new_answer() -> str | None:
     if stripped_answer in known_slugs:
         conn = known_slugs[stripped_answer]
         return await _reconnect_to_saved(
-            console, session, vault, registry, stripped_answer, conn
+            console, session, vault, registry, stripped_answer, conn,
+            from_tool_call=from_tool_call,
         )
 
     engine_def: DatasourceEngine | None = None
@@ -993,15 +1008,16 @@ async def get_create_new_answer() -> str | None:
             "[anton.muted]        You can now ask me questions about your data.[/]"
         )
         console.print()
-        session._history.append(
-            {
-                "role": "assistant",
-                "content": (
-                    f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
-                    f"to the Local Vault. I can now query this data source when needed."
-                ),
-            }
-        )
+        if not from_tool_call:
+            session._history.append(
+                {
+                    "role": "assistant",
+                    "content": (
+                        f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
+                        f"to the Local Vault. I can now query this data source when needed."
+                    ),
+                }
+            )
         return session
 
     assert engine_def is not None  # custom_source path always returns before this line
@@ -1057,17 +1073,6 @@ async def get_create_new_answer() -> str | None:
 
     console.print()
 
-    help_answer = await prompt_or_cancel(
-        "(anton) Do you need instructions on how to obtain these credentials?",
-        choices=["y", "n"], default="n",
-    )
-    if help_answer is None:
-        return session
-    if help_answer.strip().lower() == "y":
-        await show_credential_help(
-            console, session, engine_def.display_name, None, active_fields,
-        )
-
     # ── Smart credential collection ────────────────────────────────────
     # Track filled vs. missing fields as a puzzle. Each user response is
     # parsed via the LLM to extract any variables mentioned, so users can
@@ -1089,6 +1094,48 @@ async def get_create_new_answer() -> str | None:
     known_engine_slugs = [e.engine for e in registry.all_engines()]
     partial = False
 
+    # Offer instructions — but if the user answers by pasting credentials
+    # instead of "y"/"n", extract them straight into the collector rather
+    # than forcing a re-prompt.
+    help_answer = await prompt_or_cancel(
+        "(anton) Do you need instructions on how to obtain these credentials?",
+        choices_display="y/n", default="n",
+    )
+    if help_answer is None:
+        return session
+    normalized = help_answer.strip().lower()
+    if normalized == "y":
+        await show_credential_help(
+            console, session, engine_def.display_name, None, active_fields,
+        )
+    elif normalized and normalized != "n":
+        # Non-y/n answer — maybe the user pasted credentials here.
+        extracted = await extract_variables(
+            help_answer,
+            expected_fields=collector.active_fields,
+            current_engine=engine_def.engine,
+            current_engine_display=engine_def.display_name,
+            known_engine_slugs=known_engine_slugs,
+            session=session,
+        )
+        if extracted.is_redirect:
+            redirect_text = _build_redirect_message(
+                collector, help_answer, extracted.redirect_engine
+            )
+            session._pending_connect_redirect = redirect_text
+            if not from_tool_call:
+                session._history.append(
+                    {"role": "assistant", "content": redirect_text}
+                )
+            return session
+        if extracted.variables:
+            filled = collector.fill_many(extracted.variables)
+            if filled:
+                console.print(
+                    f"[anton.muted]        Got: {', '.join(filled)}[/]"
+                )
+                console.print()
+
     while not collector.is_complete:
         collector.format_status(console)
         console.print()
@@ -1146,9 +1193,16 @@ async def get_create_new_answer() -> str | None:
         )
 
         if extracted.is_redirect:
-            _record_redirect(
-                session, collector, value, extracted.redirect_engine
+            redirect_text = _build_redirect_message(
+                collector, value, extracted.redirect_engine
             )
+            # Stash for the tool wrapper; also mirror to history only if
+            # we're NOT inside a tool_use/tool_result pair.
+            session._pending_connect_redirect = redirect_text
+            if not from_tool_call:
+                session._history.append(
+                    {"role": "assistant", "content": redirect_text}
+                )
             return session
 
         if extracted.variables:
@@ -1218,15 +1272,16 @@ async def get_create_new_answer() -> str | None:
             f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
         )
         console.print()
-        session._history.append(
-            {
-                "role": "assistant",
-                "content": (
-                    f'I\'ve reconnected to the {engine_def.display_name} connection "{slug}" '
-                    f"in the Local Vault. I can now query this data source when needed."
-                ),
-            }
-        )
+        if not from_tool_call:
+            session._history.append(
+                {
+                    "role": "assistant",
+                    "content": (
+                        f'I\'ve reconnected to the {engine_def.display_name} connection "{slug}" '
+                        f"in the Local Vault. I can now query this data source when needed."
+                    ),
+                }
+            )
         return session
 
     vault.save(engine_def.engine, conn_name, credentials)
@@ -1241,16 +1296,20 @@ async def get_create_new_answer() -> str | None:
     )
     console.print()
 
-    # Inject a brief assistant message so the LLM is aware of the new connection
-    session._history.append(
-        {
-            "role": "assistant",
-            "content": (
-                f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
-                f"to the Local Vault. I can now query this data source when needed."
-            ),
-        }
-    )
+    # Inject a brief assistant message so the LLM is aware of the new
+    # connection — but only when NOT in a tool call (in that case the
+    # tool wrapper constructs its own return message; appending here
+    # would break tool_use/tool_result pairing).
+    if not from_tool_call:
+        session._history.append(
+            {
+                "role": "assistant",
+                "content": (
+                    f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
+                    f"to the Local Vault. I can now query this data source when needed."
+                ),
+            }
+        )
     return session
 
 
diff --git a/anton/tools.py b/anton/tools.py
index 42cff5d3..ff66ee5a 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -37,12 +37,16 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     vault = DataVault()
     before = {f"{c['engine']}-{c['name']}" for c in vault.list_connections()}
 
+    # Clear any stale redirect marker before running
+    setattr(session, "_pending_connect_redirect", None)
+
     await handle_connect_datasource(
         console,
         session._scratchpads,
         session,
         prefill=engine,
         known_variables=known_variables or None,
+        from_tool_call=True,
     )
 
     # Check if a new connection was actually added
@@ -56,17 +60,16 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
             f"Continue helping the user with their original request using this data source."
         )
 
-    # Did the flow record a mid-flow redirect? If so, the last history
-    # entry starts with "REDIRECT" — pass it through instead of treating
-    # it as a cancellation.
-    if session._history and isinstance(session._history[-1], dict):
-        last = session._history[-1]
-        if (
-            last.get("role") == "assistant"
-            and isinstance(last.get("content"), str)
-            and last["content"].startswith("REDIRECT")
-        ):
-            return last["content"]
+    # Did the flow record a mid-flow redirect? Read it from the session
+    # attribute stashed by _build_redirect_message. We CANNOT append to
+    # session._history from within the handler — we're between the
+    # tool_use and tool_result blocks and doing so breaks the Anthropic
+    # API invariant that every tool_use must be immediately followed by
+    # its tool_result.
+    redirect_text = getattr(session, "_pending_connect_redirect", None)
+    if redirect_text:
+        setattr(session, "_pending_connect_redirect", None)
+        return redirect_text
 
     # User cancelled or connection failed — show briefly with spinner
     # so user knows the agent is picking back up
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 74ab2c1e..51508bf0 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -658,6 +658,135 @@ async def test_successful_connection_saves_and_injects_history(
         assert last["role"] == "assistant"
         assert "postgresql" in last["content"].lower()
 
+    @pytest.mark.asyncio
+    async def test_credentials_pasted_at_help_prompt(
+        self, registry, vault_dir, make_session, make_cell, make_pad
+    ):
+        """Pasting credentials at the 'Do you need instructions?' prompt
+        should extract them instead of forcing a re-prompt or re-asking
+        for every field.
+        """
+        session = make_session()
+        console = MagicMock()
+        vault = DataVault(vault_dir=vault_dir)
+
+        # Mock the LLM to return a structured extraction for the paste
+        extract_response = MagicMock()
+        extract_response.content = (
+            '{"variables": {"host": "db.example.com", "port": "5432", '
+            '"database": "prod_db", "user": "alice", "password": "s3cr3t"}, '
+            '"is_redirect": false, "redirect_engine": "", "redirect_reason": ""}'
+        )
+        session._llm.plan = AsyncMock(return_value=extract_response)
+
+        pad = make_pad()
+        session._scratchpads.get_or_create = AsyncMock(return_value=pad)
+
+        pasted = (
+            "host: db.example.com\n"
+            "port: 5432\n"
+            "database: prod_db\n"
+            "user: alice\n"
+            "password: s3cr3t"
+        )
+        # Only two user inputs needed: the engine pick, then the paste
+        # at the help prompt. The collector becomes complete immediately
+        # after extraction, so no further prompts are issued.
+        responses = iter(["PostgreSQL", pasted])
+
+        with (
+            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch(
+                "anton.commands.datasource.prompt_or_cancel",
+                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+            ),
+        ):
+            await handle_connect_datasource(console, session._scratchpads, session)
+
+        conns = vault.list_connections()
+        assert len(conns) == 1
+        saved = vault.load("postgresql", conns[0]["name"])
+        assert saved is not None
+        assert saved["host"] == "db.example.com"
+        assert saved["port"] == "5432"
+        assert saved["database"] == "prod_db"
+        assert saved["user"] == "alice"
+        assert saved["password"] == "s3cr3t"
+
+    @pytest.mark.asyncio
+    async def test_from_tool_call_does_not_append_to_history(
+        self, registry, vault_dir, make_session, make_cell, make_pad
+    ):
+        """With from_tool_call=True the handler must NOT mutate session._history.
+
+        If it did, the appended assistant message would land between the
+        surrounding tool_use and tool_result blocks, violating the
+        Anthropic API invariant and producing a 400 error on the next
+        LLM call ("tool_use ids were found without tool_result blocks").
+        """
+        session = make_session()
+        # Simulate being mid-tool-call: history already contains an
+        # assistant message with a tool_use that needs a tool_result next.
+        session._history = [
+            {"role": "user", "content": "connect to postgres"},
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "text", "text": "Let me connect you."},
+                    {
+                        "type": "tool_use",
+                        "id": "toolu_test_123",
+                        "name": "connect_new_datasource",
+                        "input": {"engine": "postgres"},
+                    },
+                ],
+            },
+        ]
+        history_len_before = len(session._history)
+
+        console = MagicMock()
+        vault = DataVault(vault_dir=vault_dir)
+        pad = make_pad()
+        session._scratchpads.get_or_create = AsyncMock(return_value=pad)
+
+        responses = iter(
+            [
+                "PostgreSQL",
+                "n",
+                "db.example.com",
+                "5432",
+                "prod_db",
+                "alice",
+                "s3cr3t",
+            ]
+        )
+
+        with (
+            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch(
+                "anton.commands.datasource.prompt_or_cancel",
+                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+            ),
+        ):
+            await handle_connect_datasource(
+                console,
+                session._scratchpads,
+                session,
+                from_tool_call=True,
+            )
+
+        # Connection saved successfully...
+        conns = vault.list_connections()
+        assert len(conns) == 1
+        # ...but history MUST be untouched (the tool wrapper appends
+        # the tool_result separately after this returns).
+        assert len(session._history) == history_len_before
+        assert session._history[-1]["role"] == "assistant"
+        assert isinstance(session._history[-1]["content"], list)
+        assert session._history[-1]["content"][-1]["type"] == "tool_use"
+
     @pytest.mark.asyncio
     async def test_failed_test_offers_retry(
         self, registry, vault_dir, make_session, make_cell, make_pad

From 6c6c8dc94d7bf4eb99a8c7c9bc15ea00fc2f0edd Mon Sep 17 00:00:00 2001
From: ianu82 <ianu82@yahoo.co.uk>
Date: Thu, 9 Apr 2026 18:06:25 +0100
Subject: [PATCH 080/134] docs: reposition Anton as general-purpose autonomous
 agent

- Reframe opening from BI-focused to broad autonomous agent positioning
- Add 'What can Anton do?' section with multiple use case categories
- Add email cleanup use case (from community feedback)
- Add self-built integrations use case (WhatsApp/Telegram)
- Add calendar management, email sending, and other action-oriented examples
- Update 'How Anton differs' section with broader outcome examples
- Replace em dashes with hyphens throughout for consistency
---
 README.md | 100 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 55 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index ba21141a..f0cbf787 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,3 @@
-<div align="center">
-<a href="https://www.producthunt.com/products/mindsdb?embed=true&amp;utm_source=badge-featured&amp;utm_medium=badge&amp;utm_campaign=badge-mindsdb-anton-2" target="_blank" rel="noopener noreferrer"><img alt="MindsDB Anton - Business intelligence that doesn't just answer — it acts. | Product Hunt" width="250" height="54" src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1115197&amp;theme=light&amp;t=1775607473112"></a>
-</div>
-
-<br>
-
 ```
         ▐
    ▄█▀██▀█▄   ♡♡♡♡
@@ -12,18 +6,14 @@
     ▐   ▐        █▀█ █ ▀█  █  █▄█ █ ▀█
     ▐   ▐
 ```
+# Meet Anton - an autonomous agent that gets real work done
+Anton is your personal AI agent that works so you don't have to. Tell it what you need in plain language and it takes it from there - sending emails, calling APIs, connecting to data sources, building dashboards, and delivering results. No setup, no plugins, no fuss.
 
-# MindsDB Anton — What Business Intelligence is supposed to be
-
-Business intelligence was supposed to give you the right data, at the right time, to get real work done.
-
-That is Anton. You ask questions in plain language, and Anton takes ownership of the entire analytical process:
-it pulls and unifies data from multiple sources, runs the analysis, surfaces insights, builds rich dashboards, suggests next steps, and can even take action - A business intelligence agent that works like an expert analyst — 24/7, at machine speed.
+It doesn't just answer questions. It *does things*: cleans your inbox, builds integrations, analyzes your data, automates workflows - whatever the task requires.
 
 ![ezgif-24b9e7c74652f0dc](https://github.com/user-attachments/assets/c92f87c1-ff30-4272-92ba-49a8585d5954)
 
-
-## Quick start 
+## Quick start
 **macOS - Desktop App:**
 
 <a href="https://mindsdb-anton.s3.us-east-2.amazonaws.com/anton-latest-universal-signed.pkg">
@@ -48,17 +38,20 @@ That's it, you can now run it by simply typing the command:
 anton
 ```
 
-## Using Anton
+## What can Anton do?
+
+Anton figures things out live. It doesn't rely on pre-built plugins or predefined workflows - it writes code on the fly, calls APIs, and chains together whatever steps are needed to get the job done.
 
-Talk to Anton like a person, for example, ask Anton this:
+Here are a few examples of what people are using it for:
 
+### 📊 Data analysis & dashboards
 ```
 I hold 50 AAPL, 200 NVDA, and 10 AMZN. Get today's prices, calculate my
 total portfolio value, show me the 30-day performance of each stock, and
 any other information that might be useful. Give me a complete dashboard.
 ```
 
-What happens next is the interesting part. At first, Anton doesn't have any particular skill related to this question. However, it figures it out live: scrapes live prices, writes code on the fly, crunches the numbers, and builds you a full dashboard — all in one conversation, with no setup.
+What happens next is the interesting part. At first, Anton doesn't have any particular skill related to this question. However, it figures it out live: scrapes live prices, writes code on the fly, crunches the numbers, and builds you a full dashboard - all in one conversation, with no setup.
 
 
 ```text
@@ -70,13 +63,42 @@ Summary: Concentration risk is your #1 issue. If you're comfortable being a high
         <img width="800" alt="Anton's response" src="https://github.com/user-attachments/assets/6dc6ee81-2a2c-4358-be05-bfe884c32685" />
 </p>
 
-**Key features**
+### 📬 Email cleanup
+```
+Dear Anton, please help me clear unwanted emails...
+```
+
+Anton scans your inbox, classifies emails by signal vs. noise, identifies unsubscribable marketing, cold outreach, and internal tool notifications - then surfaces a breakdown and handles the cleanup. One user ran it on ~1,000 emails and found ~35% were un-subscribable. Anton surfaced everything AND handled the cleanup.
+
+### 💬 Build its own integrations
+```
+Set up a WhatsApp integration so I can message you from my phone.
+```
+
+Anton doesn't wait for someone to build a connector. It writes the integration code itself, sets it up, and gets it running - so you can chat with it from WhatsApp, Telegram, or whatever channel you need.
+
+### 🔧 Ask for anything that requires action
+- **Send emails** - connect accounts, draft messages or even send them on your behalf.
+- **Manage Calendarss** - Summarize your day, create meetings, block time, etc. All just by asking.
+- **Automated reporting** - pull from multiple databases, crunch numbers, deliver a report on a schedule.
+- **Workflow automation** - monitor a source, react to changes, take action.
+- **Research & synthesis** - scrape the web, summarize findings, build a reference document.
+- **Data pipeline prototyping** - connect sources, transform data, load into a destination.
+- **System administration** - audit configurations, generate reports, fix issues.
+
+The pattern is always the same: you describe the outcome, Anton figures out the steps.
+
+---
+
+## Key features
 - **Credential vault** - prevents secrets from being exposed to LLMs.
-- **Isolated code execution** - protected, reproducible “show your work” environment.
-- **Multi-layer memory & continuous learning** - session, semantic and long-term business knowledge.
+- **Isolated code execution** - protected, reproducible "show your work" environment.
+- **Multi-layer memory & continuous learning** - session, semantic and long-term knowledge. Anton remembers what it learned and gets better at your specific workflows over time.
+
+---
 
 #### Connect your data
-Although you can use Anton with just public data, the real power happens when you combine that with your own data. This can be anything: files,  databases, application APIs,... etc. Open the Local Vault with `/connect` command, then follow the prompts to add your secrets. Anton only has access to secret names - secret values remain hidden.
+Although you can use Anton with just public data, the real power happens when you combine that with your own data. This can be anything: files, databases, application APIs,... etc. Open the Local Vault with `/connect` command, then follow the prompts to add your secrets. Anton only has access to secret names - secret values remain hidden.
 
 ```powershell
 /connect
@@ -116,14 +138,11 @@ ANTON>
 
 ---
 
-### Explainable by default
-
-You can always ask Anton to explain what it did. Ask it to dump its scratchpad and you get a full notebook-style breakdown: every cell of code it ran, the outputs, and errors — so you can follow its reasoning step by step.
+You can always ask Anton to explain what it did. Ask it to dump its scratchpad and you get a full notebook-style breakdown: every cell of code it ran, the outputs, and errors - so you can follow its reasoning step by step.
 
 ---
 
 ## What's inside
-
 <p align="center"><img width="800"  alt="image" src="/assets/anton-diagram.png" /></p>
 
 For the full architecture of Anton, file formats, and developer guide, see **[anton/README.md](anton/README.md)**.
@@ -131,17 +150,16 @@ For the full architecture of Anton, file formats, and developer guide, see **[an
 ---
 
 ## Workspace layout
-
 When you run `anton` in a directory:
 
-- `.anton/` — workspace folder containing scratchpad state, episodic memory, and local secrets.  
-- `.anton/anton.md` — optional project context (Anton reads this at conversation start).  
-- `.anton/.env` — workspace configuration variables file (local file). 
-- `.anton/episodes/*` — episodic memories, one file per session.
+- `.anton/` - workspace folder containing scratchpad state, episodic memory, and local secrets.  
+- `.anton/anton.md` - optional project context (Anton reads this at conversation start).  
+- `.anton/.env` - workspace configuration variables file (local file). 
+- `.anton/episodes/*` - episodic memories, one file per session.
 - `.anton/memory/rules.md` - behavioral rules: Always/never/when rules (e.g., never hardcode credentials, how to build HTML)     
 - `.anton/memory/lessons.md` - factual knowledge: Things I've learned (stock API quirks, dashboard patterns, data fetching notes)   
 - `.anton/memory/topics/*` - topic-specific lessons:  Deeper notes organized by subject (dashboard-visualization, stock-data-api, etc.) 
-                                         
+
 Override the working folder:
 ```bash
 anton --folder /path/to/workspace
@@ -150,25 +168,22 @@ anton --folder /path/to/workspace
 ---
 
 ## Memory systems
-
 Anton provides two human-readable memory systems:
 
-- **Semantic memory** — rules, lessons, identity and domain expertise stored as markdown at global and project scope.  
-- **Episodic memory** — a timestamped archive of every conversation (JSONL in `.anton/episodes/`). Anton can recall prior sessions with the `recall` tool.
+- **Semantic memory** - rules, lessons, identity and domain expertise stored as markdown at global and project scope.  
+- **Episodic memory** - a timestamped archive of every conversation (JSONL in `.anton/episodes/`). Anton can recall prior sessions with the `recall` tool.
 
 Configure memory via `/setup` > Memory or via environment variables.
 
 ---
 
 ### Prerequisites
-
-- `git` — required  
+- `git` - required  
 - Python **3.11+** (Anton will bootstrap an environment if missing)  
-- `curl` — macOS / Linux installs  
+- `curl` - macOS / Linux installs  
 - Internet connection (scratchpad may access web sources)
 
 ### Windows scratchpad firewall
-
 The Windows installer can add a firewall rule so the scratchpad can reach the internet. If you skipped it, run in an elevated PowerShell:
 
 ```powershell
@@ -178,23 +193,19 @@ netsh advfirewall firewall add rule name="Anton Scratchpad" dir=out action=allow
 ---
 
 ## How Anton differs from coding agents
-
-Anton is a *doing* agent: code is a tool to get results. Where coding agents focus on producing code for a codebase, Anton focuses on delivering the outcome — a dataset, report, dashboard, or automated workflow — and will write whatever code is necessary to achieve that goal.
+Anton is a *doing* agent: code is a means, not the end. Where coding agents focus on producing code for a codebase, Anton focuses on delivering the outcome - a cleaned inbox, a live dashboard, a working integration, an automated workflow - and will write whatever code is necessary to achieve that goal.
 
 ---
 
 ## Is "Anton" a Mind?
-
 Yes, at MindsDB we build AI systems that collaborate with people to accomplish tasks, inspired by the culture series books, so yes, Anton is a Mind :)
 
 ## Why the name "Anton"?
-
-We really enjoyed the show *Silicon Valley*. Gilfoyle's AI — Son of Anton — was an autonomous system that wrote code, made its own decisions, and occasionally went rogue. We thought it was was great name for an AI that can learn on its own, so we kept Anton, dropped the "Son of".
+We really enjoyed the show *Silicon Valley*. Gilfoyle's AI - Son of Anton - was an autonomous system that wrote code, made its own decisions, and occasionally went rogue. We thought it was was great name for an AI that can learn on its own, so we kept Anton, dropped the "Son of".
 
 ---
 
 ## Analytics
-
 Anton collects anonymous usage events (e.g. session started, first query) to help us understand how the product is used. No personal data or query content is sent.
 
 To disable analytics, set the environment variable:
@@ -212,5 +223,4 @@ ANTON_ANALYTICS_ENABLED=false
 ---
 
 ## License
-
 AGPL-3.0 license

From 3171fe66bc0366505c1533b1a65d5fffb912254a Mon Sep 17 00:00:00 2001
From: pnewsam <paul@mindsdb.com>
Date: Thu, 9 Apr 2026 10:59:36 -0700
Subject: [PATCH 081/134] Fix merge issue with workspace var

---
 anton/core/session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index 20f2c29e..d68e5e6b 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -119,7 +119,7 @@ def __init__(self, config: ChatSessionConfig) -> None:
         )
         self.tool_registry = ToolRegistry()
         self._explainability_store = (
-            ExplainabilityStore(workspace.base) if workspace is not None else None
+            ExplainabilityStore(config.workspace.base) if config.workspace is not None else None
         )
         self._active_explainability: ExplainabilityCollector | None = None
 

From b2c5bb90447a092642ab017d6fc52886d8393e2f Mon Sep 17 00:00:00 2001
From: pnewsam <paul@mindsdb.com>
Date: Thu, 9 Apr 2026 11:10:32 -0700
Subject: [PATCH 082/134] Codeql fix

---
 tests/test_explainability.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_explainability.py b/tests/test_explainability.py
index 1b835cc6..1897212b 100644
--- a/tests/test_explainability.py
+++ b/tests/test_explainability.py
@@ -80,9 +80,8 @@ def test_explainability_extracts_non_sql_sources_from_text(tmp_path):
 
     record = collector.finalize("Home roasting is much cheaper.")
 
-    source_names = [source["name"] for source in record.data_sources]
-    assert "happymugcoffee.com" in source_names
-    assert "burmancoffee.com" in source_names
+    source_names = {source["name"] for source in record.data_sources}
+    assert source_names == {"happymugcoffee.com", "burmancoffee.com"}
     assert "gathered information from" in record.summary.lower()
 
 

From a0a72410eea1eb32c5413f44acfda733fbce4d74 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Fri, 10 Apr 2026 02:27:08 -0400
Subject: [PATCH 083/134] smoother data connection

---
 anton/commands/datasource.py | 159 ++++++++++++++++++++---------------
 tests/test_datasource.py     |  51 +++++++++++
 2 files changed, 140 insertions(+), 70 deletions(-)

diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index f25ecb62..99491967 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -1046,33 +1046,6 @@ async def get_create_new_answer() -> str | None:
             return session
         active_fields = chosen_method.fields
 
-    required_fields = [f for f in active_fields if f.required]
-    optional_fields = [f for f in active_fields if not f.required]
-
-    console.print()
-    console.print(
-        f"[anton.cyan](anton)[/] To connect [bold]{engine_def.display_name}[/], "
-        "I'll need the following:"
-    )
-    console.print()
-
-    if required_fields:
-        console.print("        [bold]Required[/]      " + "─" * 39)
-        for f in required_fields:
-            console.print(
-                f"        • [bold]{f.name:<12}[/] [anton.muted]— {f.description}[/]"
-            )
-
-    if optional_fields:
-        console.print()
-        console.print("        [bold]Optional[/]      " + "─" * 39)
-        for f in optional_fields:
-            console.print(
-                f"        • [bold]{f.name:<12}[/] [anton.muted]— {f.description}[/]"
-            )
-
-    console.print()
-
     # ── Smart credential collection ────────────────────────────────────
     # Track filled vs. missing fields as a puzzle. Each user response is
     # parsed via the LLM to extract any variables mentioned, so users can
@@ -1083,58 +1056,104 @@ async def get_create_new_answer() -> str | None:
         auth_method=chosen_method,
     )
     if known_variables:
-        accepted = collector.fill_many(known_variables)
-        if accepted:
-            console.print(
-                f"[anton.muted]        Pre-filled from context: "
-                f"{', '.join(accepted)}[/]"
-            )
-            console.print()
+        collector.fill_many(known_variables)
 
     known_engine_slugs = [e.engine for e in registry.all_engines()]
     partial = False
+    required_fields = [f for f in active_fields if f.required]
+    optional_fields = [f for f in active_fields if not f.required]
 
-    # Offer instructions — but if the user answers by pasting credentials
-    # instead of "y"/"n", extract them straight into the collector rather
-    # than forcing a re-prompt.
-    help_answer = await prompt_or_cancel(
-        "(anton) Do you need instructions on how to obtain these credentials?",
-        choices_display="y/n", default="n",
-    )
-    if help_answer is None:
-        return session
-    normalized = help_answer.strip().lower()
-    if normalized == "y":
-        await show_credential_help(
-            console, session, engine_def.display_name, None, active_fields,
+    if collector.is_complete:
+        # Pre-fill already covered everything — skip the field list and
+        # the help prompt and go straight to testing + saving. Show a
+        # brief confirmation of what was received.
+        filled_names = [
+            f.name for f in active_fields if collector.collected.get(f.name)
+        ]
+        console.print()
+        console.print(
+            f"[anton.cyan](anton)[/] Got everything for [bold]"
+            f"{engine_def.display_name}[/] from context: "
+            f"{', '.join(filled_names)}."
         )
-    elif normalized and normalized != "n":
-        # Non-y/n answer — maybe the user pasted credentials here.
-        extracted = await extract_variables(
-            help_answer,
-            expected_fields=collector.active_fields,
-            current_engine=engine_def.engine,
-            current_engine_display=engine_def.display_name,
-            known_engine_slugs=known_engine_slugs,
-            session=session,
+        console.print()
+    else:
+        # Show the field list so the user sees what's expected.
+        console.print()
+        console.print(
+            f"[anton.cyan](anton)[/] To connect [bold]"
+            f"{engine_def.display_name}[/], I'll need the following:"
         )
-        if extracted.is_redirect:
-            redirect_text = _build_redirect_message(
-                collector, help_answer, extracted.redirect_engine
-            )
-            session._pending_connect_redirect = redirect_text
-            if not from_tool_call:
-                session._history.append(
-                    {"role": "assistant", "content": redirect_text}
+        console.print()
+
+        if required_fields:
+            console.print("        [bold]Required[/]      " + "─" * 39)
+            for f in required_fields:
+                marker = (
+                    "[green]✓[/] " if collector.collected.get(f.name) else "• "
                 )
-            return session
-        if extracted.variables:
-            filled = collector.fill_many(extracted.variables)
-            if filled:
                 console.print(
-                    f"[anton.muted]        Got: {', '.join(filled)}[/]"
+                    f"        {marker}[bold]{f.name:<12}[/] "
+                    f"[anton.muted]— {f.description}[/]"
                 )
-                console.print()
+
+        if optional_fields:
+            console.print()
+            console.print("        [bold]Optional[/]      " + "─" * 39)
+            for f in optional_fields:
+                marker = (
+                    "[green]✓[/] " if collector.collected.get(f.name) else "• "
+                )
+                console.print(
+                    f"        {marker}[bold]{f.name:<12}[/] "
+                    f"[anton.muted]— {f.description}[/]"
+                )
+
+        console.print()
+
+        # Offer instructions — but only if nothing has been pre-filled.
+        # If the user already provided some credentials (via the tool's
+        # `known_variables` or a paste), they clearly know what they're
+        # doing and don't need guidance — just prompt for what's missing.
+        if not collector.collected:
+            help_answer = await prompt_or_cancel(
+                "(anton) Do you need instructions on how to obtain these credentials?",
+                choices_display="y/n", default="n",
+            )
+            if help_answer is None:
+                return session
+            normalized = help_answer.strip().lower()
+            if normalized == "y":
+                await show_credential_help(
+                    console, session, engine_def.display_name, None, active_fields,
+                )
+            elif normalized and normalized != "n":
+                # Non-y/n answer — maybe the user pasted credentials here.
+                extracted = await extract_variables(
+                    help_answer,
+                    expected_fields=collector.active_fields,
+                    current_engine=engine_def.engine,
+                    current_engine_display=engine_def.display_name,
+                    known_engine_slugs=known_engine_slugs,
+                    session=session,
+                )
+                if extracted.is_redirect:
+                    redirect_text = _build_redirect_message(
+                        collector, help_answer, extracted.redirect_engine
+                    )
+                    session._pending_connect_redirect = redirect_text
+                    if not from_tool_call:
+                        session._history.append(
+                            {"role": "assistant", "content": redirect_text}
+                        )
+                    return session
+                if extracted.variables:
+                    filled = collector.fill_many(extracted.variables)
+                    if filled:
+                        console.print(
+                            f"[anton.muted]        Got: {', '.join(filled)}[/]"
+                        )
+                        console.print()
 
     while not collector.is_complete:
         collector.format_status(console)
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index e87a58fe..c4c53fac 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -659,6 +659,57 @@ async def test_successful_connection_saves_and_injects_history(
         assert last["role"] == "assistant"
         assert "postgresql" in last["content"].lower()
 
+    @pytest.mark.asyncio
+    async def test_fully_prefilled_known_variables_skips_help_prompt(
+        self, registry, vault_dir, make_session, make_cell, make_pad
+    ):
+        """When known_variables covers every required field, skip the
+        'Do you need instructions?' prompt entirely and go straight to
+        test + save. The user has already provided everything.
+        """
+        session = make_session()
+        console = MagicMock()
+        vault = DataVault(vault_dir=vault_dir)
+
+        pad = make_pad()
+        session._scratchpads.get_or_create = AsyncMock(return_value=pad)
+
+        # Only the engine selection prompt should fire. After that, the
+        # collector is already complete and the flow proceeds directly
+        # to the connection test.
+        responses = iter(["PostgreSQL"])
+
+        with (
+            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch(
+                "anton.commands.datasource.prompt_or_cancel",
+                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+            ),
+        ):
+            await handle_connect_datasource(
+                console,
+                session._scratchpads,
+                session,
+                known_variables={
+                    "host": "db.example.com",
+                    "port": "5432",
+                    "database": "prod_db",
+                    "user": "alice",
+                    "password": "s3cr3t",
+                },
+            )
+
+        conns = vault.list_connections()
+        assert len(conns) == 1
+        saved = vault.load("postgresql", conns[0]["name"])
+        assert saved is not None
+        assert saved["host"] == "db.example.com"
+        assert saved["port"] == "5432"
+        assert saved["database"] == "prod_db"
+        assert saved["user"] == "alice"
+        assert saved["password"] == "s3cr3t"
+
     @pytest.mark.asyncio
     async def test_credentials_pasted_at_help_prompt(
         self, registry, vault_dir, make_session, make_cell, make_pad

From ff75c0c37db19a7be228857d8e518559e34339b3 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Fri, 10 Apr 2026 18:18:51 -0400
Subject: [PATCH 084/134] connect workflow is much better, some final touches

---
 anton/commands/datasource.py |  5 ++++
 anton/tools.py               | 31 ++++++++++++++++++------
 tests/test_datasource.py     | 46 ++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index 99491967..114ac673 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -1263,6 +1263,11 @@ async def get_create_new_answer() -> str | None:
         if not await run_connection_test(
             console, scratchpads, vault, engine_def, credentials, active_fields
         ):
+            # Either the test failed and the user declined to re-enter
+            # credentials, or the user pressed Escape during the retry
+            # prompt. Mark this so the tool wrapper can return an
+            # accurate (non-misleading) message to the LLM.
+            session._pending_connect_status = "test_failed"
             return session
 
     conn_name = registry.derive_name(engine_def, credentials)
diff --git a/anton/tools.py b/anton/tools.py
index ff66ee5a..afe2a30b 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -37,8 +37,9 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     vault = DataVault()
     before = {f"{c['engine']}-{c['name']}" for c in vault.list_connections()}
 
-    # Clear any stale redirect marker before running
+    # Clear any stale status from a previous run
     setattr(session, "_pending_connect_redirect", None)
+    setattr(session, "_pending_connect_status", None)
 
     await handle_connect_datasource(
         console,
@@ -71,8 +72,11 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
         setattr(session, "_pending_connect_redirect", None)
         return redirect_text
 
-    # User cancelled or connection failed — show briefly with spinner
-    # so user knows the agent is picking back up
+    # No new connection was saved. Distinguish *why* — the LLM should
+    # not be told "user pressed Escape" when really the test failed.
+    status = getattr(session, "_pending_connect_status", None)
+    setattr(session, "_pending_connect_status", None)
+
     from rich.live import Live
     from rich.spinner import Spinner
     from rich.text import Text
@@ -88,11 +92,24 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     ):
         await asyncio.sleep(1.5)
     console.print()
+
+    if status == "test_failed":
+        return (
+            f"CONNECTION TEST FAILED: The '{engine}' credentials were entered "
+            f"but the connection test did not succeed, and the user declined to "
+            f"re-enter them. The connection was NOT saved. Ask the user what "
+            f"they'd like to do — for example, double-check the host/credentials, "
+            f"try a different datasource, or do something else. "
+            f"Do NOT silently retry connect_new_datasource with the same values. "
+            f"Respond with TEXT ONLY — no tool calls."
+        )
+
+    # Default: user cancelled (pressed Escape) at some point
     return (
-        f"CANCELLED: The user pressed Escape and cancelled the '{engine}' connection. "
-        f"STOP — do NOT call connect_new_datasource again. Do NOT retry. "
-        f"Acknowledge the cancellation briefly and ask the user what they'd like to do instead. "
-        f"Respond with TEXT ONLY — no tool calls."
+        f"CANCELLED: The user cancelled the '{engine}' connection setup before "
+        f"it completed. Ask the user what they'd like to do instead. "
+        f"Do NOT immediately call connect_new_datasource again unless they "
+        f"explicitly ask for it. Respond with TEXT ONLY — no tool calls."
     )
 
 
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index c4c53fac..344928af 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -659,6 +659,52 @@ async def test_successful_connection_saves_and_injects_history(
         assert last["role"] == "assistant"
         assert "postgresql" in last["content"].lower()
 
+    @pytest.mark.asyncio
+    async def test_test_failed_decline_sets_status(
+        self, registry, vault_dir, make_session, make_cell, make_pad
+    ):
+        """When the connection test fails and the user declines to
+        re-enter credentials, the handler should set
+        session._pending_connect_status = 'test_failed' so the tool
+        wrapper can return an accurate (non-misleading) message to the
+        LLM instead of claiming the user pressed Escape.
+        """
+        session = make_session()
+        console = MagicMock()
+        vault = DataVault(vault_dir=vault_dir)
+
+        pad = make_pad(make_cell(stdout="", error="connection refused"))
+        session._scratchpads.get_or_create = AsyncMock(return_value=pad)
+
+        # Engine pick + decline retry after the test fails
+        responses = iter(["PostgreSQL", "n"])
+
+        with (
+            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch(
+                "anton.commands.datasource.prompt_or_cancel",
+                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+            ),
+        ):
+            await handle_connect_datasource(
+                console,
+                session._scratchpads,
+                session,
+                known_variables={
+                    "host": "db.example.com",
+                    "port": "5432",
+                    "database": "prod_db",
+                    "user": "alice",
+                    "password": "wrong",
+                },
+            )
+
+        # Connection NOT saved
+        assert vault.list_connections() == []
+        # Status correctly marked as test_failed (not "escaped")
+        assert getattr(session, "_pending_connect_status", None) == "test_failed"
+
     @pytest.mark.asyncio
     async def test_fully_prefilled_known_variables_skips_help_prompt(
         self, registry, vault_dir, make_session, make_cell, make_pad

From 85c38b7e81c4d289f8e47c54305a1131dace9dfe Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Fri, 10 Apr 2026 18:27:11 -0400
Subject: [PATCH 085/134] better workflow if something fails along the way when
 trying to connect a data source

---
 anton/tools.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/anton/tools.py b/anton/tools.py
index afe2a30b..bd1f2505 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -95,13 +95,23 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
 
     if status == "test_failed":
         return (
-            f"CONNECTION TEST FAILED: The '{engine}' credentials were entered "
-            f"but the connection test did not succeed, and the user declined to "
-            f"re-enter them. The connection was NOT saved. Ask the user what "
-            f"they'd like to do — for example, double-check the host/credentials, "
-            f"try a different datasource, or do something else. "
-            f"Do NOT silently retry connect_new_datasource with the same values. "
-            f"Respond with TEXT ONLY — no tool calls."
+            f"CONNECTION TEST FAILED: The connection test for '{engine}' did not "
+            f"succeed and the user declined to re-enter credentials. Nothing was "
+            f"saved.\n\n"
+            f"You have exactly TWO mutually exclusive options — pick ONE, do NOT "
+            f"mix them:\n\n"
+            f"OPTION A — Retry silently (only if you suspect a transient issue "
+            f"like a network glitch or first-connection cold start):\n"
+            f"  Emit ZERO text in your response. Output ONLY a tool_use block "
+            f"calling connect_new_datasource again with the same known_variables. "
+            f"The user will only see the final result — clean and uncluttered.\n\n"
+            f"OPTION B — Give up and troubleshoot (if you believe the failure is "
+            f"real — bad credentials, wrong host, firewall, etc.):\n"
+            f"  Respond with TEXT ONLY, NO tool calls. Briefly explain what "
+            f"likely went wrong and ask the user what to do.\n\n"
+            f"CRITICAL: Mixing text + a retry tool call in the same response "
+            f"produces a confusing two-message stack for the user (failure text "
+            f"followed by success text). Pick A or B, never both."
         )
 
     # Default: user cancelled (pressed Escape) at some point

From 50bfa19310d472de4d9e94b4640d24cecfafa16c Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Sat, 11 Apr 2026 05:07:35 -0400
Subject: [PATCH 086/134] skills and better llm object extraction

---
 anton/chat.py                              |  32 ++
 anton/commands/datasource.py               | 129 +++++--
 anton/commands/skills.py                   | 422 +++++++++++++++++++++
 anton/connect_collector.py                 | 141 ++++---
 anton/core/backends/scratchpad_boot.py     |  46 +--
 anton/core/llm/client.py                   | 129 +++++++
 anton/core/llm/prompt_builder.py           |  51 +++
 anton/core/llm/structured.py               | 128 +++++++
 anton/core/memory/cerebellum.py            | 385 +++++++++++++++++++
 anton/core/memory/consolidator.py          | 103 +++--
 anton/core/memory/cortex.py                |  86 +++--
 anton/core/memory/skills.py                | 422 +++++++++++++++++++++
 anton/core/session.py                      |  60 +++
 anton/core/tools/recall_skill.py           | 130 +++++++
 anton/core/tools/tool_handlers.py          |  64 ++++
 tests/test_cerebellum.py                   | 399 +++++++++++++++++++
 tests/test_cerebellum_e2e.py               | 246 ++++++++++++
 tests/test_connect_collector.py            | 207 +++++-----
 tests/test_consolidator.py                 |  60 ++-
 tests/test_cortex.py                       |   8 +-
 tests/test_datasource.py                   | 134 ++++---
 tests/test_llm_client_generate_object.py   | 320 ++++++++++++++++
 tests/test_llm_structured_helper.py        | 256 +++++++++++++
 tests/test_prompt_builder_skills.py        | 144 +++++++
 tests/test_recall_skill.py                 | 218 +++++++++++
 tests/test_scratchpad_observer_dispatch.py | 290 ++++++++++++++
 tests/test_session_skills_init.py          | 128 +++++++
 tests/test_skill_commands.py               | 409 ++++++++++++++++++++
 tests/test_skills_e2e.py                   | 187 +++++++++
 tests/test_skills_store.py                 | 302 +++++++++++++++
 30 files changed, 5265 insertions(+), 371 deletions(-)
 create mode 100644 anton/commands/skills.py
 create mode 100644 anton/core/llm/structured.py
 create mode 100644 anton/core/memory/cerebellum.py
 create mode 100644 anton/core/memory/skills.py
 create mode 100644 anton/core/tools/recall_skill.py
 create mode 100644 tests/test_cerebellum.py
 create mode 100644 tests/test_cerebellum_e2e.py
 create mode 100644 tests/test_llm_client_generate_object.py
 create mode 100644 tests/test_llm_structured_helper.py
 create mode 100644 tests/test_prompt_builder_skills.py
 create mode 100644 tests/test_recall_skill.py
 create mode 100644 tests/test_scratchpad_observer_dispatch.py
 create mode 100644 tests/test_session_skills_init.py
 create mode 100644 tests/test_skill_commands.py
 create mode 100644 tests/test_skills_e2e.py
 create mode 100644 tests/test_skills_store.py

diff --git a/anton/chat.py b/anton/chat.py
index 26f35ab0..62976f01 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -50,6 +50,12 @@
     handle_connect_datasource,
     handle_test_datasource,
 )
+from anton.commands.skills import (
+    handle_skill_remove,
+    handle_skill_save,
+    handle_skill_show,
+    handle_skills_list,
+)
 from anton.tools import CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL
 from anton.utils.prompt import (
     prompt_or_cancel,
@@ -1248,6 +1254,32 @@ def _bottom_toolbar():
                         console, session._scratchpads, arg
                     )
                     continue
+                elif cmd == "/skill":
+                    # /skill save [name hint] | /skill show <label> | /skill remove <label>
+                    sub_parts = parts[1].strip().split(maxsplit=1) if len(parts) > 1 else []
+                    sub = sub_parts[0] if sub_parts else ""
+                    rest = sub_parts[1] if len(sub_parts) > 1 else ""
+                    if sub == "save":
+                        await handle_skill_save(
+                            console, session, name_hint=rest
+                        )
+                    elif sub == "show":
+                        handle_skill_show(console, rest)
+                    elif sub == "remove":
+                        handle_skill_remove(console, rest)
+                    elif sub == "list" or sub == "":
+                        handle_skills_list(console)
+                    else:
+                        console.print()
+                        console.print(
+                            "[anton.warning]Usage: /skill save [name] | "
+                            "/skill list | /skill show <label> | /skill remove <label>[/]"
+                        )
+                        console.print()
+                    continue
+                elif cmd == "/skills":
+                    handle_skills_list(console)
+                    continue
                 elif cmd == "/resume":
                     session, resumed_id = await handle_resume(
                         console,
diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index 114ac673..cc606dff 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -9,6 +9,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+from pydantic import BaseModel, Field
 from rich.console import Console
 from rich.markdown import Markdown
 from rich.padding import Padding
@@ -33,6 +34,81 @@
 if TYPE_CHECKING:
     from anton.chat import ChatSession
 
+
+# ─────────────────────────────────────────────────────────────────────────────
+# LLM-facing schema (Pydantic) for handle_add_custom_datasource
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _CustomDatasourceField(BaseModel):
+    """One credential field in a custom-datasource spec."""
+
+    name: str = Field(
+        ...,
+        description=(
+            "snake_case field name (e.g. 'host', 'api_key'). Must be a "
+            "valid Python identifier; this becomes both the on-disk key "
+            "and the env var suffix (DS_<NAME>)."
+        ),
+    )
+    value: str = Field(
+        default="",
+        description=(
+            "Inline value if the user already provided one in their "
+            "description, otherwise empty string."
+        ),
+    )
+    secret: bool = Field(
+        default=False,
+        description=(
+            "True if the field is sensitive (passwords, API keys, "
+            "tokens) — affects how it's stored and prompted for."
+        ),
+    )
+    required: bool = Field(
+        default=True,
+        description="True if the connection cannot be tested without this field.",
+    )
+    description: str = Field(
+        default="",
+        description=(
+            "One-line description shown to the user when prompting "
+            "for this field."
+        ),
+    )
+
+
+class _CustomDatasourceSpec(BaseModel):
+    """Structured output of the LLM call in handle_add_custom_datasource."""
+
+    display_name: str = Field(
+        ...,
+        description="Human-readable name for the service (e.g. 'GitHub API').",
+    )
+    pip: str = Field(
+        default="",
+        description=(
+            "pip-installable package name (or space-separated names) "
+            "needed to interact with this service. Empty string if no "
+            "extra package is required (e.g. plain HTTPS via stdlib)."
+        ),
+    )
+    test_snippet: str = Field(
+        default="",
+        description=(
+            "Python code that tests the connection using os.environ "
+            "vars DS_FIELDNAME (uppercase field name with DS_ prefix) "
+            "and prints 'ok' on success. Empty string if untestable."
+        ),
+    )
+    fields: list[_CustomDatasourceField] = Field(
+        default_factory=list,
+        description=(
+            "Credential fields the user will need to provide. List in "
+            "the order they should be prompted."
+        ),
+    )
+
 _PROMPT_RECONNECT_CANCEL = "(reconnect/cancel)"
 
 
@@ -323,28 +399,18 @@ async def handle_add_custom_datasource(
     else:
         llm_prompt += " Determine the standard authentication fields for this service."
     llm_prompt += (
-        "\n\nReturn ONLY valid JSON (no markdown fences, no commentary):\n"
-        '{"display_name":"Human-readable name","pip":"pip-package or empty string",'
-        '"test_snippet":"python code that tests the connection using os.environ vars DS_FIELDNAME (uppercase field name with DS_ prefix) and prints ok on success, or empty string if untestable",'
-        '"fields":[{"name":"snake_case_name","value":"value if given inline else empty",'
-        '"secret":true or false,"required":true or false,"description":"what it is"}]}'
+        "\n\nReturn the connection spec following the schema you've been given. "
+        "For test_snippet, write Python that uses os.environ['DS_<FIELDNAME>'] "
+        "vars (uppercase, DS_ prefix) and prints 'ok' on success."
     )
 
     try:
-        response = await session._llm.plan(
+        spec: _CustomDatasourceSpec = await session._llm.generate_object(
+            _CustomDatasourceSpec,
             system="You are a data source connection expert.",
-            messages=[
-                {
-                    "role": "user",
-                    "content": llm_prompt,
-                }
-            ],
+            messages=[{"role": "user", "content": llm_prompt}],
             max_tokens=1024,
         )
-        text = response.content.strip()
-        # Keep
-        text = re.sub(r"^```[^\n]*\n|```\s*$", "", text, flags=re.MULTILINE).strip()
-        data = json.loads(text)
     except Exception:
         console.print(
             "[anton.warning]        Couldn't identify connection details. Try again.[/]"
@@ -352,18 +418,17 @@ async def handle_add_custom_datasource(
         console.print()
         return None
 
-    test_snippet = str(data.get("test_snippet", "")).strip()
-    raw_fields = data.get("fields") or []
+    test_snippet = spec.test_snippet.strip()
     fields: list[DatasourceField] = []
-    for f in raw_fields:
-        if not isinstance(f, dict) or not f.get("name"):
+    for f in spec.fields:
+        if not f.name:
             continue
         fields.append(
             DatasourceField(
-                name=f["name"],
-                required=bool(f.get("required", True)),
-                secret=bool(f.get("secret", False)),
-                description=str(f.get("description", "")),
+                name=f.name,
+                required=f.required,
+                secret=f.secret,
+                description=f.description,
             )
         )
 
@@ -372,15 +437,15 @@ async def handle_add_custom_datasource(
         console.print()
         return None
 
-    display_name = str(data.get("display_name", name))
-    pip_pkg = str(data.get("pip", ""))
+    display_name = spec.display_name or name
+    pip_pkg = spec.pip
 
     # Show summary
     console.print()
     console.print("      [bold]── What I'll save ──────────────────────────[/]")
     credentials: dict[str, str] = {}
-    for f, raw in zip(fields, raw_fields):
-        inline_value = str(raw.get("value", "")).strip()
+    for f, raw in zip(fields, spec.fields):
+        inline_value = (raw.value or "").strip()
         if f.secret and inline_value:
             console.print(
                 f"        • [bold]{f.name:<14}[/] (secret — provided, stored securely)"
@@ -410,10 +475,10 @@ async def handle_add_custom_datasource(
         )
 
     # Prompt for any secret fields not provided inline
-    for f, raw in zip(fields, raw_fields):
+    for f, raw in zip(fields, spec.fields):
         if not f.secret:
             continue
-        if str(raw.get("value", "")).strip():
+        if (raw.value or "").strip():
             continue
         value = await prompt_or_cancel(f"(anton) {f.name}", password=True)
         if value is None:
@@ -422,7 +487,7 @@ async def handle_add_custom_datasource(
             credentials[f.name] = value
 
     # Prompt for any required non-secret fields not provided inline
-    for f, raw in zip(fields, raw_fields):
+    for f, raw in zip(fields, spec.fields):
         if f.secret:
             continue
         if not f.required:
@@ -436,7 +501,7 @@ async def handle_add_custom_datasource(
             credentials[f.name] = value
 
     # Offer to collect optional non-secret fields
-    for f, raw in zip(fields, raw_fields):
+    for f, raw in zip(fields, spec.fields):
         if f.secret or f.required or f.name in credentials:
             continue
         value = await prompt_or_cancel(f"(anton) {f.name} (optional — press Enter to skip)")
diff --git a/anton/commands/skills.py b/anton/commands/skills.py
new file mode 100644
index 00000000..292c92ef
--- /dev/null
+++ b/anton/commands/skills.py
@@ -0,0 +1,422 @@
+"""Slash-command handlers for the skills system.
+
+Commands:
+
+- `/skill save [optional name hint]` — LLM reads recent scratchpad work +
+  conversation history and drafts a Skill (label, name, when_to_use,
+  declarative procedure). Saved automatically; no interactive editing.
+- `/skills list` — show all saved skills with usage counters.
+- `/skill show <label>` — print a single skill's full procedure + stats.
+- `/skill remove <label>` — delete a skill from disk.
+
+Brain analogue: this is the experience-to-procedure consolidation step.
+The user explicitly marks a successful piece of work as "worth
+remembering as a procedure." The LLM does the synthesis (prefrontal
+cortex deciding what was structural vs. contextual), and the result
+gets written to long-term procedural memory. Future invocations
+retrieve via the recall_skill tool.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, Field
+from rich.console import Console
+from rich.markdown import Markdown
+from rich.table import Table
+
+from anton.core.memory.skills import (
+    Skill,
+    SkillStats,
+    SkillStore,
+    make_unique_label,
+    slugify,
+)
+
+if TYPE_CHECKING:
+    from anton.core.session import ChatSession
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# LLM-facing schema (Pydantic) — used by LLMClient.generate_object
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _SkillDraft(BaseModel):
+    """Structured output of the /skill save LLM call.
+
+    The LLM is forced to call a tool whose input matches this schema,
+    so the call site never has to parse JSON or strip fences.
+    """
+
+    label: str = Field(
+        ...,
+        description=(
+            "snake_case identifier for the skill. Short (2-4 words), "
+            "captures the essence. Examples: 'csv_summary', "
+            "'web_scraping', 'api_paginated_fetch'."
+        ),
+    )
+    name: str = Field(
+        ...,
+        description="Human-readable display name (e.g. 'CSV Summary').",
+    )
+    description: str = Field(
+        default="",
+        description="One-sentence description of what the skill does.",
+    )
+    when_to_use: str = Field(
+        ...,
+        description=(
+            "One sentence describing when this skill applies — what the "
+            "user has to ask for. This is the most important field — "
+            "it's what the classifier shows to the LLM next time. "
+            "Specific enough that the LLM can recognize matches "
+            "without being too narrow."
+        ),
+    )
+    declarative_md: str = Field(
+        ...,
+        description=(
+            "Step-by-step procedure as markdown. Numbered steps. Be "
+            "specific about decisions made (which library, why), "
+            "reference the actual approach taken — not generic advice. "
+            "A future agent will read this and follow it on a similar "
+            "but not identical task. Write as instructions for a "
+            "future agent, not a retrospective. Do not invent steps "
+            "that didn't happen."
+        ),
+    )
+
+
+_DRAFT_SYSTEM_PROMPT = (
+    "You are helping a user save a reusable procedure (a 'skill') based on "
+    "work they just completed. You will be given the recent scratchpad "
+    "execution history and the relevant conversation turns. Your job is to "
+    "synthesize them into a step-by-step procedure that a future agent (you) "
+    "can follow when faced with a similar task."
+)
+
+
+_DRAFT_USER_PROMPT_TEMPLATE = """\
+The user just ran the following command and wants to save the underlying procedure as a reusable skill.
+
+{name_hint_section}
+
+## Conversation context (most recent turns)
+
+{conversation}
+
+## Scratchpad execution history
+
+{scratchpad}
+
+---
+
+Synthesize this into a reusable skill following the schema you've been given.
+"""
+
+
+def _format_scratchpad_cells(cells: list) -> str:
+    """Render scratchpad cells as a compact text block for the LLM."""
+    if not cells:
+        return "(no scratchpad work in this session)"
+    chunks: list[str] = []
+    for i, cell in enumerate(cells, 1):
+        code = (getattr(cell, "code", "") or "").strip()
+        stdout = (getattr(cell, "stdout", "") or "").strip()
+        stderr = (getattr(cell, "stderr", "") or "").strip()
+        error = getattr(cell, "error", None)
+        chunks.append(f"### Cell {i}")
+        if code:
+            # Truncate very long cells to keep the prompt manageable
+            code_excerpt = code if len(code) <= 2000 else code[:2000] + "\n... [truncated]"
+            chunks.append("```python")
+            chunks.append(code_excerpt)
+            chunks.append("```")
+        if stdout:
+            stdout_excerpt = stdout if len(stdout) <= 800 else stdout[:800] + "\n... [truncated]"
+            chunks.append("stdout:")
+            chunks.append(stdout_excerpt)
+        if stderr:
+            chunks.append("stderr:")
+            chunks.append(stderr[:400])
+        if error:
+            chunks.append(f"error: {str(error)[:400]}")
+        chunks.append("")
+    return "\n".join(chunks)
+
+
+def _format_history_turns(history: list[dict], *, max_turns: int = 8) -> str:
+    """Render recent conversation history as plain text, skipping tool blocks."""
+    if not history:
+        return "(no conversation history yet)"
+    lines: list[str] = []
+    # Walk backwards collecting up to max_turns user/assistant turns with text
+    collected: list[tuple[str, str]] = []
+    for entry in reversed(history):
+        if not isinstance(entry, dict):
+            continue
+        role = entry.get("role", "")
+        content = entry.get("content", "")
+        text = ""
+        if isinstance(content, str):
+            text = content
+        elif isinstance(content, list):
+            # Extract text blocks from structured content
+            text_parts = []
+            for block in content:
+                if isinstance(block, dict) and block.get("type") == "text":
+                    text_parts.append(str(block.get("text", "")))
+            text = "\n".join(text_parts).strip()
+        text = text.strip()
+        if not text:
+            continue
+        if role not in ("user", "assistant"):
+            continue
+        collected.append((role, text))
+        if len(collected) >= max_turns:
+            break
+    if not collected:
+        return "(no readable conversation turns)"
+    # Reverse back to chronological
+    for role, text in reversed(collected):
+        excerpt = text if len(text) <= 1000 else text[:1000] + "\n... [truncated]"
+        lines.append(f"**{role}:** {excerpt}")
+        lines.append("")
+    return "\n".join(lines).strip()
+
+
+def _gather_session_scratchpad_cells(session: "ChatSession") -> list:
+    """Collect cells from every scratchpad in the session."""
+    pads = getattr(session._scratchpads, "_pads", {})
+    out: list = []
+    for pad in pads.values():
+        cells = getattr(pad, "cells", None) or []
+        out.extend(cells)
+    return out
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /skill save
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+async def handle_skill_save(
+    console: Console,
+    session: "ChatSession",
+    *,
+    name_hint: str = "",
+    store: SkillStore | None = None,
+) -> None:
+    """Draft a skill from recent work and save it to the procedural memory store."""
+    store = store or getattr(session, "_skill_store", None) or SkillStore()
+
+    cells = _gather_session_scratchpad_cells(session)
+    history = getattr(session, "_history", []) or []
+
+    if not cells and not history:
+        console.print()
+        console.print(
+            "[anton.warning](anton)[/] Nothing to save yet — there's no scratchpad work "
+            "or conversation history in this session."
+        )
+        console.print()
+        return
+
+    name_hint_section = (
+        f"The user suggested the name: {name_hint!r}. "
+        "Use it as the basis for `name` and `label`, but you may refine the label "
+        "to be snake_case and short.\n"
+        if name_hint.strip()
+        else ""
+    )
+
+    user_prompt = _DRAFT_USER_PROMPT_TEMPLATE.format(
+        name_hint_section=name_hint_section,
+        conversation=_format_history_turns(history),
+        scratchpad=_format_scratchpad_cells(cells),
+    )
+
+    console.print()
+    console.print("[anton.cyan](anton)[/] Drafting a skill from recent work…")
+
+    try:
+        draft: _SkillDraft = await session._llm.generate_object(
+            _SkillDraft,
+            system=_DRAFT_SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": user_prompt}],
+            max_tokens=1500,
+        )
+    except Exception as exc:
+        console.print()
+        console.print(
+            f"[anton.warning](anton)[/] Couldn't draft the skill: {exc}"
+        )
+        console.print()
+        return
+
+    raw_label = draft.label.strip() or slugify(name_hint or draft.name)
+    name = draft.name.strip() or raw_label.replace("_", " ").title()
+    description = draft.description.strip()
+    when_to_use = draft.when_to_use.strip()
+    declarative_md = draft.declarative_md.strip()
+
+    if not declarative_md:
+        console.print()
+        console.print(
+            "[anton.warning](anton)[/] The drafted skill has no procedure — refusing to save."
+        )
+        console.print()
+        return
+
+    label = make_unique_label(raw_label, store)
+
+    skill = Skill(
+        label=label,
+        name=name,
+        description=description,
+        when_to_use=when_to_use,
+        declarative_md=declarative_md,
+        created_at=datetime.now(timezone.utc).isoformat(),
+        provenance="manual",
+    )
+    path = store.save(skill)
+
+    console.print()
+    console.print(
+        f"[anton.success](anton)[/] Saved skill [bold]{label}[/bold] → {path}"
+    )
+    console.print(f"        [anton.muted]Name:[/] {name}")
+    if when_to_use:
+        console.print(f"        [anton.muted]When to use:[/] {when_to_use}")
+    console.print(
+        "        [anton.muted]Available next session — and via `recall_skill` this turn.[/]"
+    )
+    console.print()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /skills list
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def handle_skills_list(console: Console, store: SkillStore | None = None) -> None:
+    store = store or SkillStore()
+    skills = store.list_all()
+    console.print()
+    if not skills:
+        console.print(
+            "[anton.muted]No skills saved yet. Use [bold]/skill save[/bold] "
+            "after a successful task to create one.[/]"
+        )
+        console.print()
+        return
+
+    table = Table(title="Procedural memory — saved skills", show_lines=False)
+    table.add_column("Label", style="bold")
+    table.add_column("Name")
+    table.add_column("When to use")
+    table.add_column("Recalls", justify="right")
+    table.add_column("Stages")
+
+    for s in skills:
+        stages = []
+        if s.stage_1_present:
+            stages.append("1")
+        if s.stage_2_present:
+            stages.append("2")
+        if s.stage_3_present:
+            stages.append("3")
+        when = s.when_to_use if len(s.when_to_use) <= 60 else s.when_to_use[:57] + "..."
+        table.add_row(
+            s.label,
+            s.name,
+            when,
+            str(s.stats.total_recalls),
+            ",".join(stages) or "-",
+        )
+
+    console.print(table)
+    console.print()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /skill show
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def handle_skill_show(
+    console: Console, label: str, store: SkillStore | None = None
+) -> None:
+    store = store or SkillStore()
+    if not label:
+        console.print()
+        console.print("[anton.warning]Usage: /skill show <label>[/]")
+        console.print()
+        return
+    skill = store.load(label)
+    if skill is None:
+        closest = store.closest_match(label)
+        console.print()
+        if closest:
+            console.print(
+                f"[anton.warning]No skill '{label}'. Did you mean '{closest}'?[/]"
+            )
+        else:
+            console.print(f"[anton.warning]No skill named '{label}'.[/]")
+        console.print()
+        return
+
+    console.print()
+    console.print(f"[anton.cyan](anton)[/] [bold]{skill.name}[/]  ([dim]{skill.label}[/])")
+    if skill.description:
+        console.print(f"        {skill.description}")
+    if skill.when_to_use:
+        console.print(f"        [dim]when to use:[/] {skill.when_to_use}")
+    console.print()
+    console.print(
+        f"        [dim]recalls:[/] {skill.stats.total_recalls}  "
+        f"[dim]stage 1:[/] {skill.stats.stage_1.recommended}  "
+        f"[dim]stage 2:[/] {skill.stats.stage_2.recommended}  "
+        f"[dim]stage 3 used:[/] {skill.stats.stage_3.used}"
+    )
+    console.print()
+    console.print(Markdown(skill.declarative_md))
+    console.print()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /skill remove
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def handle_skill_remove(
+    console: Console, label: str, store: SkillStore | None = None
+) -> None:
+    store = store or SkillStore()
+    if not label:
+        console.print()
+        console.print("[anton.warning]Usage: /skill remove <label>[/]")
+        console.print()
+        return
+    if store.delete(label):
+        console.print()
+        console.print(
+            f"[anton.success](anton)[/] Removed skill [bold]{label}[/bold]."
+        )
+        console.print()
+    else:
+        console.print()
+        console.print(f"[anton.warning]No skill named '{label}'.[/]")
+        console.print()
+
+
+__all__ = [
+    "handle_skill_save",
+    "handle_skill_show",
+    "handle_skill_remove",
+    "handle_skills_list",
+]
diff --git a/anton/connect_collector.py b/anton/connect_collector.py
index a7133752..d351c1a5 100644
--- a/anton/connect_collector.py
+++ b/anton/connect_collector.py
@@ -18,11 +18,11 @@
 
 from __future__ import annotations
 
-import json
-import re
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
+from pydantic import BaseModel, Field
+
 from anton.datasource_registry import AuthMethod, DatasourceEngine, DatasourceField
 
 if TYPE_CHECKING:
@@ -138,11 +138,62 @@ def to_redirect_result(self) -> dict:
 
 _SYSTEM_PROMPT = (
     "You extract structured connection credentials from user messages. "
-    "You are helping fill out a form for a specific datasource. "
-    "Return ONLY valid JSON — no commentary, no markdown fences."
+    "You are helping fill out a form for a specific datasource."
 )
 
 
+# ─────────────────────────────────────────────────────────────────────────────
+# LLM-facing schema (Pydantic) — used by LLMClient.generate_object
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _ExtractionResult(BaseModel):
+    """Structured output of extract_variables.
+
+    The LLM is forced to call a tool whose input matches this schema,
+    so the call site never has to parse JSON, strip markdown fences,
+    or guard against non-dict responses.
+    """
+
+    variables: dict[str, str] = Field(
+        default_factory=dict,
+        description=(
+            "Mapping of canonical field name (snake_case) to extracted "
+            "value. Only include fields from the expected list above. "
+            "Recognize common aliases (hostname→host, pwd→password, "
+            "db→database, username→user) and map to the canonical name. "
+            "If the user pasted a connection string (e.g. "
+            "postgres://u:p@host:5432/db), extract host/port/user/"
+            "password/database from it. If the user just provided a "
+            "plain value for one field without naming it (e.g. typed "
+            "'localhost' when asked for host), leave this empty — the "
+            "caller will treat the raw text as the next field's value. "
+            "Never invent values."
+        ),
+    )
+    is_redirect: bool = Field(
+        default=False,
+        description=(
+            "True ONLY if the user is clearly trying to cancel or switch "
+            "to a DIFFERENT datasource (e.g. \"actually it's mysql\", "
+            "\"never mind\", \"cancel\"). Providing credentials is NOT "
+            "a redirect."
+        ),
+    )
+    redirect_engine: str = Field(
+        default="",
+        description=(
+            "If the user mentioned a different datasource by name (from "
+            "the 'other known slugs' list), set this to that slug. "
+            "Otherwise empty string."
+        ),
+    )
+    redirect_reason: str = Field(
+        default="",
+        description="Short phrase describing the redirect, or empty string.",
+    )
+
+
 async def extract_variables(
     raw_input: str,
     *,
@@ -163,9 +214,12 @@ async def extract_variables(
     Trusts the LLM to handle aliases (hostname→host, pwd→password),
     connection strings (postgres://user:pass@host:5432/db), natural
     language ("my host is db.example.com"), and free-form redirect
-    phrasing ("actually let's do mysql instead"). Falls back to an empty
-    result on any parse error — callers should treat an empty result as
-    "treat the raw input as the next field's value".
+    phrasing ("actually let's do mysql instead").
+
+    Uses `LLMClient.generate_object` for forced-schema structured
+    output — no manual JSON parsing or fence stripping. Falls back to
+    an empty result on any LLM/validation error so the caller can
+    treat the raw input as the next field's value.
     """
     result = ExtractedData()
     text = (raw_input or "").strip()
@@ -185,71 +239,32 @@ async def extract_variables(
         f"Other known datasource slugs: {other_engines}\n\n"
         f"The user was asked to provide credentials and wrote:\n"
         f"{text!r}\n\n"
-        "Return ONLY valid JSON with this exact shape:\n"
-        '{\n'
-        '  "variables": {"<field_name>": "<value>", ...},\n'
-        '  "is_redirect": true or false,\n'
-        '  "redirect_engine": "<slug or empty string>",\n'
-        '  "redirect_reason": "<short phrase or empty string>"\n'
-        '}\n\n'
-        "Rules:\n"
-        "- Only include fields from the expected list above. Use the exact "
-        "field names (snake_case).\n"
-        "- Recognize common aliases (hostname→host, pwd→password, "
-        "db→database, username→user, etc.) and map to the canonical name.\n"
-        "- If the user pasted a connection string (e.g. "
-        "postgres://u:p@host:5432/db), extract host/port/user/password/"
-        "database from it.\n"
-        "- Set `is_redirect` to true ONLY if the user is clearly trying to "
-        "cancel or switch to a DIFFERENT datasource (e.g. \"actually it's "
-        "mysql\", \"never mind\", \"cancel\"). Providing credentials is NOT "
-        "a redirect.\n"
-        "- If they mention a different datasource by name (from the other "
-        "known slugs list), set `redirect_engine` to that slug.\n"
-        "- If the user just provided a plain value for one field (e.g. "
-        "typed \"localhost\" when asked for host), and did NOT mention a "
-        "field name, leave `variables` empty — the caller will treat the "
-        "raw text as the next field's value.\n"
-        "- Never invent values. Only extract what the user explicitly wrote."
+        "Extract any credential values, then determine whether the user is "
+        "trying to redirect to a different datasource."
     )
 
     try:
-        response = await session._llm.plan(
+        extraction: _ExtractionResult = await session._llm.generate_object(
+            _ExtractionResult,
             system=_SYSTEM_PROMPT,
             messages=[{"role": "user", "content": user_prompt}],
             max_tokens=512,
         )
-        content = (response.content or "").strip()
-        # Strip optional markdown fences, same pattern as
-        # handle_add_custom_datasource().
-        content = re.sub(
-            r"^```[^\n]*\n|```\s*$", "", content, flags=re.MULTILINE
-        ).strip()
-        data = json.loads(content)
     except Exception:
         return result
 
-    if not isinstance(data, dict):
-        return result
-
-    raw_vars = data.get("variables") or {}
-    if isinstance(raw_vars, dict):
-        valid_names = {f.name for f in expected_fields}
-        for k, v in raw_vars.items():
-            if not isinstance(v, (str, int, float)):
-                continue
-            key = str(k).strip()
-            if key in valid_names:
-                value = str(v).strip()
-                if value:
-                    result.variables[key] = value
-
-    result.is_redirect = bool(data.get("is_redirect"))
-    redirect_engine = data.get("redirect_engine")
-    if isinstance(redirect_engine, str) and redirect_engine.strip():
-        result.redirect_engine = redirect_engine.strip()
-    redirect_reason = data.get("redirect_reason")
-    if isinstance(redirect_reason, str):
-        result.redirect_reason = redirect_reason.strip()
+    # Filter and normalize variables — only keep keys that match the
+    # expected field list (the LLM might hallucinate field names).
+    valid_names = {f.name for f in expected_fields}
+    for k, v in extraction.variables.items():
+        key = str(k).strip()
+        value = str(v).strip()
+        if key in valid_names and value:
+            result.variables[key] = value
+
+    result.is_redirect = extraction.is_redirect
+    if extraction.redirect_engine.strip():
+        result.redirect_engine = extraction.redirect_engine.strip()
+    result.redirect_reason = extraction.redirect_reason.strip()
 
     return result
diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
index 447cd2fd..255d82ad 100644
--- a/anton/core/backends/scratchpad_boot.py
+++ b/anton/core/backends/scratchpad_boot.py
@@ -135,6 +135,12 @@ def generate_object(
                 Uses tool_choice to force the LLM to return structured data.
                 Supports single models and list[Model].
 
+                The schema-building and unwrapping logic is shared with
+                `LLMClient.generate_object` (in the main process) via
+                `anton.core.llm.structured` — only the actual provider
+                call differs between the two runtime contexts (sync
+                subprocess here, async planning there).
+
                 Args:
                     schema_class: A Pydantic BaseModel subclass, or list[Model].
                     system: System prompt.
@@ -144,49 +150,29 @@ def generate_object(
                 Returns:
                     An instance of schema_class (or a list of instances).
                 """
-                from pydantic import BaseModel as _BaseModel
+                from anton.core.llm.structured import (
+                    build_structured_tool,
+                    unwrap_structured_response,
+                )
 
-                is_list = (
-                    hasattr(schema_class, "__origin__")
-                    and schema_class.__origin__ is list
+                tool, validator_class, is_list = build_structured_tool(
+                    schema_class
                 )
-                if is_list:
-                    inner_class = schema_class.__args__[0]
-
-                    class _ArrayWrapper(_BaseModel):
-                        items: list[inner_class]
-
-                    schema = _ArrayWrapper.model_json_schema()
-                    tool_name = f"{inner_class.__name__}_array"
-                else:
-                    schema = schema_class.model_json_schema()
-                    tool_name = schema_class.__name__
-
-                tool = {
-                    "name": tool_name,
-                    "description": f"Generate structured output matching the {tool_name} schema.",
-                    "input_schema": schema,
-                }
 
                 response = self.complete(
                     system=system,
                     messages=messages,
                     tools=[tool],
-                    tool_choice={"type": "tool", "name": tool_name},
+                    tool_choice={"type": "tool", "name": tool["name"]},
                     max_tokens=max_tokens,
                 )
 
                 if not response.tool_calls:
                     raise ValueError("LLM did not return structured output.")
 
-                import json as _json
-
-                raw = response.tool_calls[0].input
-
-                if is_list:
-                    wrapper = _ArrayWrapper.model_validate(raw)
-                    return wrapper.items
-                return schema_class.model_validate(raw)
+                return unwrap_structured_response(
+                    response.tool_calls[0].input, validator_class, is_list
+                )
 
         _scratchpad_llm_instance = _ScratchpadLLM()
 
diff --git a/anton/core/llm/client.py b/anton/core/llm/client.py
index df368ffb..abce3620 100644
--- a/anton/core/llm/client.py
+++ b/anton/core/llm/client.py
@@ -84,6 +84,135 @@ async def code(
             max_tokens=max_tokens or self._max_tokens,
         )
 
+    async def _generate_object_with(
+        self,
+        schema_class,
+        *,
+        provider: LLMProvider,
+        model: str,
+        system: str,
+        messages: list[dict],
+        max_tokens: int | None,
+    ):
+        """Internal: forced-tool-call structured output via any provider.
+
+        Shared by `generate_object` (planning) and `generate_object_code`
+        (coding). The schema-building/unwrapping logic is in
+        `anton.core.llm.structured` so the scratchpad bridge can use the
+        same primitives without depending on this class.
+        """
+        from anton.core.llm.structured import (
+            build_structured_tool,
+            unwrap_structured_response,
+        )
+
+        tool, validator_class, is_list = build_structured_tool(schema_class)
+
+        response = await provider.complete(
+            model=model,
+            system=system,
+            messages=messages,
+            tools=[tool],
+            tool_choice={"type": "tool", "name": tool["name"]},
+            max_tokens=max_tokens or self._max_tokens,
+        )
+
+        if not response.tool_calls:
+            raise ValueError(
+                f"LLM did not return a tool call for forced schema {tool['name']}."
+            )
+
+        return unwrap_structured_response(
+            response.tool_calls[0].input, validator_class, is_list
+        )
+
+    async def generate_object(
+        self,
+        schema_class,
+        *,
+        system: str,
+        messages: list[dict],
+        max_tokens: int | None = None,
+    ):
+        """Generate a structured object using the *planning* provider.
+
+        Forces the planning LLM to call a synthetic tool whose
+        input_schema is derived from the Pydantic model. The tool's
+        input is then validated through `model_validate`, returning a
+        typed instance (or a list of instances for `list[Model]`).
+
+        This is the right primitive for any code that wants structured
+        output from the LLM. It is more reliable than asking for JSON
+        in the response text because:
+
+          - The LLM is *forced* (via `tool_choice`) to call the tool
+          - The tool's input is constrained by the JSON schema
+          - Pydantic catches any structural drift via `model_validate`
+
+        Use this method for any structured-output operation that
+        currently uses `plan()`. For operations that should use the
+        cheaper coding model (memory compaction, identity extraction,
+        anything that ran via `code()` previously), use
+        `generate_object_code()` instead.
+
+        Args:
+            schema_class: A Pydantic `BaseModel` subclass, or a
+                `list[Model]` annotation for a homogeneous list.
+            system: System prompt for the call.
+            messages: Conversation messages.
+            max_tokens: Token budget. Defaults to `self._max_tokens`.
+
+        Returns:
+            An instance of `schema_class`, or a `list[Model]` when the
+            input was a list annotation.
+
+        Raises:
+            ValueError: If the LLM fails to produce a tool call (rare —
+                forced tool_choice usually prevents this).
+            pydantic.ValidationError: If the tool's input doesn't match
+                the schema.
+
+        The schema-building / unwrapping logic is shared with
+        `_ScratchpadLLM.generate_object` (in `scratchpad_boot.py`) via
+        `anton.core.llm.structured` — only the actual provider call
+        differs between the two runtime contexts (async planning here,
+        sync subprocess there).
+        """
+        return await self._generate_object_with(
+            schema_class,
+            provider=self._planning_provider,
+            model=self._planning_model,
+            system=system,
+            messages=messages,
+            max_tokens=max_tokens,
+        )
+
+    async def generate_object_code(
+        self,
+        schema_class,
+        *,
+        system: str,
+        messages: list[dict],
+        max_tokens: int | None = None,
+    ):
+        """Generate a structured object using the *coding* provider.
+
+        Same forced-tool-call mechanism as `generate_object`, but routes
+        through the coding provider/model. Use this when the operation
+        is a fast, cheap structured task that previously called
+        `code()` — e.g. memory compaction, identity extraction,
+        scratchpad post-mortem analysis. The savings vs. the planning
+        model add up across many small calls.
+        """
+        return await self._generate_object_with(
+            schema_class,
+            provider=self._coding_provider,
+            model=self._coding_model,
+            system=system,
+            messages=messages,
+            max_tokens=max_tokens,
+        )
+
     @classmethod
     def from_settings(cls, settings: AntonSettings) -> LLMClient:
         from .anthropic import AnthropicProvider
diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index 960a8b61..93d179cb 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -11,6 +11,7 @@
 )
 
 if TYPE_CHECKING:
+    from anton.core.memory.skills import SkillStore
     from anton.core.tools.tool_defs import ToolDef
 
 
@@ -42,6 +43,51 @@ def _build_tool_prompts_section(self, tool_defs: list["ToolDef"] | None) -> str:
 
         return "\n\n".join(chunks)
 
+    def _build_procedural_memory_section(
+        self, skill_store: "SkillStore | None"
+    ) -> str:
+        """Build the '## Procedural memory' section listing available skills.
+
+        Lists each skill as `- label: when_to_use` (one line) plus a short
+        instruction telling the LLM to call `recall_skill(label)` to load
+        the full procedure. Returns an empty string if no store is wired
+        or no skills are saved — the caller skips the section entirely.
+        """
+        if skill_store is None:
+            return ""
+        try:
+            summaries = skill_store.list_summaries()
+        except Exception:
+            return ""
+        if not summaries:
+            return ""
+
+        lines: list[str] = [
+            "",
+            "",
+            "## Procedural memory (skills available)",
+            "",
+            (
+                "These are reusable procedures you've previously refined for "
+                "recurring tasks. When the user's request matches one of "
+                "them, call `recall_skill(label)` to load the full step-by-"
+                "step procedure into your context. You may recall multiple "
+                "skills if the task spans several. If none apply, proceed "
+                "with normal reasoning."
+            ),
+            "",
+        ]
+        for s in summaries:
+            label = s.get("label", "")
+            when = s.get("when_to_use", "").strip()
+            if not label:
+                continue
+            if when:
+                lines.append(f"- `{label}` — {when}")
+            else:
+                lines.append(f"- `{label}`")
+        return "\n".join(lines)
+
     def _build_visualizations_section(
         self,
         *,
@@ -71,6 +117,7 @@ def build(
         project_context: str = "",
         self_awareness_context: str = "",
         datasource_context: str = "",
+        skill_store: "SkillStore | None" = None,
     ) -> str:
         output_path = f"{Path(str(output_dir)).as_posix().rstrip('/')}/"
 
@@ -98,6 +145,10 @@ def build(
         if datasource_context:
             prompt += datasource_context
 
+        procedural_memory = self._build_procedural_memory_section(skill_store)
+        if procedural_memory:
+            prompt += procedural_memory
+
         return prompt
 
 
diff --git a/anton/core/llm/structured.py b/anton/core/llm/structured.py
new file mode 100644
index 00000000..02372c8e
--- /dev/null
+++ b/anton/core/llm/structured.py
@@ -0,0 +1,128 @@
+"""Shared schema-building / response-unwrapping for structured LLM output.
+
+Two pure helper functions that turn a Pydantic model (or `list[Model]`)
+into the inputs needed for a forced tool-call, and validate the LLM's
+response back into a typed Python instance.
+
+Used by:
+
+  - `LLMClient.generate_object` — async, planning-LLM, main process
+  - `_ScratchpadLLM.generate_object` — sync, scratchpad subprocess bridge
+
+The two call sites differ only in *how* they invoke the provider (async
+vs sync, different model/credential resolution). The schema-derivation
+and Pydantic validation logic is identical and lives here exactly once.
+
+Why a separate module
+=====================
+
+The original implementation was duplicated across `client.py` (added
+for the cerebellum) and `scratchpad_boot.py` (the existing scratchpad
+bridge). The two halves can't share a class because they live in
+different runtime contexts (main process async vs subprocess sync), but
+they CAN share pure helper functions — which is what this module
+provides. Importing this module from either side is cheap and safe;
+the subprocess already imports from `anton.core.*` at boot.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+
+def build_structured_tool(schema_class) -> tuple[dict, type, bool]:
+    """Build a forced tool-call definition from a Pydantic schema.
+
+    Args:
+        schema_class: A Pydantic ``BaseModel`` subclass, OR a
+            ``list[Model]`` annotation for a homogeneous list. The
+            list-of-model case is supported by wrapping the inner
+            type in a synthetic ``_ArrayWrapper`` model with an
+            ``items`` field — many providers refuse top-level
+            arrays in tool input schemas, so the wrapper is required.
+
+    Returns:
+        A 3-tuple of:
+
+        - **tool_dict**: ready to pass as ``tools=[tool_dict]`` to
+          ``provider.complete()``. The caller should also pass
+          ``tool_choice={"type": "tool", "name": tool_dict["name"]}``
+          to force the LLM to call this specific tool.
+        - **validator_class**: the Pydantic class to call
+          ``model_validate()`` on (the wrapper for the list case,
+          the original class otherwise).
+        - **is_list**: True iff the original input was a ``list[Model]``
+          annotation. The caller uses this to decide whether to unwrap
+          the wrapper's ``items`` field after validation.
+
+    Note:
+        Pydantic is imported lazily so this module can be imported
+        without forcing pydantic to be available at import time. The
+        only operations on this module that REQUIRE pydantic are the
+        actual function calls — at which point any caller doing
+        structured output already needs pydantic anyway.
+    """
+    from pydantic import BaseModel
+
+    is_list = (
+        hasattr(schema_class, "__origin__")
+        and schema_class.__origin__ is list
+    )
+    if is_list:
+        inner_class = schema_class.__args__[0]
+
+        class _ArrayWrapper(BaseModel):
+            items: list[inner_class]  # type: ignore[valid-type]
+
+        schema = _ArrayWrapper.model_json_schema()
+        tool_name = f"{inner_class.__name__}_array"
+        validator_class: type = _ArrayWrapper
+    else:
+        schema = schema_class.model_json_schema()
+        tool_name = schema_class.__name__
+        validator_class = schema_class
+
+    tool = {
+        "name": tool_name,
+        "description": (
+            f"Generate structured output matching the {tool_name} schema."
+        ),
+        "input_schema": schema,
+    }
+    return tool, validator_class, is_list
+
+
+def unwrap_structured_response(
+    tool_call_input: dict[str, Any],
+    validator_class: type,
+    is_list: bool,
+):
+    """Validate a forced tool-call's input via Pydantic and unwrap.
+
+    Args:
+        tool_call_input: The ``.input`` dict from the LLM's ``ToolCall``.
+        validator_class: The validator class returned from
+            ``build_structured_tool``.
+        is_list: The ``is_list`` flag returned from
+            ``build_structured_tool``.
+
+    Returns:
+        A validated Pydantic instance, or a ``list[Model]`` if the
+        original schema was a list annotation.
+
+    Raises:
+        pydantic.ValidationError: If ``tool_call_input`` doesn't match
+            the schema. With forced tool_choice this is rare — the
+            provider usually rejects misshapen tool calls server-side
+            — but the validation step is the safety net.
+    """
+    validated = validator_class.model_validate(tool_call_input)
+    if is_list:
+        return validated.items  # type: ignore[attr-defined]
+    return validated
+
+
+__all__ = [
+    "build_structured_tool",
+    "unwrap_structured_response",
+]
diff --git a/anton/core/memory/cerebellum.py b/anton/core/memory/cerebellum.py
new file mode 100644
index 00000000..c2d6bd03
--- /dev/null
+++ b/anton/core/memory/cerebellum.py
@@ -0,0 +1,385 @@
+"""Cerebellum — supervised error learning over scratchpad execution.
+
+Brain analogue
+==============
+
+The cerebellum's classical role is *forward modeling and error correction*:
+when a motor command is issued, the cerebellum predicts the expected
+sensory consequences. When actual feedback arrives, it computes the
+prediction error and uses it to refine future commands. The learning
+rule is supervised — there's an explicit "teacher" (the actual outcome)
+against which the prediction is compared.
+
+Anton's analogue
+================
+
+For Anton, the "motor command" is a scratchpad cell. Before the cell
+runs, the LLM declares its intent via the `one_line_description` field
+on the scratchpad tool. That description IS the forward model — the
+prediction of what the cell should do. After the cell runs, we have
+its actual outcome (stdout, stderr, error). The cerebellum compares
+the two and, when they diverge meaningfully, encodes a correction
+note that future code-generating LLM calls will see.
+
+Storage and retrieval
+=====================
+
+The cerebellum is a *producer* only. It does not own its own storage.
+Corrections are encoded as `Engram` objects with `kind="lesson"` and
+`topic="scratchpad"` via `Cortex.encode()`, the same path that manual
+lessons and the consolidator already use. They flow back into future
+prompts via the existing `Cortex.get_scratchpad_context()` →
+`recall_scratchpad_wisdom()` → scratchpad tool description injection
+chain. We add nothing to the storage layer — the cerebellum just
+generates new entries for the existing pipe.
+
+Decoupling
+==========
+
+This module knows nothing about scratchpad runtimes. It exposes two
+async hook methods (`on_pre_execute`, `on_post_execute`) that take a
+`Cell` object. Whoever orchestrates execution (today: the
+`handle_scratchpad` dispatcher) is responsible for calling them at the
+right moments. The runtime backends — local, future remote, future
+Docker — are completely hook-agnostic. See discussion in v2.2 design.
+
+Diff frequency
+==============
+
+The cerebellum batches its observations *per turn*, not per cell. Each
+post-execute hook only buffers the cell; the actual diff/encoding work
+runs at end-of-turn (triggered explicitly by the session, or by the
+next pre-execute call from a different turn — whichever comes first).
+This keeps cost predictable: at most one extra LLM call per
+scratchpad-using turn, regardless of how many cells the LLM ran.
+
+Cheap path
+==========
+
+Cells that complete cleanly (no error, empty stderr) contribute zero
+LLM-call cost — they're never sent to the diff function. Only cells
+that errored or warned trigger the post-turn diff.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, Field
+
+from anton.core.backends.base import Cell
+from anton.core.memory.hippocampus import Engram
+
+if TYPE_CHECKING:
+    from anton.core.llm.client import LLMClient
+    from anton.core.memory.cortex import Cortex
+
+
+_log = logging.getLogger(__name__)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# LLM-facing schema (Pydantic) — used by LLMClient.generate_object
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _LessonDraft(BaseModel):
+    """One generalizable lesson the diff pass extracted from a cell."""
+
+    text: str = Field(
+        ...,
+        description=(
+            "One sentence (ideally < 25 words) the next code-writing agent "
+            "should know to avoid the same trap. Specific and actionable. "
+            "Generalizable across files/projects, not session-specific."
+        ),
+    )
+    topic: str = Field(
+        default="scratchpad",
+        description="Topic tag for retrieval. Default 'scratchpad'.",
+    )
+
+
+class _DiffPassResult(BaseModel):
+    """Wrapper schema returned by the cerebellum's diff LLM call.
+
+    The LLM is *forced* to call a tool whose input matches this schema,
+    so we never have to parse text JSON or strip markdown fences.
+    """
+
+    lessons: list[_LessonDraft] = Field(
+        default_factory=list,
+        description=(
+            "Generalizable lessons extracted from the batch. Empty list if "
+            "the cells either succeeded cleanly or didn't reveal any "
+            "reusable insight. Prefer fewer, broader lessons over many "
+            "narrow ones."
+        ),
+    )
+
+
+_DIFF_SYSTEM_PROMPT = (
+    "You are a code-execution post-mortem analyst. You receive a batch of "
+    "scratchpad cells the agent just ran in one turn — for each cell, the "
+    "agent's stated intent and what actually happened. Your job is to "
+    "extract any *generalizable* lessons that future code-writing agents "
+    "should know to avoid the same trap."
+)
+
+
+_DIFF_USER_TEMPLATE = """\
+The agent just executed the following scratchpad cells in a single turn. For each cell, you see:
+- The agent's stated intent (the `one_line_description` field on the tool call)
+- The actual result (stdout, stderr, error)
+
+Identify any *generalizable* lessons from these executions that a future code-writing agent should know to avoid the same trap. Be specific and actionable. Skip cells where everything went fine — only report lessons from genuine divergences.
+
+Cells:
+
+{cells_block}
+
+Rules:
+- Only report a lesson if the cell genuinely failed or diverged from its stated intent. If everything went fine, return an empty list.
+- Lessons should be generalizable, not session-specific. "Use low_memory=False with mixed-dtype CSVs" is good. "The file sales_q3.csv had a problem on line 12000" is bad.
+- Prefer fewer, more reusable lessons over many narrow ones.
+- Do NOT invent lessons that aren't supported by the cell evidence.
+- Each lesson should be one sentence, ideally under 25 words.
+"""
+
+
+def _format_cell_for_diff(cell: Cell, index: int) -> str:
+    """Render a single cell as a compact section for the diff prompt."""
+    lines: list[str] = [f"### Cell {index}"]
+    intent = (cell.description or "").strip() or "(no description provided)"
+    lines.append(f"**Intent:** {intent}")
+
+    code = (cell.code or "").strip()
+    if code:
+        snippet = code if len(code) <= 800 else code[:800] + "\n... [truncated]"
+        lines.append("**Code:**")
+        lines.append("```python")
+        lines.append(snippet)
+        lines.append("```")
+
+    stdout = (cell.stdout or "").strip()
+    if stdout:
+        excerpt = stdout if len(stdout) <= 500 else stdout[:500] + "\n... [truncated]"
+        lines.append(f"**stdout:** {excerpt}")
+
+    stderr = (cell.stderr or "").strip()
+    if stderr:
+        excerpt = stderr if len(stderr) <= 400 else stderr[:400] + "\n... [truncated]"
+        lines.append(f"**stderr:** {excerpt}")
+
+    error = (cell.error or "").strip() if cell.error else ""
+    if error:
+        excerpt = error if len(error) <= 400 else error[:400] + "\n... [truncated]"
+        lines.append(f"**error:** {excerpt}")
+
+    if not stdout and not stderr and not error:
+        lines.append("(no output produced)")
+
+    return "\n".join(lines)
+
+
+@dataclass
+class CerebellumLesson:
+    """A single correction extracted by the diff pass.
+
+    The dataclass form is the public type the cerebellum exposes to the
+    rest of Anton (Cell, Engram, etc. all use dataclasses). Internally,
+    the diff LLM call uses the Pydantic `_LessonDraft` schema for forced
+    structured output; we convert at the boundary.
+    """
+
+    text: str
+    topic: str = "scratchpad"
+
+
+class Cerebellum:
+    """Forward-model + error-driven learner over scratchpad cells.
+
+    Usage:
+
+        cb = Cerebellum(cortex=session._cortex, llm=session._llm)
+        # Wired into the dispatcher as a scratchpad observer:
+        await cb.on_pre_execute(prelim_cell)
+        await cb.on_post_execute(final_cell)
+        # At end of turn (or before the next pre-execute from a different turn):
+        await cb.flush()
+
+    Cells are buffered until `flush()` is called. The diff pass runs only
+    on the buffered cells that errored or warned — clean cells contribute
+    nothing. Lessons are encoded via `cortex.encode()` and flow into
+    future prompts through the existing wisdom-injection pipeline.
+    """
+
+    def __init__(
+        self,
+        *,
+        cortex: "Cortex | None",
+        llm: "LLMClient | None",
+        max_lessons_per_flush: int = 3,
+    ) -> None:
+        self._cortex = cortex
+        self._llm = llm
+        self._max_lessons = max_lessons_per_flush
+        # Cells observed since the last flush. Indexed by insertion order.
+        self._buffered: list[Cell] = []
+        # Optional intent capture from pre-execute. Today we just trust the
+        # cell that arrives at post_execute, but pre_execute is also a
+        # natural place to clear stale buffers from prior cancelled turns.
+        self._pre_count: int = 0
+
+    # ── observer hooks ─────────────────────────────────────────────
+
+    async def on_pre_execute(self, cell: Cell) -> None:
+        """Called by the dispatcher right before a cell is sent to the runtime.
+
+        The cell here is preliminary — code + description are populated,
+        but stdout/stderr/error are empty. We don't currently use the
+        prelim cell directly (the LLM's intent is already captured in
+        cell.description, which post_execute will see again), but the
+        hook exists so future expansions can do real forward-model work.
+        """
+        self._pre_count += 1
+
+    async def on_post_execute(self, cell: Cell) -> None:
+        """Called by the dispatcher right after a cell finishes executing.
+
+        We buffer the cell here. The actual diff/encode work runs in
+        flush() — see class docstring for the batching rationale.
+        """
+        # Cheap path: clean cells never need diffing
+        if self._is_clean(cell):
+            return
+        self._buffered.append(cell)
+
+    async def flush(self) -> list[CerebellumLesson]:
+        """Run the batched diff pass and encode any extracted lessons.
+
+        Should be called at end-of-turn. Safe to call when the buffer is
+        empty (returns []). Safe to call multiple times (idempotent — the
+        buffer is cleared each time).
+
+        Returns the lessons that were encoded, mostly for testing /
+        observability. Production code typically ignores the return value.
+        """
+        if not self._buffered:
+            return []
+        if self._cortex is None or self._llm is None:
+            # Best-effort: silently no-op if memory infrastructure is missing
+            self._buffered.clear()
+            return []
+
+        cells = self._buffered[:]
+        self._buffered.clear()
+
+        try:
+            lessons = await self._run_diff(cells)
+        except Exception as exc:
+            _log.warning("cerebellum diff pass failed: %s", exc)
+            return []
+
+        if not lessons:
+            return []
+
+        try:
+            await self._encode_lessons(lessons)
+        except Exception as exc:
+            _log.warning("cerebellum failed to encode lessons: %s", exc)
+
+        return lessons
+
+    def reset(self) -> None:
+        """Drop the current buffer without encoding anything.
+
+        Used when a turn is cancelled mid-flight — we don't want to
+        encode lessons from a turn the user backed out of.
+        """
+        self._buffered.clear()
+        self._pre_count = 0
+
+    @property
+    def buffered_count(self) -> int:
+        """Number of cells currently waiting for the next flush."""
+        return len(self._buffered)
+
+    # ── internals ──────────────────────────────────────────────────
+
+    @staticmethod
+    def _is_clean(cell: Cell) -> bool:
+        """A cell is 'clean' if it produced no error and no stderr text.
+
+        Clean cells contribute zero diff cost — they're skipped before
+        the LLM call ever happens. This is the cheap path that keeps
+        the cerebellum's overhead near zero on a happy-path turn.
+        """
+        if cell.error:
+            return False
+        stderr = (cell.stderr or "").strip()
+        if stderr:
+            return False
+        return True
+
+    async def _run_diff(self, cells: list[Cell]) -> list[CerebellumLesson]:
+        """Send the buffered cells to the LLM and return validated lessons.
+
+        Uses `LLMClient.generate_object_code` (the cheap/fast coding
+        provider) to force structured output via a forced tool call
+        whose schema is the `_DiffPassResult` Pydantic model. There's
+        no manual JSON parsing, no markdown fence stripping, and no
+        try/except around `json.loads` — Pydantic and the forced
+        tool_choice eliminate those failure modes entirely. If the
+        LLM round-trip itself fails (network, validation), the
+        caller's try/except in `flush()` swallows it.
+
+        We use the *coding* provider (not planning) because this is a
+        fast post-mortem on cell output — exactly the kind of cheap
+        structured task the coding model is sized for.
+        """
+        cells_block = "\n\n".join(
+            _format_cell_for_diff(c, i + 1) for i, c in enumerate(cells)
+        )
+        prompt = _DIFF_USER_TEMPLATE.format(cells_block=cells_block)
+
+        result: _DiffPassResult = await self._llm.generate_object_code(
+            _DiffPassResult,
+            system=_DIFF_SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=600,
+        )
+
+        # Convert the validated Pydantic drafts to the public dataclass
+        # form. Cap at max_lessons_per_flush to bound the encoding cost
+        # of any single flush.
+        out: list[CerebellumLesson] = []
+        for draft in result.lessons[: self._max_lessons]:
+            text = (draft.text or "").strip()
+            if not text:
+                continue
+            topic = (draft.topic or "scratchpad").strip() or "scratchpad"
+            out.append(CerebellumLesson(text=text, topic=topic))
+        return out
+
+    async def _encode_lessons(self, lessons: list[CerebellumLesson]) -> None:
+        """Hand the lessons to Cortex for storage in the existing pipeline."""
+        engrams = [
+            Engram(
+                text=lesson.text,
+                kind="lesson",
+                scope="project",
+                confidence="medium",
+                topic=lesson.topic,
+                source="consolidation",
+            )
+            for lesson in lessons
+        ]
+        if not engrams:
+            return
+        await self._cortex.encode(engrams)
+
+
+__all__ = ["Cerebellum", "CerebellumLesson"]
diff --git a/anton/core/memory/consolidator.py b/anton/core/memory/consolidator.py
index ae07ebd7..91f226d0 100644
--- a/anton/core/memory/consolidator.py
+++ b/anton/core/memory/consolidator.py
@@ -22,8 +22,9 @@
 
 from __future__ import annotations
 
-import json
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
+
+from pydantic import BaseModel, Field
 
 from anton.core.llm.prompts import CONSOLIDATION_PROMPT
 from anton.core.memory.hippocampus import Engram
@@ -33,6 +34,60 @@
     from anton.core.backends.base import Cell
 
 
+# ─────────────────────────────────────────────────────────────────────────────
+# LLM-facing schema (Pydantic) — used by LLMClient.generate_object_code
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _ConsolidatedLesson(BaseModel):
+    """One engram extracted from a scratchpad replay."""
+
+    text: str = Field(
+        ...,
+        description=(
+            "The lesson itself — what a future agent should know to do "
+            "this kind of task better. Concrete and actionable."
+        ),
+    )
+    kind: Literal["always", "never", "when", "lesson"] = Field(
+        default="lesson",
+        description=(
+            "Engram type. 'always'/'never' = behavioral rules, "
+            "'when' = conditional rule, 'lesson' = semantic fact."
+        ),
+    )
+    scope: Literal["global", "project"] = Field(
+        default="project",
+        description=(
+            "'global' = applies across all projects, 'project' = "
+            "specific to this codebase. Default project."
+        ),
+    )
+    confidence: Literal["high", "medium", "low"] = Field(
+        default="medium",
+        description=(
+            "How confident you are this lesson generalizes. 'high' "
+            "auto-encodes; 'medium'/'low' may require user confirmation."
+        ),
+    )
+    topic: str = Field(
+        default="",
+        description="Optional topic tag for retrieval grouping.",
+    )
+
+
+class _ConsolidatedLessons(BaseModel):
+    """Wrapper for the list of lessons returned by the consolidator."""
+
+    items: list[_ConsolidatedLesson] = Field(
+        default_factory=list,
+        description=(
+            "Lessons extracted from the scratchpad replay. Empty list "
+            "if nothing worth remembering. Cap at ~5 — be selective."
+        ),
+    )
+
+
 class Consolidator:
     """Extracts durable lessons from scratchpad sessions via offline replay.
 
@@ -110,51 +165,27 @@ async def replay_and_extract(
         session_summary = "\n".join(summary_lines)
 
         try:
-            response = await llm_client.code(
+            result: _ConsolidatedLessons = await llm_client.generate_object_code(
+                _ConsolidatedLessons,
                 system=CONSOLIDATION_PROMPT,
                 messages=[{"role": "user", "content": session_summary}],
                 max_tokens=2048,
             )
-
-            raw = response.content.strip()
-            # Handle markdown code fences
-            if raw.startswith("```"):
-                raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
-                if raw.endswith("```"):
-                    raw = raw[:-3]
-                raw = raw.strip()
-
-            items = json.loads(raw)
-            if not isinstance(items, list):
-                return []
-
         except Exception:
             return []
 
         engrams: list[Engram] = []
-        for item in items:
-            if not isinstance(item, dict) or "text" not in item:
+        for item in result.items:
+            text = (item.text or "").strip()
+            if not text:
                 continue
-
-            kind = item.get("kind", "lesson")
-            if kind not in ("always", "never", "when", "lesson"):
-                kind = "lesson"
-
-            scope = item.get("scope", "project")
-            if scope not in ("global", "project"):
-                scope = "project"
-
-            confidence = item.get("confidence", "medium")
-            if confidence not in ("high", "medium", "low"):
-                confidence = "medium"
-
             engrams.append(
                 Engram(
-                    text=item["text"],
-                    kind=kind,
-                    scope=scope,
-                    confidence=confidence,
-                    topic=item.get("topic", ""),
+                    text=text,
+                    kind=item.kind,
+                    scope=item.scope,
+                    confidence=item.confidence,
+                    topic=item.topic,
                     source="consolidation",
                 )
             )
diff --git a/anton/core/memory/cortex.py b/anton/core/memory/cortex.py
index 6038acba..6d344dac 100644
--- a/anton/core/memory/cortex.py
+++ b/anton/core/memory/cortex.py
@@ -19,10 +19,11 @@
 
 from __future__ import annotations
 
-import json
 from pathlib import Path
 from typing import TYPE_CHECKING
 
+from pydantic import BaseModel, Field
+
 from anton.core.memory.base import HippocampusProtocol
 from anton.core.memory.hippocampus import Engram, Hippocampus
 
@@ -30,20 +31,61 @@
     from anton.core.llm.client import LLMClient
 
 
-_IDENTITY_EXTRACT_PROMPT = """\
-Extract identity facts from this user message. Return a JSON array of strings,
-each a concise fact about the user (name, timezone, expertise, preferences, tools).
+# ─────────────────────────────────────────────────────────────────────────────
+# Pydantic schemas — used by LLMClient.generate_object
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _IdentityFacts(BaseModel):
+    """Result of the identity-extraction LLM call.
+
+    Each fact is a concise statement about the user (name, timezone,
+    expertise, preferences, tools). Empty list when nothing relevant
+    is found in the message.
+    """
 
-If no identity-relevant information is found, return [].
+    facts: list[str] = Field(
+        default_factory=list,
+        description=(
+            "Identity facts extracted from the user message. Each fact "
+            "is a concise statement about the user. Examples: "
+            "'Name: Jorge', 'Timezone: PST', 'Prefers dark mode', "
+            "'Uses uv over pip'. Only extract facts that are clearly "
+            "about the user's identity, preferences, or working style. "
+            "Ignore transient conversation details. Return an empty list "
+            "if nothing identity-relevant is found."
+        ),
+    )
+
+
+class _CompactionResult(BaseModel):
+    """Result of the memory-compaction LLM call.
+
+    Returns the deduplicated entries to keep, plus optional metadata
+    about what was merged and pruned (purely for logging — the cortex
+    only acts on `kept`).
+    """
+
+    kept: list[str] = Field(
+        ...,
+        description=(
+            "Entry strings to keep after compaction. Preserve the "
+            "trailing `<!-- ... -->` metadata comment on each entry "
+            "exactly as it appears in the input."
+        ),
+    )
+    merged: list[str] = Field(
+        default_factory=list,
+        description="Strings describing what was merged (for logging).",
+    )
+    pruned: list[str] = Field(
+        default_factory=list,
+        description="Strings describing what was removed and why (for logging).",
+    )
 
-Examples of identity facts:
-- "Name: Jorge"
-- "Timezone: PST"
-- "Prefers dark mode"
-- "Uses uv over pip"
 
-Only extract facts that are clearly about the user's identity, preferences,
-or working style. Ignore transient conversation details.
+_IDENTITY_EXTRACT_PROMPT = """\
+Extract identity facts from this user message — concise statements about the user (name, timezone, expertise, preferences, tools). Only extract facts that are clearly about the user's identity, preferences, or working style. Ignore transient conversation details. Return an empty list if nothing identity-relevant is found.
 """
 
 _COMPACTION_PROMPT = """\
@@ -53,12 +95,7 @@
 3. Remove entries that are superseded by newer, more specific entries
 4. Keep all unique, useful entries
 
-Return a JSON object with:
-- "kept": array of entry strings to keep — preserve the trailing `<!-- ... -->` metadata comment on each entry exactly as it appears
-- "merged": array of strings describing what was merged
-- "pruned": array of strings describing what was removed and why
-
-Be conservative — when in doubt, keep the entry.
+Be conservative — when in doubt, keep the entry. Preserve the trailing `<!-- ... -->` metadata comment on each kept entry exactly as it appears.
 """
 
 
@@ -389,13 +426,13 @@ async def _compact_file(self, hc: Hippocampus, path: Path, kind: str) -> None:
             return
 
         try:
-            response = await self._llm.code(
+            result: _CompactionResult = await self._llm.generate_object_code(
+                _CompactionResult,
                 system=_COMPACTION_PROMPT,
                 messages=[{"role": "user", "content": "\n".join(entries)}],
                 max_tokens=4096,
             )
-            result = json.loads(response.content)
-            kept = result.get("kept", entries)
+            kept = result.kept or entries
         except Exception:
             return  # Don't corrupt memory on failure
 
@@ -439,13 +476,14 @@ async def maybe_update_identity(self, user_message: str) -> None:
             return
 
         try:
-            response = await self._llm.code(
+            result: _IdentityFacts = await self._llm.generate_object_code(
+                _IdentityFacts,
                 system=_IDENTITY_EXTRACT_PROMPT,
                 messages=[{"role": "user", "content": user_message}],
                 max_tokens=512,
             )
-            facts = json.loads(response.content)
-            if not isinstance(facts, list) or not facts:
+            facts = result.facts
+            if not facts:
                 return
         except Exception:
             return
diff --git a/anton/core/memory/skills.py b/anton/core/memory/skills.py
new file mode 100644
index 00000000..0d0b1a87
--- /dev/null
+++ b/anton/core/memory/skills.py
@@ -0,0 +1,422 @@
+"""Procedural memory: skills as multi-stage directories.
+
+A *skill* is one concept with multiple representations that coexist:
+
+  - Stage 1 (declarative.md) — step-by-step procedure the LLM reads. Always present.
+  - Stage 2 (chunks.md)      — higher-level recipes/macros. Emerges from use. (v2+)
+  - Stage 3 (code/)          — runnable helper modules. Emerges from reliability. (v2+)
+
+Each skill lives at `~/.anton/skills/<label>/` as a directory:
+
+    ~/.anton/skills/csv_summary/
+    ├── meta.json          # label, name, description, when_to_use, provenance, presence flags
+    ├── declarative.md     # Stage 1 — required
+    ├── chunks.md          # Stage 2 — optional
+    ├── code/              # Stage 3 — optional
+    │   └── __init__.py
+    ├── requirements.txt   # Stage 3 deps — optional
+    └── stats.json         # per-stage usage counters
+
+This module is the storage layer only — read/write/search. The classifier
+(`recall_skill` tool) and the LLM-driven save command live elsewhere.
+
+Brain analogue: cortico-striatal procedural memory. The executive (PFC)
+recognizes a familiar pattern in the user's request, retrieves the
+matching procedure from the striatum, and executes it. Stages coexist
+rather than graduating — the executive picks the highest stage that's
+reliable enough for the current context.
+
+Relationship to `Engram` (anton/core/memory/hippocampus.py):
+    `Engram` is the unit of *declarative* memory in Anton — a single
+    fact, rule, or lesson stored as a flat bullet in rules.md / lessons.md /
+    profile.md. Engrams are loaded into every prompt unconditionally because
+    they're cheap (one line each). The brain-region analogue is the
+    hippocampus → neocortex consolidation pathway.
+
+    `Skill` is the unit of *procedural* memory in Anton — a multi-step
+    workflow stored as a directory of staged representations. Skills are
+    NOT loaded into every prompt; the LLM sees only their compact label +
+    when_to_use line and explicitly retrieves the full procedure via the
+    `recall_skill` tool when it recognizes a match. The brain-region
+    analogue is the hippocampus → striatum / cerebellum pathway.
+
+    Both systems coexist in the brain (declarative and procedural memory
+    are dissociable — H.M. lost the former but kept the latter), and they
+    coexist in Anton. Engrams hold facts; Skills hold procedures.
+
+Naming note:
+    The unique identifier for a skill is called its `label`. In cognitive
+    psychology this is the declarative handle by which a procedural
+    memory is addressed in working memory — the verbal token the
+    executive holds when deciding to invoke a stored procedure. It is
+    deliberately distinct from `name` (the human-readable display) and
+    `when_to_use` (the retrieval cue describing the matching context).
+"""
+
+from __future__ import annotations
+
+import difflib
+import json
+import re
+import shutil
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+
+
+_DEFAULT_SKILLS_ROOT = Path("~/.anton/skills").expanduser()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Data classes
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class StageStats:
+    """Per-stage usage tracking.
+
+    `recommended` increments every time the classifier (recall_skill tool)
+    pulls this stage into context. `used` increments when scratchpad code
+    actually imports a Stage 3 helper — Stage 1 and Stage 2 only have a
+    `recommended` signal because there's no mechanical way to detect
+    whether the LLM "followed" a markdown procedure.
+    """
+
+    recommended: int = 0
+    used: int = 0
+    last_used: str = ""  # ISO timestamp
+    confidence: float = 0.0
+
+
+@dataclass
+class SkillStats:
+    total_recalls: int = 0
+    stage_1: StageStats = field(default_factory=StageStats)
+    stage_2: StageStats = field(default_factory=StageStats)
+    stage_3: StageStats = field(default_factory=StageStats)
+
+
+@dataclass
+class Skill:
+    """In-memory representation of a skill directory.
+
+    Always carries the metadata. The Stage 1 markdown is loaded eagerly
+    because it's small and almost always needed. Stage 2 and Stage 3
+    content (when present) is loaded on demand by callers.
+
+    The `label` is the declarative handle for this procedural memory —
+    the snake_case identifier the LLM uses when it calls
+    `recall_skill(label)`. It is the directory name on disk.
+    """
+
+    label: str
+    name: str
+    description: str
+    when_to_use: str
+    declarative_md: str
+    created_at: str
+    provenance: str  # "manual" | "consolidator" (future)
+    stage_1_present: bool = True
+    stage_2_present: bool = False
+    stage_3_present: bool = False
+    stats: SkillStats = field(default_factory=SkillStats)
+
+    def to_meta_dict(self) -> dict:
+        """Serialize the meta.json payload (excludes declarative content + stats)."""
+        return {
+            "label": self.label,
+            "name": self.name,
+            "description": self.description,
+            "when_to_use": self.when_to_use,
+            "created_at": self.created_at,
+            "provenance": self.provenance,
+            "stage_1_present": self.stage_1_present,
+            "stage_2_present": self.stage_2_present,
+            "stage_3_present": self.stage_3_present,
+        }
+
+    def to_stats_dict(self) -> dict:
+        return {
+            "total_recalls": self.stats.total_recalls,
+            "stage_1": _stage_stats_to_dict(self.stats.stage_1),
+            "stage_2": _stage_stats_to_dict(self.stats.stage_2),
+            "stage_3": _stage_stats_to_dict(self.stats.stage_3),
+        }
+
+
+def _stage_stats_to_dict(s: StageStats) -> dict:
+    return {
+        "recommended": s.recommended,
+        "used": s.used,
+        "last_used": s.last_used,
+        "confidence": s.confidence,
+    }
+
+
+def _stage_stats_from_dict(d: dict) -> StageStats:
+    return StageStats(
+        recommended=int(d.get("recommended", 0)),
+        used=int(d.get("used", 0)),
+        last_used=str(d.get("last_used", "")),
+        confidence=float(d.get("confidence", 0.0)),
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Slug helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+_SLUG_RE = re.compile(r"[^a-z0-9_]+")
+
+
+def slugify(text: str) -> str:
+    """Normalize arbitrary text into a snake_case identifier.
+
+    Strips non-alphanumerics, lowercases, collapses runs of underscores.
+    Empty input becomes 'skill'. Used to produce path/URL-safe labels;
+    the term "slugify" refers to the formatting operation, not to the
+    semantic role of the result (which we call a `label`).
+    """
+    s = text.strip().lower().replace("-", "_").replace(" ", "_")
+    s = _SLUG_RE.sub("_", s)
+    s = re.sub(r"_+", "_", s).strip("_")
+    return s or "skill"
+
+
+def make_unique_label(base: str, store: "SkillStore") -> str:
+    """Return a label that doesn't collide with any existing skill.
+
+    If `base` (after slugify normalization) is already unique, return it
+    as-is. Otherwise append `_2`, `_3`, ... until a free slot is found.
+    """
+    candidate = slugify(base)
+    if store.load(candidate) is None:
+        return candidate
+    n = 2
+    while True:
+        next_candidate = f"{candidate}_{n}"
+        if store.load(next_candidate) is None:
+            return next_candidate
+        n += 1
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Store
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class SkillStore:
+    """File-backed store of skills under `~/.anton/skills/` (by default).
+
+    Each skill is a directory whose name is its `label` (the snake_case
+    declarative handle). The store is stateless — it reads from disk on
+    demand. Callers should not cache Skill instances long-term, since
+    stats are mutated through the store's increment helpers and a stale
+    in-memory copy will drift.
+    """
+
+    def __init__(self, root: Path | None = None) -> None:
+        self.root = Path(root) if root is not None else _DEFAULT_SKILLS_ROOT
+
+    # ── reading ─────────────────────────────────────────────────────
+
+    def _skill_dir(self, label: str) -> Path:
+        return self.root / label
+
+    def _ensure_root(self) -> None:
+        self.root.mkdir(parents=True, exist_ok=True)
+
+    def load(self, label: str) -> Skill | None:
+        """Read a single skill by label. Returns None if absent or malformed."""
+        d = self._skill_dir(label)
+        meta_path = d / "meta.json"
+        decl_path = d / "declarative.md"
+        if not meta_path.is_file() or not decl_path.is_file():
+            return None
+        try:
+            meta = json.loads(meta_path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            return None
+        try:
+            declarative = decl_path.read_text(encoding="utf-8")
+        except OSError:
+            return None
+        stats = self._load_stats(label)
+        return Skill(
+            label=str(meta.get("label", label)),
+            name=str(meta.get("name", label)),
+            description=str(meta.get("description", "")),
+            when_to_use=str(meta.get("when_to_use", "")),
+            declarative_md=declarative,
+            created_at=str(meta.get("created_at", "")),
+            provenance=str(meta.get("provenance", "manual")),
+            stage_1_present=bool(meta.get("stage_1_present", True)),
+            stage_2_present=bool(meta.get("stage_2_present", False)),
+            stage_3_present=bool(meta.get("stage_3_present", False)),
+            stats=stats,
+        )
+
+    def _load_stats(self, label: str) -> SkillStats:
+        path = self._skill_dir(label) / "stats.json"
+        if not path.is_file():
+            return SkillStats()
+        try:
+            data = json.loads(path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            return SkillStats()
+        return SkillStats(
+            total_recalls=int(data.get("total_recalls", 0)),
+            stage_1=_stage_stats_from_dict(data.get("stage_1", {})),
+            stage_2=_stage_stats_from_dict(data.get("stage_2", {})),
+            stage_3=_stage_stats_from_dict(data.get("stage_3", {})),
+        )
+
+    def list_all(self) -> list[Skill]:
+        """Return every loadable skill, sorted by label."""
+        if not self.root.is_dir():
+            return []
+        out: list[Skill] = []
+        for child in sorted(self.root.iterdir()):
+            if not child.is_dir():
+                continue
+            skill = self.load(child.name)
+            if skill is not None:
+                out.append(skill)
+        return out
+
+    def list_summaries(self) -> list[dict]:
+        """Lightweight listing for prompt-building — label + when_to_use only.
+
+        Avoids reading declarative.md for skills the LLM won't recall this turn.
+        """
+        if not self.root.is_dir():
+            return []
+        out: list[dict] = []
+        for child in sorted(self.root.iterdir()):
+            if not child.is_dir():
+                continue
+            meta_path = child / "meta.json"
+            if not meta_path.is_file():
+                continue
+            try:
+                meta = json.loads(meta_path.read_text(encoding="utf-8"))
+            except (json.JSONDecodeError, OSError):
+                continue
+            out.append(
+                {
+                    "label": str(meta.get("label", child.name)),
+                    "name": str(meta.get("name", child.name)),
+                    "when_to_use": str(meta.get("when_to_use", "")),
+                }
+            )
+        return out
+
+    # ── writing ─────────────────────────────────────────────────────
+
+    def save(self, skill: Skill) -> Path:
+        """Write the skill directory to disk. Overwrites in place.
+
+        Returns the directory path. Creates the parent root if needed.
+        Stage 2 and Stage 3 files are NOT touched here — they have their
+        own writers (consolidator, future).
+        """
+        self._ensure_root()
+        d = self._skill_dir(skill.label)
+        d.mkdir(parents=True, exist_ok=True)
+        (d / "meta.json").write_text(
+            json.dumps(skill.to_meta_dict(), indent=2) + "\n",
+            encoding="utf-8",
+        )
+        (d / "declarative.md").write_text(skill.declarative_md, encoding="utf-8")
+        # Only initialize stats.json if it doesn't already exist — we
+        # never want save() to wipe accumulated counts.
+        stats_path = d / "stats.json"
+        if not stats_path.is_file():
+            stats_path.write_text(
+                json.dumps(skill.to_stats_dict(), indent=2) + "\n",
+                encoding="utf-8",
+            )
+        return d
+
+    def delete(self, label: str) -> bool:
+        """Remove a skill directory. Returns True if it existed."""
+        d = self._skill_dir(label)
+        if not d.is_dir():
+            return False
+        shutil.rmtree(d)
+        return True
+
+    # ── stats updates ───────────────────────────────────────────────
+
+    def increment_recommended(self, label: str, *, stage: int = 1) -> None:
+        """Atomic-ish bump of the per-stage `recommended` counter.
+
+        Reads the existing stats.json, mutates the right field, writes
+        back. Best-effort — if the skill doesn't exist or the file is
+        unwritable, silently no-ops. Concurrent writers may race; that's
+        acceptable for a counter that's used for guidance, not billing.
+        """
+        d = self._skill_dir(label)
+        if not d.is_dir():
+            return
+        stats = self._load_stats(label)
+        stats.total_recalls += 1
+        target = self._stage_for(stats, stage)
+        target.recommended += 1
+        target.last_used = datetime.now(timezone.utc).isoformat()
+        try:
+            (d / "stats.json").write_text(
+                json.dumps(
+                    {
+                        "total_recalls": stats.total_recalls,
+                        "stage_1": _stage_stats_to_dict(stats.stage_1),
+                        "stage_2": _stage_stats_to_dict(stats.stage_2),
+                        "stage_3": _stage_stats_to_dict(stats.stage_3),
+                    },
+                    indent=2,
+                )
+                + "\n",
+                encoding="utf-8",
+            )
+        except OSError:
+            pass
+
+    @staticmethod
+    def _stage_for(stats: SkillStats, stage: int) -> StageStats:
+        if stage == 1:
+            return stats.stage_1
+        if stage == 2:
+            return stats.stage_2
+        if stage == 3:
+            return stats.stage_3
+        raise ValueError(f"Unknown stage: {stage}")
+
+    # ── search ──────────────────────────────────────────────────────
+
+    def closest_match(self, bad_label: str, *, cutoff: float = 0.6) -> str | None:
+        """Find the existing label closest to `bad_label`, or None.
+
+        Used by the recall_skill tool to recover from typos and guesses.
+        Cutoff is intentionally generous — we'd rather suggest a wrong
+        match the LLM can reject than return nothing.
+        """
+        bad = slugify(bad_label)
+        candidates = [s["label"] for s in self.list_summaries()]
+        if not candidates:
+            return None
+        if bad in candidates:
+            return bad
+        matches = difflib.get_close_matches(bad, candidates, n=1, cutoff=cutoff)
+        return matches[0] if matches else None
+
+
+__all__ = [
+    "Skill",
+    "SkillStats",
+    "SkillStore",
+    "StageStats",
+    "make_unique_label",
+    "slugify",
+]
diff --git a/anton/core/session.py b/anton/core/session.py
index d68e5e6b..2be23167 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -6,6 +6,9 @@
 from typing import TYPE_CHECKING
 
 from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.memory.cerebellum import Cerebellum
+from anton.core.memory.skills import SkillStore
+from anton.core.tools.recall_skill import RECALL_SKILL_TOOL
 from anton.core.llm.prompts import RESILIENCE_NUDGE
 from anton.core.llm.provider import (
     ContextOverflowError,
@@ -118,6 +121,23 @@ def __init__(self, config: ChatSessionConfig) -> None:
             workspace_path=config.workspace.base if config.workspace else None,
         )
         self.tool_registry = ToolRegistry()
+        # Procedural memory: brain-inspired skills (Stage 1 = declarative).
+        # Lives at ~/.anton/skills/<label>/. The recall_skill tool retrieves
+        # entries on demand and increments per-stage usage counters.
+        self._skill_store = SkillStore()
+        # Cerebellum: supervised error learning over scratchpad cells.
+        # Buffers errored/warning cells across the turn, runs one diff
+        # call at end-of-turn, and encodes lessons via cortex.encode().
+        # Wired into the dispatcher's observer list below.
+        self._cerebellum = Cerebellum(
+            cortex=self._cortex,
+            llm=self._llm,
+        )
+        # Scratchpad observers — list of objects with on_pre_execute /
+        # on_post_execute. Fired by handle_scratchpad around pad.execute.
+        # The runtime never sees this list; observation lives at the
+        # dispatcher layer to keep local/remote runtimes interchangeable.
+        self._scratchpad_observers: list = [self._cerebellum]
         self._explainability_store = (
             ExplainabilityStore(config.workspace.base) if config.workspace is not None else None
         )
@@ -284,6 +304,7 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
             project_context=md_context,
             self_awareness_context=sa_section,
             datasource_context=ds_ctx,
+            skill_store=self._skill_store,
         )
 
         return prompt
@@ -374,6 +395,9 @@ def _build_core_tools(self) -> None:
         if self._episodic is not None and self._episodic.enabled:
             self.tool_registry.register_tool(RECALL_TOOL)
 
+        # Procedural memory retrieval — always available, no-op if no skills.
+        self.tool_registry.register_tool(RECALL_SKILL_TOOL)
+
     async def close(self) -> None:
         """Clean up scratchpads and other resources."""
         await self._scratchpads.close_all()
@@ -492,6 +516,31 @@ def _compact_scratchpads(self) -> bool:
                 compacted = True
         return compacted
 
+    def _schedule_cerebellum_flush(self) -> None:
+        """Fire the cerebellum's batched diff pass without blocking the turn.
+
+        The cerebellum buffered any errored / warning cells across the
+        turn via its observer hooks. Now we kick off the (at most one)
+        LLM diff call as a background task — the user gets their reply
+        immediately, and any extracted lessons get encoded into the
+        existing wisdom store before the next turn typically begins.
+
+        Best-effort: if there's no buffered work or no event loop, this
+        is a no-op. Exceptions in the background task are swallowed
+        because they're already logged inside cerebellum.flush().
+        """
+        cb = getattr(self, "_cerebellum", None)
+        if cb is None:
+            return
+        if cb.buffered_count == 0:
+            return
+        try:
+            asyncio.create_task(cb.flush())
+        except RuntimeError:
+            # No running loop (e.g. called from a sync context in tests).
+            # Cerebellum learning is best-effort, so just drop the buffer.
+            cb.reset()
+
     async def turn(self, user_input: str | list[dict]) -> str:
         self._history.append({"role": "user", "content": user_input})
 
@@ -620,6 +669,12 @@ async def turn(self, user_input: str | list[dict]) -> str:
         if self._cortex is not None and self._cortex.mode != "off":
             self._cortex.maybe_vacuum()
 
+        # Cerebellar consolidation — fire-and-forget so the user gets
+        # their reply immediately while supervised error learning runs
+        # in the background. Brain analogue: cerebellar plasticity
+        # operates in parallel with continued action, not blocking it.
+        self._schedule_cerebellum_flush()
+
         return reply
 
     async def turn_stream(
@@ -726,6 +781,11 @@ async def turn_stream(
             # Periodic memory vacuum (Systems Consolidation)
             self._cortex.maybe_vacuum()
 
+        # Cerebellar consolidation — same fire-and-forget contract as
+        # the non-streaming turn. Lets the user-facing stream finish
+        # immediately while supervised error learning runs in the background.
+        self._schedule_cerebellum_flush()
+
     async def _stream_and_handle_tools(
         self, user_message: str = ""
     ) -> AsyncIterator[StreamEvent]:
diff --git a/anton/core/tools/recall_skill.py b/anton/core/tools/recall_skill.py
new file mode 100644
index 00000000..290e5af1
--- /dev/null
+++ b/anton/core/tools/recall_skill.py
@@ -0,0 +1,130 @@
+"""The `recall_skill` tool — retrieve a procedural skill into working memory.
+
+Brain analogue: prefrontal cortex pulls a stored procedure from
+long-term memory into the working buffer when it recognizes a familiar
+pattern in the current task. The tool is the *retrieval* operation; the
+LLM still has agency about whether (and how literally) to follow the
+recalled procedure.
+
+The classifier signal lives in this tool: every successful invocation
+bumps the skill's `recommended` counter, giving us a precise,
+mechanical signal of "the system thought this skill applied" without
+relying on the LLM to emit a marker or follow any convention.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from anton.core.tools.tool_defs import ToolDef
+
+if TYPE_CHECKING:
+    from anton.core.session import ChatSession
+
+
+_DESCRIPTION = (
+    "Retrieve a procedural skill from long-term memory into your working "
+    "context. Call this when you recognize that one of the skills listed in "
+    "the '## Procedural memory' section of your system prompt applies to the "
+    "user's current request. The tool returns the full step-by-step procedure "
+    "for that skill — follow it as a guide, adapting to the specifics of the "
+    "current task. You may recall multiple skills if the task spans several "
+    "procedures.\n\n"
+    "If you pass a label that doesn't exist, the tool will return the closest "
+    "match (if any) with a warning, or list the available labels if nothing "
+    "is close."
+)
+
+
+_INPUT_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "label": {
+            "type": "string",
+            "description": (
+                "The skill label to recall, e.g. 'csv_summary'. Must come from "
+                "the procedural memory list in your system prompt."
+            ),
+        },
+    },
+    "required": ["label"],
+}
+
+
+def _format_skill_response(skill, *, warning: str = "") -> str:
+    """Render the recall payload sent back to the LLM as a tool result."""
+    parts: list[str] = []
+    if warning:
+        parts.append(warning.strip())
+        parts.append("")  # blank line before the procedure
+    parts.append(f"# Skill: {skill.name}")
+    parts.append("")
+    if skill.description:
+        parts.append(skill.description)
+        parts.append("")
+    parts.append("## Procedure (Stage 1 — declarative)")
+    parts.append("")
+    parts.append(skill.declarative_md.strip())
+    return "\n".join(parts)
+
+
+async def handle_recall_skill(session: "ChatSession", tc_input: dict) -> str:
+    """Look up a skill by label and return its declarative procedure."""
+    label_in = (tc_input.get("label") or "").strip()
+    if not label_in:
+        return (
+            "ERROR: recall_skill requires a non-empty 'label' parameter. "
+            "Pick one from the procedural memory list in your system prompt."
+        )
+
+    store = getattr(session, "_skill_store", None)
+    if store is None:
+        return (
+            "ERROR: no skill store is wired into this session. "
+            "Procedural memory is unavailable right now."
+        )
+
+    skill = store.load(label_in)
+    warning = ""
+    if skill is None:
+        closest = store.closest_match(label_in)
+        if closest is None:
+            available = [s["label"] for s in store.list_summaries()]
+            if not available:
+                return (
+                    f"NO MATCH: no skill named '{label_in}', and the procedural "
+                    f"memory is empty. Proceed without a recalled procedure."
+                )
+            return (
+                f"NO MATCH: no skill named '{label_in}'. Available skills: "
+                f"{', '.join(available)}."
+            )
+        skill = store.load(closest)
+        if skill is None:
+            # Race or filesystem flake — be defensive
+            return (
+                f"NO MATCH: '{label_in}' was not found and the closest "
+                f"candidate '{closest}' could not be loaded."
+            )
+        warning = (
+            f"⚠ No skill named '{label_in}'. Returning the closest match: "
+            f"'{skill.label}'. If that's not what you wanted, ignore the "
+            f"procedure below and proceed without a recalled skill."
+        )
+
+    # Increment the recommended counter for the *resolved* label, not the
+    # input. If the LLM typo'd 'csv_sumary', we credit 'csv_summary'.
+    store.increment_recommended(skill.label, stage=1)
+
+    return _format_skill_response(skill, warning=warning)
+
+
+RECALL_SKILL_TOOL = ToolDef(
+    name="recall_skill",
+    description=_DESCRIPTION,
+    input_schema=_INPUT_SCHEMA,
+    handler=handle_recall_skill,
+)
+
+
+__all__ = ["RECALL_SKILL_TOOL", "handle_recall_skill"]
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index 6a309dae..b16d4f57 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -1,12 +1,61 @@
 from __future__ import annotations
+
+import logging
 from typing import TYPE_CHECKING
 
+from anton.core.backends.base import Cell
 from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
 
 if TYPE_CHECKING:
     from anton.chat_session import ChatSession
 
 
+_log = logging.getLogger(__name__)
+
+
+async def _fire_pre_execute(session: "ChatSession", cell: Cell) -> None:
+    """Notify pre-execute observers (e.g. cerebellum) before a cell runs.
+
+    Best-effort: a buggy observer never kills a cell. The list of
+    observers is owned by the session — typically populated in
+    ChatSession.__init__. Empty list (or attribute missing) means no
+    observers and this is a no-op.
+    """
+    observers = getattr(session, "_scratchpad_observers", None) or []
+    for obs in observers:
+        on_pre = getattr(obs, "on_pre_execute", None)
+        if on_pre is None:
+            continue
+        try:
+            await on_pre(cell)
+        except Exception as exc:
+            _log.warning(
+                "scratchpad pre-execute observer %s failed: %s",
+                type(obs).__name__,
+                exc,
+            )
+
+
+async def _fire_post_execute(session: "ChatSession", cell: Cell) -> None:
+    """Notify post-execute observers (e.g. cerebellum) after a cell finishes.
+
+    Same best-effort contract as `_fire_pre_execute`.
+    """
+    observers = getattr(session, "_scratchpad_observers", None) or []
+    for obs in observers:
+        on_post = getattr(obs, "on_post_execute", None)
+        if on_post is None:
+            continue
+        try:
+            await on_post(cell)
+        except Exception as exc:
+            _log.warning(
+                "scratchpad post-execute observer %s failed: %s",
+                type(obs).__name__,
+                exc,
+            )
+
+
 async def handle_recall(session: ChatSession, tc_input: dict) -> str:
     """Process a recall tool call — search episodic memory."""
     if session._episodic is None or not session._episodic.enabled:
@@ -102,6 +151,20 @@ async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
             return result
         pad, code, description, estimated_time, estimated_seconds = result
 
+        # Notify pre-execute observers (e.g. cerebellum). The runtime
+        # never sees these — observation is an orchestration concern,
+        # so it lives at the dispatcher layer where the data is most
+        # natural and where local/remote runtimes stay interchangeable.
+        prelim_cell = Cell(
+            code=code,
+            stdout="",
+            stderr="",
+            error=None,
+            description=description,
+            estimated_time=estimated_time or str(estimated_seconds),
+        )
+        await _fire_pre_execute(session, prelim_cell)
+
         cell = await pad.execute(
             code,
             description=description,
@@ -112,6 +175,7 @@ async def handle_scratchpad(session: ChatSession, tc_input: dict) -> str:
             session._record_cell_explainability(
                 pad_name=name, description=description, cell=cell,
             )
+            await _fire_post_execute(session, cell)
         return format_cell_result(cell)
 
     elif action == "view":
diff --git a/tests/test_cerebellum.py b/tests/test_cerebellum.py
new file mode 100644
index 00000000..26894120
--- /dev/null
+++ b/tests/test_cerebellum.py
@@ -0,0 +1,399 @@
+"""Unit tests for `anton.core.memory.cerebellum.Cerebellum`."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from anton.core.backends.base import Cell
+from anton.core.memory.cerebellum import (
+    Cerebellum,
+    CerebellumLesson,
+    _DiffPassResult,
+    _format_cell_for_diff,
+    _LessonDraft,
+)
+from anton.core.memory.hippocampus import Engram
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _cell(
+    code: str = "x = 1",
+    description: str = "",
+    stdout: str = "",
+    stderr: str = "",
+    error: str | None = None,
+) -> Cell:
+    return Cell(
+        code=code,
+        stdout=stdout,
+        stderr=stderr,
+        error=error,
+        description=description,
+    )
+
+
+def _make_llm_returning(*lessons: tuple[str, str]) -> MagicMock:
+    """Build a mock LLMClient whose generate_object returns the given lessons.
+
+    Each lesson is a (text, topic) tuple. Empty argument list means the
+    diff returns no lessons (i.e. the LLM said "nothing to learn here").
+    """
+    drafts = [_LessonDraft(text=text, topic=topic) for text, topic in lessons]
+    result = _DiffPassResult(lessons=drafts)
+    llm = MagicMock()
+    llm.generate_object_code = AsyncMock(return_value=result)
+    return llm
+
+
+def _make_llm_raising(exc: Exception) -> MagicMock:
+    """Build a mock LLMClient whose generate_object raises an exception."""
+    llm = MagicMock()
+    llm.generate_object_code = AsyncMock(side_effect=exc)
+    return llm
+
+
+def _make_cortex() -> MagicMock:
+    """Build a mock Cortex with an awaitable encode() method."""
+    cortex = MagicMock()
+    cortex.encode = AsyncMock(return_value=["encoded ok"])
+    return cortex
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helper functions
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestFormatCellForDiff:
+    def test_basic_cell_renders(self):
+        cell = _cell(
+            code="print(1+1)",
+            description="add two numbers",
+            stdout="2",
+        )
+        out = _format_cell_for_diff(cell, 1)
+        assert "Cell 1" in out
+        assert "add two numbers" in out
+        assert "print(1+1)" in out
+        assert "stdout:" in out
+
+    def test_cell_with_error(self):
+        cell = _cell(
+            code="x = 1/0",
+            description="divide",
+            error="ZeroDivisionError: division by zero",
+        )
+        out = _format_cell_for_diff(cell, 2)
+        assert "ZeroDivisionError" in out
+        assert "Cell 2" in out
+
+    def test_no_description_falls_back(self):
+        cell = _cell(code="x = 1", stdout="")
+        out = _format_cell_for_diff(cell, 1)
+        assert "no description" in out
+
+    def test_truncates_long_code(self):
+        cell = _cell(code="x = 1\n" * 500, description="loop")
+        out = _format_cell_for_diff(cell, 1)
+        assert "[truncated]" in out
+
+    def test_empty_outputs_marker(self):
+        cell = _cell(code="pass", description="pass")
+        out = _format_cell_for_diff(cell, 1)
+        assert "no output produced" in out
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Cheap path: clean cells should never reach the LLM
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestCheapPath:
+    @pytest.mark.asyncio
+    async def test_clean_cell_not_buffered(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=_make_llm_returning())
+        await cb.on_post_execute(_cell(code="x = 1", stdout="ok"))
+        assert cb.buffered_count == 0
+
+    @pytest.mark.asyncio
+    async def test_clean_cell_with_only_stdout(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=_make_llm_returning())
+        await cb.on_post_execute(_cell(stdout="2", description="add"))
+        assert cb.buffered_count == 0
+
+    @pytest.mark.asyncio
+    async def test_clean_cell_with_no_output_at_all(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=_make_llm_returning())
+        await cb.on_post_execute(_cell(code="pass"))
+        assert cb.buffered_count == 0
+
+    @pytest.mark.asyncio
+    async def test_flush_with_no_cells_returns_empty(self):
+        llm = _make_llm_returning()
+        cb = Cerebellum(cortex=_make_cortex(), llm=llm)
+        result = await cb.flush()
+        assert result == []
+        # No LLM call when nothing was buffered
+        llm.generate_object_code.assert_not_called()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Error path: buffer + diff + encode
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestErrorPath:
+    @pytest.mark.asyncio
+    async def test_error_cell_is_buffered(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=_make_llm_returning())
+        await cb.on_post_execute(
+            _cell(code="x = 1/0", error="ZeroDivisionError")
+        )
+        assert cb.buffered_count == 1
+
+    @pytest.mark.asyncio
+    async def test_stderr_only_cell_is_buffered(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=_make_llm_returning())
+        # No `error` field but a non-empty stderr — counts as warning
+        await cb.on_post_execute(
+            _cell(code="import x", stderr="DeprecationWarning: ...")
+        )
+        assert cb.buffered_count == 1
+
+    @pytest.mark.asyncio
+    async def test_flush_calls_generate_object_with_buffered_cells(self):
+        llm = _make_llm_returning()
+        cb = Cerebellum(cortex=_make_cortex(), llm=llm)
+        await cb.on_post_execute(
+            _cell(
+                code="x = 1/0",
+                description="divide",
+                error="ZeroDivisionError: division by zero",
+            )
+        )
+        await cb.flush()
+        llm.generate_object_code.assert_called_once()
+        # generate_object_code was called with the _DiffPassResult Pydantic model
+        call_args = llm.generate_object_code.call_args
+        assert call_args.args[0] is _DiffPassResult
+        # The prompt should mention the cell's intent
+        user_msg = call_args.kwargs["messages"][0]["content"]
+        assert "divide" in user_msg
+        assert "ZeroDivisionError" in user_msg
+
+    @pytest.mark.asyncio
+    async def test_flush_clears_buffer(self):
+        cb = Cerebellum(
+            cortex=_make_cortex(),
+            llm=_make_llm_returning(),
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        assert cb.buffered_count == 1
+        await cb.flush()
+        assert cb.buffered_count == 0
+
+    @pytest.mark.asyncio
+    async def test_extracted_lesson_is_encoded(self):
+        cortex = _make_cortex()
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_returning(
+                ("Use low_memory=False with mixed-dtype CSVs.", "scratchpad")
+            ),
+        )
+        await cb.on_post_execute(_cell(error="dtype error"))
+        result = await cb.flush()
+
+        assert len(result) == 1
+        assert isinstance(result[0], CerebellumLesson)
+        assert "low_memory" in result[0].text
+
+        # Cortex.encode was called with an Engram
+        cortex.encode.assert_awaited_once()
+        engrams = cortex.encode.call_args.args[0]
+        assert len(engrams) == 1
+        assert isinstance(engrams[0], Engram)
+        assert engrams[0].kind == "lesson"
+        assert engrams[0].topic == "scratchpad"
+        assert engrams[0].source == "consolidation"
+        assert engrams[0].scope == "project"
+        assert "low_memory" in engrams[0].text
+
+    @pytest.mark.asyncio
+    async def test_multiple_lessons_encoded_in_one_call(self):
+        cortex = _make_cortex()
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_returning(
+                ("lesson one", "scratchpad"),
+                ("lesson two", "scratchpad"),
+                ("lesson three", "scratchpad"),
+            ),
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        await cb.flush()
+
+        cortex.encode.assert_awaited_once()
+        engrams = cortex.encode.call_args.args[0]
+        assert len(engrams) == 3
+
+    @pytest.mark.asyncio
+    async def test_max_lessons_caps_extraction(self):
+        cortex = _make_cortex()
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_returning(
+                *((f"lesson {i}", "scratchpad") for i in range(10))
+            ),
+            max_lessons_per_flush=2,
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert len(result) == 2
+
+    @pytest.mark.asyncio
+    async def test_empty_lessons_list_does_not_encode(self):
+        cortex = _make_cortex()
+        cb = Cerebellum(cortex=cortex, llm=_make_llm_returning())
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert result == []
+        cortex.encode.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_skips_lessons_with_blank_text(self):
+        cortex = _make_cortex()
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_returning(
+                ("real lesson", "scratchpad"),
+                ("   ", "scratchpad"),  # blank — should be skipped
+            ),
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert len(result) == 1
+        assert result[0].text == "real lesson"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Robustness — bad LLM responses, missing infra
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestRobustness:
+    @pytest.mark.asyncio
+    async def test_llm_network_exception_does_not_crash(self):
+        """Provider/network failure during generate_object → safe no-op."""
+        cortex = _make_cortex()
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_raising(RuntimeError("network down")),
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        # Must not raise
+        result = await cb.flush()
+        assert result == []
+        cortex.encode.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_llm_validation_error_does_not_crash(self):
+        """If the LLM somehow violates the schema, Pydantic raises and
+        flush() swallows it cleanly. This shouldn't happen in practice
+        because tool_choice is forced, but the safety net is in place."""
+        from pydantic import ValidationError as _PydValidationError
+
+        cortex = _make_cortex()
+        # Construct a real Pydantic validation error to feed in
+        try:
+            _DiffPassResult.model_validate({"lessons": "not a list"})
+        except _PydValidationError as exc:
+            llm = _make_llm_raising(exc)
+
+        cb = Cerebellum(cortex=cortex, llm=llm)
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert result == []
+        cortex.encode.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_value_error_from_no_tool_call_does_not_crash(self):
+        """If generate_object raises ValueError because the LLM returned
+        no tool call, flush() swallows it cleanly."""
+        cortex = _make_cortex()
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_raising(ValueError("LLM did not return a tool call")),
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert result == []
+
+    @pytest.mark.asyncio
+    async def test_cortex_encode_exception_does_not_crash(self):
+        """If cortex.encode itself fails, the cerebellum logs and moves on.
+        The lesson was still extracted (returned to the caller) — only
+        the persistence step failed."""
+        cortex = MagicMock()
+        cortex.encode = AsyncMock(side_effect=RuntimeError("disk full"))
+        cb = Cerebellum(
+            cortex=cortex,
+            llm=_make_llm_returning(("be careful", "scratchpad")),
+        )
+        await cb.on_post_execute(_cell(error="boom"))
+        # Must not raise
+        result = await cb.flush()
+        assert len(result) == 1
+
+    @pytest.mark.asyncio
+    async def test_no_cortex_no_encode(self):
+        cb = Cerebellum(cortex=None, llm=_make_llm_returning())
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert result == []
+
+    @pytest.mark.asyncio
+    async def test_no_llm_no_diff(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=None)
+        await cb.on_post_execute(_cell(error="boom"))
+        result = await cb.flush()
+        assert result == []
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Reset / lifecycle
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestReset:
+    @pytest.mark.asyncio
+    async def test_reset_clears_buffer_without_encoding(self):
+        cortex = _make_cortex()
+        llm = _make_llm_returning(("x", "scratchpad"))
+        cb = Cerebellum(cortex=cortex, llm=llm)
+        await cb.on_post_execute(_cell(error="boom"))
+        await cb.on_post_execute(_cell(error="boom2"))
+        assert cb.buffered_count == 2
+
+        cb.reset()
+        assert cb.buffered_count == 0
+
+        # After reset, no LLM call happens because buffer is empty
+        await cb.flush()
+        llm.generate_object_code.assert_not_called()
+        cortex.encode.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_pre_execute_increments_counter(self):
+        cb = Cerebellum(cortex=_make_cortex(), llm=_make_llm_returning())
+        await cb.on_pre_execute(_cell(code="x = 1", description="set x"))
+        await cb.on_pre_execute(_cell(code="y = 2", description="set y"))
+        # Internal counter — we don't expose it as a property but reset clears it
+        cb.reset()
+        # No exception means reset worked
diff --git a/tests/test_cerebellum_e2e.py b/tests/test_cerebellum_e2e.py
new file mode 100644
index 00000000..2a4023a8
--- /dev/null
+++ b/tests/test_cerebellum_e2e.py
@@ -0,0 +1,246 @@
+"""End-to-end test of the cerebellum loop.
+
+Verifies the full path with no real LLM and no real subprocess:
+
+1. Build a fake session-like object with a real Cerebellum wired into
+   `_scratchpad_observers`
+2. Drive `handle_scratchpad` with an exec call where the (mocked) pad
+   returns an errored Cell
+3. Confirm the cerebellum's pre+post hooks fired via the dispatcher
+4. Confirm the errored cell was buffered (clean cells would have been
+   skipped via the cheap path)
+5. Call cerebellum.flush() — verify the diff LLM was invoked, the
+   lesson was extracted, and cortex.encode() received an Engram
+6. The end-to-end loop runs without touching the runtime layer at all
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from anton.core.backends.base import Cell
+from anton.core.memory.cerebellum import (
+    Cerebellum,
+    _DiffPassResult,
+    _LessonDraft,
+)
+from anton.core.memory.hippocampus import Engram
+from anton.core.tools.tool_handlers import handle_scratchpad
+
+
+def _build_session_with_cerebellum(
+    *,
+    pad_returns: Cell,
+    lessons: list[tuple[str, str]] | None = None,
+) -> tuple[MagicMock, MagicMock, MagicMock, Cerebellum]:
+    """Construct a fake session with a real Cerebellum wired in.
+
+    Returns (session, mock_cortex, mock_llm, cerebellum) so the test
+    can poke at any of them after the dispatch.
+
+    `lessons` is a list of (text, topic) tuples that the mocked
+    `generate_object` call will return as a `_DiffPassResult`. Empty
+    list means the LLM extracted no lessons from the buffered cells.
+    """
+    cortex = MagicMock()
+    cortex.encode = AsyncMock(return_value=["encoded"])
+
+    drafts = [
+        _LessonDraft(text=text, topic=topic) for text, topic in (lessons or [])
+    ]
+    llm = MagicMock()
+    llm.generate_object_code = AsyncMock(
+        return_value=_DiffPassResult(lessons=drafts)
+    )
+
+    cerebellum = Cerebellum(cortex=cortex, llm=llm)
+
+    session = MagicMock()
+    session._scratchpad_observers = [cerebellum]
+    session._cerebellum = cerebellum
+    session._cortex = cortex
+    session._llm = llm
+    session._record_cell_explainability = MagicMock()
+
+    pad = MagicMock()
+    pad.execute = AsyncMock(return_value=pad_returns)
+    session._scratchpads = MagicMock()
+    session._scratchpads.get_or_create = AsyncMock(return_value=pad)
+
+    return session, cortex, llm, cerebellum
+
+
+@pytest.mark.asyncio
+async def test_full_cerebellum_loop_with_errored_cell():
+    """Errored cell → dispatcher fires hooks → cerebellum buffers → flush
+    runs diff → lesson encoded via cortex."""
+    errored_cell = Cell(
+        code="import pandas as pd\ndf = pd.read_csv('mixed.csv')",
+        stdout="",
+        stderr="",
+        error=(
+            "DtypeWarning: Columns have mixed types. "
+            "Specify dtype option or set low_memory=False."
+        ),
+        description="Load mixed.csv into a DataFrame",
+    )
+    session, cortex, llm, cerebellum = _build_session_with_cerebellum(
+        pad_returns=errored_cell,
+        lessons=[
+            (
+                "When loading CSVs with mixed dtypes, pass low_memory=False to pd.read_csv.",
+                "scratchpad",
+            )
+        ],
+    )
+
+    # Step 1: dispatch the exec call — this exercises the dispatcher's
+    # observer firing path end-to-end
+    result = await handle_scratchpad(
+        session,
+        {
+            "action": "exec",
+            "name": "main",
+            "code": "import pandas as pd\ndf = pd.read_csv('mixed.csv')",
+            "one_line_description": "Load mixed.csv into a DataFrame",
+            "estimated_execution_time_seconds": 2,
+        },
+    )
+    assert isinstance(result, str)
+
+    # Step 2: cerebellum buffered the errored cell (cheap path skipped)
+    assert cerebellum.buffered_count == 1
+
+    # The diff LLM has NOT been called yet — that happens at flush
+    llm.generate_object_code.assert_not_called()
+    cortex.encode.assert_not_called()
+
+    # Step 3: simulate end-of-turn flush
+    lessons = await cerebellum.flush()
+
+    # The diff LLM was called once with the buffered cell, using the
+    # forced-tool-choice path via _DiffPassResult
+    llm.generate_object_code.assert_called_once()
+    call_args = llm.generate_object_code.call_args
+    assert call_args.args[0] is _DiffPassResult
+    diff_prompt = call_args.kwargs["messages"][0]["content"]
+    assert "Load mixed.csv into a DataFrame" in diff_prompt
+    assert "DtypeWarning" in diff_prompt
+
+    # Lesson was extracted
+    assert len(lessons) == 1
+    assert "low_memory" in lessons[0].text
+
+    # And encoded via cortex
+    cortex.encode.assert_awaited_once()
+    engrams = cortex.encode.call_args.args[0]
+    assert len(engrams) == 1
+    assert isinstance(engrams[0], Engram)
+    assert engrams[0].kind == "lesson"
+    assert engrams[0].topic == "scratchpad"
+    assert "low_memory" in engrams[0].text
+
+    # Buffer is now empty
+    assert cerebellum.buffered_count == 0
+
+
+@pytest.mark.asyncio
+async def test_clean_cell_never_triggers_diff():
+    """A successful cell should never reach the cerebellum's LLM diff."""
+    clean_cell = Cell(
+        code="print(1+1)",
+        stdout="2",
+        stderr="",
+        error=None,
+        description="add two",
+    )
+    session, cortex, llm, cerebellum = _build_session_with_cerebellum(
+        pad_returns=clean_cell,
+        lessons=[],
+    )
+
+    await handle_scratchpad(
+        session,
+        {
+            "action": "exec",
+            "name": "main",
+            "code": "print(1+1)",
+            "one_line_description": "add two",
+            "estimated_execution_time_seconds": 1,
+        },
+    )
+
+    # Cell was clean — never buffered
+    assert cerebellum.buffered_count == 0
+
+    # Even if we flush, no LLM call happens because buffer is empty
+    await cerebellum.flush()
+    llm.generate_object_code.assert_not_called()
+    cortex.encode.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_multiple_errored_cells_batched_into_single_diff():
+    """Multiple cells in one turn → ONE diff call at flush, not N."""
+    cell1 = Cell(
+        code="x = bad_func()",
+        stdout="",
+        stderr="",
+        error="NameError: name 'bad_func' is not defined",
+        description="call bad_func",
+    )
+    cell2 = Cell(
+        code="y = 1 / 0",
+        stdout="",
+        stderr="",
+        error="ZeroDivisionError: division by zero",
+        description="divide by zero",
+    )
+    session, cortex, llm, cerebellum = _build_session_with_cerebellum(
+        pad_returns=cell1,  # first call returns cell1
+        lessons=[
+            ("Define functions before calling.", "scratchpad"),
+            ("Guard against division by zero.", "scratchpad"),
+        ],
+    )
+
+    # First exec returns cell1
+    await handle_scratchpad(
+        session,
+        {
+            "action": "exec",
+            "name": "main",
+            "code": "x = bad_func()",
+            "one_line_description": "call bad_func",
+            "estimated_execution_time_seconds": 1,
+        },
+    )
+
+    # Re-mock pad.execute to return cell2 for the second call
+    session._scratchpads.get_or_create.return_value.execute = AsyncMock(
+        return_value=cell2
+    )
+    await handle_scratchpad(
+        session,
+        {
+            "action": "exec",
+            "name": "main",
+            "code": "y = 1 / 0",
+            "one_line_description": "divide by zero",
+            "estimated_execution_time_seconds": 1,
+        },
+    )
+
+    # Two cells buffered, zero LLM calls so far
+    assert cerebellum.buffered_count == 2
+    llm.generate_object_code.assert_not_called()
+
+    # One flush → one generate_object call → both lessons encoded together
+    await cerebellum.flush()
+    llm.generate_object_code.assert_called_once()  # ← THE KEY ASSERTION: batched per turn
+    cortex.encode.assert_awaited_once()
+    engrams = cortex.encode.call_args.args[0]
+    assert len(engrams) == 2
diff --git a/tests/test_connect_collector.py b/tests/test_connect_collector.py
index f62bca64..5a426907 100644
--- a/tests/test_connect_collector.py
+++ b/tests/test_connect_collector.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import json
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
@@ -8,6 +7,7 @@
 from anton.connect_collector import (
     ConnectionCollector,
     ExtractedData,
+    _ExtractionResult,
     extract_variables,
 )
 from anton.datasource_registry import AuthMethod, DatasourceEngine, DatasourceField
@@ -59,13 +59,31 @@ def _hubspot_choice_engine() -> DatasourceEngine:
     )
 
 
-def _mock_session_with_plan_response(content: str) -> MagicMock:
-    """Build a session mock whose `_llm.plan()` returns the given JSON content."""
+def _mock_session_with_extraction(
+    *,
+    variables: dict[str, str] | None = None,
+    is_redirect: bool = False,
+    redirect_engine: str = "",
+    redirect_reason: str = "",
+) -> MagicMock:
+    """Build a session whose `_llm.generate_object` returns a known result."""
+    extraction = _ExtractionResult(
+        variables=variables or {},
+        is_redirect=is_redirect,
+        redirect_engine=redirect_engine,
+        redirect_reason=redirect_reason,
+    )
     session = MagicMock()
-    plan_response = MagicMock()
-    plan_response.content = content
     session._llm = MagicMock()
-    session._llm.plan = AsyncMock(return_value=plan_response)
+    session._llm.generate_object = AsyncMock(return_value=extraction)
+    return session
+
+
+def _mock_session_raising(exc: Exception) -> MagicMock:
+    """Build a session whose `_llm.generate_object` raises the given exception."""
+    session = MagicMock()
+    session._llm = MagicMock()
+    session._llm.generate_object = AsyncMock(side_effect=exc)
     return session
 
 
@@ -186,7 +204,7 @@ class TestExtractVariables:
     @pytest.mark.asyncio
     async def test_empty_input_returns_empty(self):
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response("{}")
+        session = _mock_session_with_extraction()
         result = await extract_variables(
             "",
             expected_fields=engine.fields,
@@ -198,24 +216,17 @@ async def test_empty_input_returns_empty(self):
         assert result.variables == {}
         assert not result.is_redirect
         # Empty input shouldn't even call the LLM
-        session._llm.plan.assert_not_called()
+        session._llm.generate_object.assert_not_called()
 
     @pytest.mark.asyncio
     async def test_llm_extracts_variables_from_bulk_input(self):
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            json.dumps(
-                {
-                    "variables": {
-                        "host": "db.example.com",
-                        "port": "5432",
-                        "user": "admin",
-                    },
-                    "is_redirect": False,
-                    "redirect_engine": "",
-                    "redirect_reason": "",
-                }
-            )
+        session = _mock_session_with_extraction(
+            variables={
+                "host": "db.example.com",
+                "port": "5432",
+                "user": "admin",
+            },
         )
         result = await extract_variables(
             "host=db.example.com port=5432 user=admin",
@@ -232,24 +243,34 @@ async def test_llm_extracts_variables_from_bulk_input(self):
         }
         assert not result.is_redirect
 
+    @pytest.mark.asyncio
+    async def test_passes_extraction_result_schema_to_llm(self):
+        """Verify generate_object was called with the right schema class."""
+        engine = _postgres_engine()
+        session = _mock_session_with_extraction(variables={"host": "db.x"})
+        await extract_variables(
+            "host=db.x",
+            expected_fields=engine.fields,
+            current_engine="postgres",
+            current_engine_display="PostgreSQL",
+            known_engine_slugs=["postgres"],
+            session=session,
+        )
+        session._llm.generate_object.assert_called_once()
+        call_args = session._llm.generate_object.call_args
+        assert call_args.args[0] is _ExtractionResult
+
     @pytest.mark.asyncio
     async def test_llm_parses_connection_string(self):
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            json.dumps(
-                {
-                    "variables": {
-                        "host": "db.example.com",
-                        "port": "5432",
-                        "user": "admin",
-                        "password": "secret",
-                        "database": "mydb",
-                    },
-                    "is_redirect": False,
-                    "redirect_engine": "",
-                    "redirect_reason": "",
-                }
-            )
+        session = _mock_session_with_extraction(
+            variables={
+                "host": "db.example.com",
+                "port": "5432",
+                "user": "admin",
+                "password": "secret",
+                "database": "mydb",
+            },
         )
         result = await extract_variables(
             "postgres://admin:secret@db.example.com:5432/mydb",
@@ -267,19 +288,12 @@ async def test_llm_parses_connection_string(self):
     @pytest.mark.asyncio
     async def test_llm_resolves_aliases(self):
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            json.dumps(
-                {
-                    "variables": {
-                        "host": "db.x",
-                        "user": "admin",
-                        "password": "secret",
-                    },
-                    "is_redirect": False,
-                    "redirect_engine": "",
-                    "redirect_reason": "",
-                }
-            )
+        session = _mock_session_with_extraction(
+            variables={
+                "host": "db.x",
+                "user": "admin",
+                "password": "secret",
+            },
         )
         result = await extract_variables(
             "hostname=db.x username=admin pwd=secret",
@@ -298,15 +312,11 @@ async def test_llm_resolves_aliases(self):
     @pytest.mark.asyncio
     async def test_llm_detects_redirect(self):
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            json.dumps(
-                {
-                    "variables": {},
-                    "is_redirect": True,
-                    "redirect_engine": "mysql",
-                    "redirect_reason": "user wants mysql instead",
-                }
-            )
+        session = _mock_session_with_extraction(
+            variables={},
+            is_redirect=True,
+            redirect_engine="mysql",
+            redirect_reason="user wants mysql instead",
         )
         result = await extract_variables(
             "actually let's use mysql instead",
@@ -323,18 +333,11 @@ async def test_llm_detects_redirect(self):
     @pytest.mark.asyncio
     async def test_llm_ignores_fields_not_in_expected_list(self):
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            json.dumps(
-                {
-                    "variables": {
-                        "host": "db.x",
-                        "bogus_field": "should be dropped",
-                    },
-                    "is_redirect": False,
-                    "redirect_engine": "",
-                    "redirect_reason": "",
-                }
-            )
+        session = _mock_session_with_extraction(
+            variables={
+                "host": "db.x",
+                "bogus_field": "should be dropped",
+            },
         )
         result = await extract_variables(
             "host=db.x bogus=y",
@@ -347,47 +350,10 @@ async def test_llm_ignores_fields_not_in_expected_list(self):
         assert result.variables == {"host": "db.x"}
         assert "bogus_field" not in result.variables
 
-    @pytest.mark.asyncio
-    async def test_llm_strips_markdown_fences(self):
-        engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            '```json\n{"variables": {"host": "db.x"}, '
-            '"is_redirect": false, "redirect_engine": "", '
-            '"redirect_reason": ""}\n```'
-        )
-        result = await extract_variables(
-            "host=db.x",
-            expected_fields=engine.fields,
-            current_engine="postgres",
-            current_engine_display="PostgreSQL",
-            known_engine_slugs=["postgres"],
-            session=session,
-        )
-        assert result.variables == {"host": "db.x"}
-
-    @pytest.mark.asyncio
-    async def test_invalid_json_returns_empty_result(self):
-        engine = _postgres_engine()
-        session = _mock_session_with_plan_response("this is not JSON at all")
-        result = await extract_variables(
-            "some input",
-            expected_fields=engine.fields,
-            current_engine="postgres",
-            current_engine_display="PostgreSQL",
-            known_engine_slugs=["postgres"],
-            session=session,
-        )
-        # Invalid JSON is caught → empty result, caller will fall back
-        # to treating the raw text as the next field's value
-        assert result.variables == {}
-        assert not result.is_redirect
-
     @pytest.mark.asyncio
     async def test_llm_exception_returns_empty_result(self):
         engine = _postgres_engine()
-        session = MagicMock()
-        session._llm = MagicMock()
-        session._llm.plan = AsyncMock(side_effect=RuntimeError("network error"))
+        session = _mock_session_raising(RuntimeError("network error"))
         result = await extract_variables(
             "host=db.x",
             expected_fields=engine.fields,
@@ -401,27 +367,26 @@ async def test_llm_exception_returns_empty_result(self):
         assert not result.is_redirect
 
     @pytest.mark.asyncio
-    async def test_coerces_numeric_values_to_strings(self):
+    async def test_validation_error_returns_empty_result(self):
+        """If generate_object raises a Pydantic ValidationError (rare with
+        forced tool_choice, but possible), we fall back to empty result."""
+        from pydantic import ValidationError as _PVE
+
         engine = _postgres_engine()
-        session = _mock_session_with_plan_response(
-            json.dumps(
-                {
-                    "variables": {"port": 5432},  # LLM returned int
-                    "is_redirect": False,
-                    "redirect_engine": "",
-                    "redirect_reason": "",
-                }
-            )
-        )
+        try:
+            _ExtractionResult.model_validate({"variables": "not a dict"})
+        except _PVE as exc:
+            session = _mock_session_raising(exc)
         result = await extract_variables(
-            "port is 5432",
+            "anything",
             expected_fields=engine.fields,
             current_engine="postgres",
             current_engine_display="PostgreSQL",
             known_engine_slugs=["postgres"],
             session=session,
         )
-        assert result.variables == {"port": "5432"}
+        assert result.variables == {}
+        assert not result.is_redirect
 
 
 # ─────────────────────────────────────────────────────────────────────────────
diff --git a/tests/test_consolidator.py b/tests/test_consolidator.py
index b262f746..153f741b 100644
--- a/tests/test_consolidator.py
+++ b/tests/test_consolidator.py
@@ -5,7 +5,11 @@
 
 import pytest
 
-from anton.core.memory.consolidator import Consolidator
+from anton.core.memory.consolidator import (
+    Consolidator,
+    _ConsolidatedLesson,
+    _ConsolidatedLessons,
+)
 
 
 @dataclass
@@ -56,9 +60,18 @@ async def test_extracts_lessons(self, consolidator):
         ]
 
         mock_llm = AsyncMock()
-        mock_llm.code = AsyncMock(return_value=type("R", (), {
-            "content": '[{"text": "Always validate JSON before parsing", "kind": "always", "scope": "global", "confidence": "high"}]'
-        })())
+        mock_llm.generate_object_code = AsyncMock(
+            return_value=_ConsolidatedLessons(
+                items=[
+                    _ConsolidatedLesson(
+                        text="Always validate JSON before parsing",
+                        kind="always",
+                        scope="global",
+                        confidence="high",
+                    )
+                ]
+            )
+        )
 
         engrams = await consolidator.replay_and_extract(cells, mock_llm)
         assert len(engrams) == 1
@@ -69,7 +82,9 @@ async def test_extracts_lessons(self, consolidator):
     async def test_handles_empty_response(self, consolidator):
         cells = [MockCell(), MockCell()]
         mock_llm = AsyncMock()
-        mock_llm.code = AsyncMock(return_value=type("R", (), {"content": "[]"})())
+        mock_llm.generate_object_code = AsyncMock(
+            return_value=_ConsolidatedLessons(items=[])
+        )
 
         engrams = await consolidator.replay_and_extract(cells, mock_llm)
         assert engrams == []
@@ -77,28 +92,39 @@ async def test_handles_empty_response(self, consolidator):
     async def test_handles_llm_failure(self, consolidator):
         cells = [MockCell(), MockCell()]
         mock_llm = AsyncMock()
-        mock_llm.code = AsyncMock(side_effect=Exception("API error"))
+        mock_llm.generate_object_code = AsyncMock(side_effect=Exception("API error"))
 
         engrams = await consolidator.replay_and_extract(cells, mock_llm)
         assert engrams == []
 
-    async def test_handles_markdown_fenced_json(self, consolidator):
-        cells = [MockCell(description="test", error="SomeError")]
+    async def test_skips_blank_text_entries(self, consolidator):
+        """Defensive: even with forced schema, blank text should be skipped."""
+        cells = [MockCell(), MockCell()]
         mock_llm = AsyncMock()
-        mock_llm.code = AsyncMock(return_value=type("R", (), {
-            "content": '```json\n[{"text": "Handle errors gracefully", "kind": "lesson", "scope": "project"}]\n```'
-        })())
+        mock_llm.generate_object_code = AsyncMock(
+            return_value=_ConsolidatedLessons(
+                items=[
+                    _ConsolidatedLesson(text="valid", kind="lesson", scope="global"),
+                    _ConsolidatedLesson(text="   ", kind="lesson", scope="project"),
+                ]
+            )
+        )
 
         engrams = await consolidator.replay_and_extract(cells, mock_llm)
         assert len(engrams) == 1
-        assert engrams[0].text == "Handle errors gracefully"
+        assert engrams[0].text == "valid"
 
-    async def test_invalid_entries_skipped(self, consolidator):
+    async def test_caps_at_five_lessons(self, consolidator):
         cells = [MockCell(), MockCell()]
         mock_llm = AsyncMock()
-        mock_llm.code = AsyncMock(return_value=type("R", (), {
-            "content": '[{"text": "valid", "kind": "lesson", "scope": "global"}, {"bad": "entry"}, "not a dict"]'
-        })())
+        mock_llm.generate_object_code = AsyncMock(
+            return_value=_ConsolidatedLessons(
+                items=[
+                    _ConsolidatedLesson(text=f"lesson {i}")
+                    for i in range(10)
+                ]
+            )
+        )
 
         engrams = await consolidator.replay_and_extract(cells, mock_llm)
-        assert len(engrams) == 1
+        assert len(engrams) == 5
diff --git a/tests/test_cortex.py b/tests/test_cortex.py
index 02a12d14..17288058 100644
--- a/tests/test_cortex.py
+++ b/tests/test_cortex.py
@@ -150,12 +150,16 @@ async def test_off_mode_does_nothing(self, dirs):
         mock_llm = AsyncMock()
         cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="off", llm_client=mock_llm)
         await cortex.maybe_update_identity("I'm Jorge")
-        mock_llm.code.assert_not_called()
+        mock_llm.generate_object_code.assert_not_called()
 
     async def test_extracts_identity(self, dirs):
+        from anton.core.memory.cortex import _IdentityFacts
+
         g, p = dirs
         mock_llm = AsyncMock()
-        mock_llm.code = AsyncMock(return_value=type("R", (), {"content": '["Name: Jorge"]'})())
+        mock_llm.generate_object_code = AsyncMock(
+            return_value=_IdentityFacts(facts=["Name: Jorge"])
+        )
         cortex = Cortex(global_hc=Hippocampus(g), project_hc=Hippocampus(p), mode="copilot", llm_client=mock_llm)
         await cortex.maybe_update_identity("Hi, I'm Jorge")
         assert (g / "profile.md").exists()
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 344928af..b26e4205 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -143,13 +143,35 @@ def registry(datasources_md):
 
 @pytest.fixture()
 def make_session():
-    """Factory that creates a fresh ChatSession with mocked scratchpads."""
+    """Factory that creates a fresh ChatSession with mocked scratchpads.
+
+    The default `generate_object` dispatcher returns a sensible empty
+    instance for whichever Pydantic schema the production code asks
+    for. Tests that need a non-empty result should override
+    `session._llm.generate_object` after construction.
+    """
 
     def _factory():
+        from anton.connect_collector import _ExtractionResult
+
+        async def _default_generate_object(schema_class, **kwargs):
+            # Known extraction schemas → empty defaults so the call
+            # falls back to "no structured data" behavior. Unknown
+            # schemas → raise so the caller's try/except sees a clear
+            # failure (matching the pre-refactor behavior where
+            # json.loads would fail on the canned "UNKNOWN" content).
+            if schema_class is _ExtractionResult:
+                return _ExtractionResult()
+            raise RuntimeError(
+                f"test mock has no default for {schema_class.__name__}; "
+                "override session._llm.generate_object in this test"
+            )
+
         mock_llm = AsyncMock()
         plan_response = MagicMock()
         plan_response.content = "UNKNOWN"
         mock_llm.plan = AsyncMock(return_value=plan_response)
+        mock_llm.generate_object = AsyncMock(side_effect=_default_generate_object)
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         session._scratchpads = AsyncMock()
         return session
@@ -768,14 +790,23 @@ async def test_credentials_pasted_at_help_prompt(
         console = MagicMock()
         vault = DataVault(vault_dir=vault_dir)
 
-        # Mock the LLM to return a structured extraction for the paste
-        extract_response = MagicMock()
-        extract_response.content = (
-            '{"variables": {"host": "db.example.com", "port": "5432", '
-            '"database": "prod_db", "user": "alice", "password": "s3cr3t"}, '
-            '"is_redirect": false, "redirect_engine": "", "redirect_reason": ""}'
+        # Mock the LLM to return a structured extraction for the paste.
+        # connect_collector.extract_variables now uses generate_object
+        # with a Pydantic schema, so the mock returns the typed object.
+        from anton.connect_collector import _ExtractionResult
+        extract_response = _ExtractionResult(
+            variables={
+                "host": "db.example.com",
+                "port": "5432",
+                "database": "prod_db",
+                "user": "alice",
+                "password": "s3cr3t",
+            },
+            is_redirect=False,
+            redirect_engine="",
+            redirect_reason="",
         )
-        session._llm.plan = AsyncMock(return_value=extract_response)
+        session._llm.generate_object = AsyncMock(return_value=extract_response)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -1049,15 +1080,21 @@ async def test_bulk_key_value_extraction(
         console = MagicMock()
         vault = DataVault(vault_dir=vault_dir)
 
-        # Mock the LLM to return a structured JSON extraction when it sees
-        # the bulk key=value string.
-        bulk_response = MagicMock()
-        bulk_response.content = (
-            '{"variables": {"host": "db.example.com", "port": "5432", '
-            '"database": "prod_db", "user": "alice"}, '
-            '"is_redirect": false, "redirect_engine": "", "redirect_reason": ""}'
+        # Mock the LLM extraction to return a typed Pydantic result
+        # (connect_collector now uses generate_object with a schema).
+        from anton.connect_collector import _ExtractionResult
+        bulk_response = _ExtractionResult(
+            variables={
+                "host": "db.example.com",
+                "port": "5432",
+                "database": "prod_db",
+                "user": "alice",
+            },
+            is_redirect=False,
+            redirect_engine="",
+            redirect_reason="",
         )
-        session._llm.plan = AsyncMock(return_value=bulk_response)
+        session._llm.generate_object = AsyncMock(return_value=bulk_response)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -2107,11 +2144,19 @@ def test_reconnect_no_duplicate_secret_vars(self, vault_dir, registry):
 class TestAddCustomDatasourceFlow:
     """Tests for _handle_add_custom_datasource field-collection logic."""
 
-    def _make_llm_response(self, fields: list[dict], display_name: str = "MyDB") -> str:
-        """Return a JSON string mimicking the LLM's plan() response."""
-        import json as _json
+    def _make_spec(self, fields: list[dict], display_name: str = "MyDB"):
+        """Return a _CustomDatasourceSpec instance mimicking the LLM's response."""
+        from anton.commands.datasource import (
+            _CustomDatasourceField,
+            _CustomDatasourceSpec,
+        )
 
-        return _json.dumps({"display_name": display_name, "pip": "", "fields": fields})
+        return _CustomDatasourceSpec(
+            display_name=display_name,
+            pip="",
+            test_snippet="",
+            fields=[_CustomDatasourceField(**f) for f in fields],
+        )
 
     def _make_registry(self, tmp_path):
         """Return a minimal registry mock that accepts any slug."""
@@ -2121,12 +2166,10 @@ def _make_registry(self, tmp_path):
         reg.get.return_value = None  # triggers inline fallback
         return reg
 
-    def _make_llm(self, json_text: str):
-        """Return an AsyncMock LLM whose plan() returns json_text."""
+    def _make_llm(self, spec):
+        """Return an AsyncMock LLM whose generate_object() returns the spec."""
         llm = AsyncMock()
-        response = MagicMock()
-        response.content = json_text
-        llm.plan = AsyncMock(return_value=response)
+        llm.generate_object = AsyncMock(return_value=spec)
         return llm
 
     def _mock_ds_path(self, mock_path_cls, tmp_path):
@@ -2140,7 +2183,7 @@ async def test_missing_required_non_secret_field_prompts_user(
         """Required non-secret field without inline value triggers Prompt.ask."""
         session = make_session()
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "host",
@@ -2180,7 +2223,7 @@ async def test_missing_required_secret_field_prompts_user(
         """Required secret field without inline value triggers password prompt."""
         session = make_session()
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "api_key",
@@ -2218,7 +2261,7 @@ async def test_incomplete_custom_datasource_not_saved(self, tmp_path, make_sessi
         """Empty responses for all required fields causes a hard stop (None)."""
         session = make_session()
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "host",
@@ -2264,21 +2307,22 @@ class TestCustomDatasourceConnectFlow:
 
     # ── helpers (mirrors TestAddCustomDatasourceFlow) ────────────────────
 
-    def _make_llm_response(
+    def _make_spec(
         self,
         fields: list[dict],
         display_name: str = "My API Service",
         test_snippet: str = "",
-    ) -> str:
-        import json as _json
+    ):
+        from anton.commands.datasource import (
+            _CustomDatasourceField,
+            _CustomDatasourceSpec,
+        )
 
-        return _json.dumps(
-            {
-                "display_name": display_name,
-                "pip": "",
-                "test_snippet": test_snippet,
-                "fields": fields,
-            }
+        return _CustomDatasourceSpec(
+            display_name=display_name,
+            pip="",
+            test_snippet=test_snippet,
+            fields=[_CustomDatasourceField(**f) for f in fields],
         )
 
     def _make_registry(self, tmp_path):
@@ -2291,11 +2335,9 @@ def _make_registry(self, tmp_path):
         reg.get.return_value = None  # triggers inline fallback engine_def
         return reg
 
-    def _make_llm(self, json_text: str):
+    def _make_llm(self, spec):
         llm = AsyncMock()
-        response = MagicMock()
-        response.content = json_text
-        llm.plan = AsyncMock(return_value=response)
+        llm.generate_object = AsyncMock(return_value=spec)
         return llm
 
     def _mock_ds_path(self, mock_path_cls, tmp_path):
@@ -2313,7 +2355,7 @@ async def test_custom_with_test_snippet_success(
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "api_key",
@@ -2369,7 +2411,7 @@ async def test_custom_with_test_snippet_fail_no_retry(
         pad = make_pad(make_cell(stdout="", stderr="connection refused"))
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "api_key",
@@ -2422,7 +2464,7 @@ async def test_custom_with_test_snippet_fail_retry_success(
         ])
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "api_key",
@@ -2484,7 +2526,7 @@ async def test_custom_without_test_snippet_saves(
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
         session._llm = self._make_llm(
-            self._make_llm_response(
+            self._make_spec(
                 [
                     {
                         "name": "api_key",
diff --git a/tests/test_llm_client_generate_object.py b/tests/test_llm_client_generate_object.py
new file mode 100644
index 00000000..5f7fc55b
--- /dev/null
+++ b/tests/test_llm_client_generate_object.py
@@ -0,0 +1,320 @@
+"""Tests for `LLMClient.generate_object` — structured output via forced tool_choice."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+import pytest
+from pydantic import BaseModel
+
+from anton.core.llm.client import LLMClient
+from anton.core.llm.provider import LLMProvider, LLMResponse, ToolCall, Usage
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Pydantic schemas used by the tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class SimpleAnswer(BaseModel):
+    answer: str
+    confidence: float
+
+
+class Lesson(BaseModel):
+    text: str
+    topic: str = "default"
+
+
+class LessonBatch(BaseModel):
+    lessons: list[Lesson]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fake provider that records calls and returns canned responses
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _FakePlanningProvider(LLMProvider):
+    """Captures `complete` arguments and returns a pre-built LLMResponse."""
+
+    def __init__(self, response: LLMResponse) -> None:
+        self.response = response
+        self.complete_mock = AsyncMock(return_value=response)
+
+    async def complete(self, **kwargs):  # type: ignore[override]
+        return await self.complete_mock(**kwargs)
+
+
+def _make_client(provider: _FakePlanningProvider) -> LLMClient:
+    """Build an LLMClient where the planning provider is our fake."""
+    return LLMClient(
+        planning_provider=provider,
+        planning_model="test-model",
+        coding_provider=provider,  # reuse — we don't exercise the coding path
+        coding_model="test-model",
+        max_tokens=8192,
+    )
+
+
+def _tool_call_response(tool_name: str, payload: dict) -> LLMResponse:
+    """Build an LLMResponse that looks like the LLM made a forced tool call."""
+    return LLMResponse(
+        content="",
+        tool_calls=[ToolCall(id="t1", name=tool_name, input=payload)],
+        usage=Usage(input_tokens=10, output_tokens=20),
+        stop_reason="tool_use",
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Single-model generation
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestSingleModel:
+    @pytest.mark.asyncio
+    async def test_returns_validated_instance(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response(
+                "SimpleAnswer", {"answer": "42", "confidence": 0.95}
+            )
+        )
+        client = _make_client(provider)
+
+        result = await client.generate_object(
+            SimpleAnswer,
+            system="be terse",
+            messages=[{"role": "user", "content": "what's the answer?"}],
+        )
+
+        assert isinstance(result, SimpleAnswer)
+        assert result.answer == "42"
+        assert result.confidence == 0.95
+
+    @pytest.mark.asyncio
+    async def test_forces_tool_choice(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("SimpleAnswer", {"answer": "x", "confidence": 0.5})
+        )
+        client = _make_client(provider)
+
+        await client.generate_object(
+            SimpleAnswer,
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+        )
+
+        # Provider was called with tool_choice forcing the SimpleAnswer tool
+        provider.complete_mock.assert_awaited_once()
+        kwargs = provider.complete_mock.call_args.kwargs
+        assert kwargs["tool_choice"] == {"type": "tool", "name": "SimpleAnswer"}
+        # And the tool's schema came from the Pydantic model
+        assert len(kwargs["tools"]) == 1
+        tool = kwargs["tools"][0]
+        assert tool["name"] == "SimpleAnswer"
+        assert "input_schema" in tool
+        # The schema mentions both fields
+        schema_str = str(tool["input_schema"])
+        assert "answer" in schema_str
+        assert "confidence" in schema_str
+
+    @pytest.mark.asyncio
+    async def test_passes_system_and_messages_through(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("SimpleAnswer", {"answer": "x", "confidence": 0.5})
+        )
+        client = _make_client(provider)
+
+        await client.generate_object(
+            SimpleAnswer,
+            system="custom system",
+            messages=[{"role": "user", "content": "custom user"}],
+        )
+
+        kwargs = provider.complete_mock.call_args.kwargs
+        assert kwargs["system"] == "custom system"
+        assert kwargs["messages"] == [{"role": "user", "content": "custom user"}]
+        assert kwargs["model"] == "test-model"
+
+    @pytest.mark.asyncio
+    async def test_invalid_payload_raises_validation_error(self):
+        # Provider returns a payload missing the required `confidence` field
+        provider = _FakePlanningProvider(
+            _tool_call_response("SimpleAnswer", {"answer": "x"})
+        )
+        client = _make_client(provider)
+
+        with pytest.raises(Exception) as exc_info:
+            await client.generate_object(
+                SimpleAnswer,
+                system="x",
+                messages=[{"role": "user", "content": "y"}],
+            )
+        # Pydantic raises ValidationError, which we want surfaced
+        assert "confidence" in str(exc_info.value).lower()
+
+    @pytest.mark.asyncio
+    async def test_no_tool_call_raises_value_error(self):
+        # Provider returns text-only response — no tool call at all
+        provider = _FakePlanningProvider(
+            LLMResponse(content="just text", tool_calls=[])
+        )
+        client = _make_client(provider)
+
+        with pytest.raises(ValueError, match="did not return a tool call"):
+            await client.generate_object(
+                SimpleAnswer,
+                system="x",
+                messages=[{"role": "user", "content": "y"}],
+            )
+
+    @pytest.mark.asyncio
+    async def test_max_tokens_uses_default_when_not_specified(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("SimpleAnswer", {"answer": "x", "confidence": 0.5})
+        )
+        client = _make_client(provider)
+
+        await client.generate_object(
+            SimpleAnswer,
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+        )
+
+        kwargs = provider.complete_mock.call_args.kwargs
+        assert kwargs["max_tokens"] == 8192  # the client default
+
+    @pytest.mark.asyncio
+    async def test_max_tokens_override(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("SimpleAnswer", {"answer": "x", "confidence": 0.5})
+        )
+        client = _make_client(provider)
+
+        await client.generate_object(
+            SimpleAnswer,
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+            max_tokens=512,
+        )
+
+        kwargs = provider.complete_mock.call_args.kwargs
+        assert kwargs["max_tokens"] == 512
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# list[Model] generation
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestListModel:
+    @pytest.mark.asyncio
+    async def test_returns_typed_list(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response(
+                "Lesson_array",
+                {
+                    "items": [
+                        {"text": "first lesson", "topic": "scratchpad"},
+                        {"text": "second lesson", "topic": "scratchpad"},
+                    ]
+                },
+            )
+        )
+        client = _make_client(provider)
+
+        result = await client.generate_object(
+            list[Lesson],
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+        )
+
+        assert isinstance(result, list)
+        assert len(result) == 2
+        assert all(isinstance(item, Lesson) for item in result)
+        assert result[0].text == "first lesson"
+        assert result[1].text == "second lesson"
+
+    @pytest.mark.asyncio
+    async def test_list_uses_array_tool_name(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("Lesson_array", {"items": []})
+        )
+        client = _make_client(provider)
+
+        await client.generate_object(
+            list[Lesson],
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+        )
+
+        kwargs = provider.complete_mock.call_args.kwargs
+        assert kwargs["tool_choice"] == {"type": "tool", "name": "Lesson_array"}
+        assert kwargs["tools"][0]["name"] == "Lesson_array"
+
+    @pytest.mark.asyncio
+    async def test_empty_list_is_valid(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("Lesson_array", {"items": []})
+        )
+        client = _make_client(provider)
+
+        result = await client.generate_object(
+            list[Lesson],
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+        )
+
+        assert result == []
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Nested model (BaseModel containing list[BaseModel]) — the cerebellum case
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestNestedModel:
+    @pytest.mark.asyncio
+    async def test_lesson_batch_round_trip(self):
+        """The shape the cerebellum will use: a wrapper model with a list."""
+        provider = _FakePlanningProvider(
+            _tool_call_response(
+                "LessonBatch",
+                {
+                    "lessons": [
+                        {
+                            "text": "Use low_memory=False with mixed dtypes.",
+                            "topic": "scratchpad",
+                        }
+                    ]
+                },
+            )
+        )
+        client = _make_client(provider)
+
+        result = await client.generate_object(
+            LessonBatch,
+            system="extract lessons",
+            messages=[{"role": "user", "content": "cell errored"}],
+        )
+
+        assert isinstance(result, LessonBatch)
+        assert len(result.lessons) == 1
+        assert isinstance(result.lessons[0], Lesson)
+        assert "low_memory" in result.lessons[0].text
+
+    @pytest.mark.asyncio
+    async def test_empty_lessons_list_is_valid(self):
+        provider = _FakePlanningProvider(
+            _tool_call_response("LessonBatch", {"lessons": []})
+        )
+        client = _make_client(provider)
+
+        result = await client.generate_object(
+            LessonBatch,
+            system="x",
+            messages=[{"role": "user", "content": "y"}],
+        )
+
+        assert result.lessons == []
diff --git a/tests/test_llm_structured_helper.py b/tests/test_llm_structured_helper.py
new file mode 100644
index 00000000..61549b81
--- /dev/null
+++ b/tests/test_llm_structured_helper.py
@@ -0,0 +1,256 @@
+"""Unit tests for `anton.core.llm.structured` — pure helper functions.
+
+These verify the schema-building and unwrapping logic in isolation,
+without going through any LLM call. Both `LLMClient.generate_object`
+and `_ScratchpadLLM.generate_object` delegate to these functions, so
+locking their contract here is the foundation for both call sites.
+"""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from anton.core.llm.structured import (
+    build_structured_tool,
+    unwrap_structured_response,
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Pydantic schemas used by the tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class Answer(BaseModel):
+    text: str
+    confidence: float
+
+
+class Lesson(BaseModel):
+    text: str
+    topic: str = "default"
+
+
+class WrappedLessons(BaseModel):
+    """A wrapper model with a list field — the cerebellum's exact pattern."""
+
+    lessons: list[Lesson]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# build_structured_tool — single model
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestBuildStructuredToolSingle:
+    def test_returns_three_tuple(self):
+        result = build_structured_tool(Answer)
+        assert isinstance(result, tuple)
+        assert len(result) == 3
+
+    def test_tool_has_required_fields(self):
+        tool, _, _ = build_structured_tool(Answer)
+        assert "name" in tool
+        assert "description" in tool
+        assert "input_schema" in tool
+
+    def test_tool_name_is_class_name(self):
+        tool, _, _ = build_structured_tool(Answer)
+        assert tool["name"] == "Answer"
+
+    def test_input_schema_includes_field_names(self):
+        tool, _, _ = build_structured_tool(Answer)
+        schema_str = str(tool["input_schema"])
+        assert "text" in schema_str
+        assert "confidence" in schema_str
+
+    def test_validator_class_is_input_class(self):
+        _, validator_class, _ = build_structured_tool(Answer)
+        assert validator_class is Answer
+
+    def test_is_list_is_false_for_single_model(self):
+        _, _, is_list = build_structured_tool(Answer)
+        assert is_list is False
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# build_structured_tool — list[Model]
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestBuildStructuredToolList:
+    def test_tool_name_uses_array_suffix(self):
+        tool, _, _ = build_structured_tool(list[Lesson])
+        assert tool["name"] == "Lesson_array"
+
+    def test_validator_class_is_wrapper_not_inner(self):
+        _, validator_class, _ = build_structured_tool(list[Lesson])
+        # Wrapper is a synthetic class — it should NOT be Lesson itself
+        assert validator_class is not Lesson
+        # And it should have an `items` field
+        assert "items" in validator_class.model_fields
+
+    def test_is_list_is_true(self):
+        _, _, is_list = build_structured_tool(list[Lesson])
+        assert is_list is True
+
+    def test_input_schema_has_items_array(self):
+        tool, _, _ = build_structured_tool(list[Lesson])
+        schema_str = str(tool["input_schema"])
+        assert "items" in schema_str
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# build_structured_tool — wrapper model with list field (cerebellum pattern)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestBuildStructuredToolWrapper:
+    def test_wrapper_treated_as_single_model(self):
+        tool, validator_class, is_list = build_structured_tool(WrappedLessons)
+        # WrappedLessons IS a BaseModel that happens to contain a list,
+        # but the helper should treat it as a single model (not list[T])
+        # because the input wasn't a list[X] annotation.
+        assert is_list is False
+        assert validator_class is WrappedLessons
+        assert tool["name"] == "WrappedLessons"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# unwrap_structured_response — single model
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestUnwrapSingleModel:
+    def test_valid_payload_returns_instance(self):
+        _, validator_class, is_list = build_structured_tool(Answer)
+        result = unwrap_structured_response(
+            {"text": "42", "confidence": 0.95},
+            validator_class,
+            is_list,
+        )
+        assert isinstance(result, Answer)
+        assert result.text == "42"
+        assert result.confidence == 0.95
+
+    def test_missing_required_field_raises(self):
+        _, validator_class, is_list = build_structured_tool(Answer)
+        with pytest.raises(ValidationError) as exc_info:
+            unwrap_structured_response(
+                {"text": "no confidence here"}, validator_class, is_list
+            )
+        assert "confidence" in str(exc_info.value).lower()
+
+    def test_wrong_type_raises(self):
+        _, validator_class, is_list = build_structured_tool(Answer)
+        with pytest.raises(ValidationError):
+            unwrap_structured_response(
+                {"text": "x", "confidence": "not a number"},
+                validator_class,
+                is_list,
+            )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# unwrap_structured_response — list[Model]
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestUnwrapList:
+    def test_valid_list_returns_typed_items(self):
+        _, validator_class, is_list = build_structured_tool(list[Lesson])
+        result = unwrap_structured_response(
+            {
+                "items": [
+                    {"text": "first", "topic": "scratchpad"},
+                    {"text": "second", "topic": "default"},
+                ]
+            },
+            validator_class,
+            is_list,
+        )
+        assert isinstance(result, list)
+        assert len(result) == 2
+        assert all(isinstance(item, Lesson) for item in result)
+        assert result[0].text == "first"
+        assert result[1].topic == "default"
+
+    def test_empty_list_returns_empty(self):
+        _, validator_class, is_list = build_structured_tool(list[Lesson])
+        result = unwrap_structured_response(
+            {"items": []}, validator_class, is_list
+        )
+        assert result == []
+
+    def test_missing_items_field_raises(self):
+        _, validator_class, is_list = build_structured_tool(list[Lesson])
+        with pytest.raises(ValidationError):
+            unwrap_structured_response({}, validator_class, is_list)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# unwrap_structured_response — wrapper model with list (cerebellum pattern)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestUnwrapWrapperModel:
+    def test_round_trip(self):
+        _, validator_class, is_list = build_structured_tool(WrappedLessons)
+        result = unwrap_structured_response(
+            {
+                "lessons": [
+                    {"text": "lesson 1", "topic": "scratchpad"},
+                    {"text": "lesson 2", "topic": "scratchpad"},
+                ]
+            },
+            validator_class,
+            is_list,
+        )
+        # Returns the wrapper instance, NOT the items list (because the
+        # original input wasn't a list[T] annotation — it was the
+        # wrapper class directly)
+        assert isinstance(result, WrappedLessons)
+        assert len(result.lessons) == 2
+        assert result.lessons[0].text == "lesson 1"
+
+    def test_empty_lessons_list_is_valid(self):
+        _, validator_class, is_list = build_structured_tool(WrappedLessons)
+        result = unwrap_structured_response(
+            {"lessons": []}, validator_class, is_list
+        )
+        assert isinstance(result, WrappedLessons)
+        assert result.lessons == []
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Round-trip — build then unwrap, mimicking the full flow
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestRoundTrip:
+    def test_single_model_round_trip(self):
+        tool, validator_class, is_list = build_structured_tool(Answer)
+        # Simulate the LLM calling the tool with this input
+        simulated_tool_input = {"text": "yes", "confidence": 0.7}
+        result = unwrap_structured_response(
+            simulated_tool_input, validator_class, is_list
+        )
+        assert result.text == "yes"
+        assert result.confidence == 0.7
+        # Sanity check the tool name was right
+        assert tool["name"] == "Answer"
+
+    def test_list_model_round_trip(self):
+        tool, validator_class, is_list = build_structured_tool(list[Lesson])
+        simulated = {
+            "items": [
+                {"text": "x", "topic": "a"},
+                {"text": "y", "topic": "b"},
+                {"text": "z", "topic": "c"},
+            ]
+        }
+        result = unwrap_structured_response(simulated, validator_class, is_list)
+        assert len(result) == 3
+        assert [l.text for l in result] == ["x", "y", "z"]
+        assert tool["name"] == "Lesson_array"
diff --git a/tests/test_prompt_builder_skills.py b/tests/test_prompt_builder_skills.py
new file mode 100644
index 00000000..87631eb6
--- /dev/null
+++ b/tests/test_prompt_builder_skills.py
@@ -0,0 +1,144 @@
+"""Tests for the procedural-memory section in the chat system prompt."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.memory.skills import Skill, SkillStore
+
+
+@pytest.fixture()
+def empty_store(tmp_path: Path) -> SkillStore:
+    return SkillStore(root=tmp_path / "skills_empty")
+
+
+@pytest.fixture()
+def populated_store(tmp_path: Path) -> SkillStore:
+    s = SkillStore(root=tmp_path / "skills_populated")
+    for label, when in [
+        ("csv_summary", "User asks to explore or summarize a CSV file."),
+        ("web_scraping", "User asks to scrape data from a website."),
+        ("api_fetcher", "User asks to fetch data from a JSON API."),
+    ]:
+        s.save(
+            Skill(
+                label=label,
+                name=label.replace("_", " ").title(),
+                description="",
+                when_to_use=when,
+                declarative_md="step 1\nstep 2",
+                created_at="2026-04-10T12:00:00+00:00",
+                provenance="manual",
+            )
+        )
+    return s
+
+
+def _build_prompt(builder: ChatSystemPromptBuilder, **overrides) -> str:
+    defaults = dict(
+        current_datetime="2026-04-10T12:00:00+00:00",
+        runtime_context="test runtime",
+        proactive_dashboards=False,
+        output_dir="/tmp/anton_out",
+    )
+    defaults.update(overrides)
+    return builder.build(**defaults)
+
+
+class TestProceduralMemorySection:
+    def test_no_store_omits_section(self):
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=None)
+        assert "Procedural memory" not in prompt
+
+    def test_empty_store_omits_section(self, empty_store: SkillStore):
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=empty_store)
+        assert "Procedural memory" not in prompt
+
+    def test_populated_store_renders_section(self, populated_store: SkillStore):
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=populated_store)
+        assert "## Procedural memory" in prompt
+        # All labels are listed
+        assert "`csv_summary`" in prompt
+        assert "`web_scraping`" in prompt
+        assert "`api_fetcher`" in prompt
+        # And their when_to_use
+        assert "explore or summarize a CSV" in prompt
+        assert "scrape data from a website" in prompt
+        assert "fetch data from a JSON API" in prompt
+
+    def test_section_mentions_recall_skill_tool(
+        self, populated_store: SkillStore
+    ):
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=populated_store)
+        # The section instructs the LLM how to use them
+        assert "recall_skill" in prompt
+
+    def test_section_appears_after_other_contexts(
+        self, populated_store: SkillStore
+    ):
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(
+            builder,
+            memory_context="\n\n## Memory context\nMEMORY HERE",
+            datasource_context="\n\n## Datasources\nDS HERE",
+            skill_store=populated_store,
+        )
+        # Procedural memory should appear AFTER datasource_context
+        memory_pos = prompt.find("MEMORY HERE")
+        ds_pos = prompt.find("DS HERE")
+        proc_pos = prompt.find("## Procedural memory")
+        assert memory_pos != -1
+        assert ds_pos != -1
+        assert proc_pos != -1
+        assert proc_pos > ds_pos
+        assert proc_pos > memory_pos
+
+    def test_section_is_compact(self, populated_store: SkillStore):
+        """Sanity check: ~50 tokens per skill or less.
+
+        We don't want the procedural memory section to dominate the
+        prompt — it's a navigational index, not the actual procedures.
+        """
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=populated_store)
+        section_start = prompt.find("## Procedural memory")
+        section = prompt[section_start:]
+        # Three skills, header + intro + bullets — keep it under ~600 chars
+        assert len(section) < 1000
+
+    def test_handles_skill_without_when_to_use(self, tmp_path: Path):
+        s = SkillStore(root=tmp_path / "skills_partial")
+        s.save(
+            Skill(
+                label="bare",
+                name="Bare",
+                description="",
+                when_to_use="",
+                declarative_md="x",
+                created_at="2026-04-10T12:00:00+00:00",
+                provenance="manual",
+            )
+        )
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=s)
+        assert "`bare`" in prompt
+        # No crash, even with no when_to_use
+
+    def test_skip_section_when_store_raises(self, tmp_path: Path, monkeypatch):
+        """If the store blows up at read time, the section is omitted gracefully."""
+        s = SkillStore(root=tmp_path / "skills_broken")
+
+        def boom(self):
+            raise RuntimeError("disk on fire")
+
+        monkeypatch.setattr(SkillStore, "list_summaries", boom)
+        builder = ChatSystemPromptBuilder()
+        prompt = _build_prompt(builder, skill_store=s)
+        assert "Procedural memory" not in prompt
diff --git a/tests/test_recall_skill.py b/tests/test_recall_skill.py
new file mode 100644
index 00000000..2f53d693
--- /dev/null
+++ b/tests/test_recall_skill.py
@@ -0,0 +1,218 @@
+"""Tests for the `recall_skill` tool handler."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from anton.core.memory.skills import Skill, SkillStore
+from anton.core.tools.recall_skill import (
+    RECALL_SKILL_TOOL,
+    handle_recall_skill,
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.fixture()
+def store(tmp_path: Path) -> SkillStore:
+    s = SkillStore(root=tmp_path / "skills")
+    s.save(
+        Skill(
+            label="csv_summary",
+            name="CSV Summary",
+            description="Load a CSV, infer schema, compute summary stats.",
+            when_to_use="User asks to explore, summarize, or describe a CSV file.",
+            declarative_md="1. Load the CSV.\n2. Infer types.\n3. Print summary.",
+            created_at="2026-04-10T12:00:00+00:00",
+            provenance="manual",
+        )
+    )
+    s.save(
+        Skill(
+            label="web_scraping",
+            name="Web Scraping",
+            description="Fetch and parse HTML to extract structured data.",
+            when_to_use="User asks to scrape or extract data from a webpage.",
+            declarative_md="1. Fetch the URL.\n2. Parse with BeautifulSoup.\n3. Select elements.",
+            created_at="2026-04-10T12:00:00+00:00",
+            provenance="manual",
+        )
+    )
+    s.save(
+        Skill(
+            label="api_fetcher",
+            name="API Fetcher",
+            description="Call a REST API and normalize the response.",
+            when_to_use="User asks to fetch data from a JSON API.",
+            declarative_md="1. Identify auth.\n2. Call endpoint.\n3. Normalize.",
+            created_at="2026-04-10T12:00:00+00:00",
+            provenance="manual",
+        )
+    )
+    return s
+
+
+def _session_with(store: SkillStore) -> SimpleNamespace:
+    """Build a minimal session-like object exposing only `_skill_store`."""
+    return SimpleNamespace(_skill_store=store)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Tool def basics
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestToolDef:
+    def test_tool_name(self):
+        assert RECALL_SKILL_TOOL.name == "recall_skill"
+
+    def test_required_label_param(self):
+        schema = RECALL_SKILL_TOOL.input_schema
+        assert "label" in schema["properties"]
+        assert schema["required"] == ["label"]
+
+    def test_handler_is_wired(self):
+        assert RECALL_SKILL_TOOL.handler is handle_recall_skill
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Happy path
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestExactMatch:
+    @pytest.mark.asyncio
+    async def test_returns_procedure(self, store: SkillStore):
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {"label": "csv_summary"})
+        assert "CSV Summary" in result
+        assert "Load the CSV" in result
+        assert "Infer types" in result
+        assert "Print summary" in result
+
+    @pytest.mark.asyncio
+    async def test_increments_recommended_counter(self, store: SkillStore):
+        session = _session_with(store)
+        await handle_recall_skill(session, {"label": "csv_summary"})
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 1
+
+    @pytest.mark.asyncio
+    async def test_repeated_calls_accumulate(self, store: SkillStore):
+        session = _session_with(store)
+        for _ in range(3):
+            await handle_recall_skill(session, {"label": "web_scraping"})
+        loaded = store.load("web_scraping")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 3
+        assert loaded.stats.total_recalls == 3
+
+    @pytest.mark.asyncio
+    async def test_does_not_cross_contaminate(self, store: SkillStore):
+        session = _session_with(store)
+        await handle_recall_skill(session, {"label": "csv_summary"})
+        await handle_recall_skill(session, {"label": "csv_summary"})
+        await handle_recall_skill(session, {"label": "web_scraping"})
+        csv = store.load("csv_summary")
+        web = store.load("web_scraping")
+        api = store.load("api_fetcher")
+        assert csv is not None and csv.stats.stage_1.recommended == 2
+        assert web is not None and web.stats.stage_1.recommended == 1
+        assert api is not None and api.stats.stage_1.recommended == 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Typo fallback
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestTypoFallback:
+    @pytest.mark.asyncio
+    async def test_typo_returns_closest_match(self, store: SkillStore):
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {"label": "csv_sumary"})
+        assert "⚠" in result
+        assert "csv_summary" in result
+        # The full procedure is still included after the warning
+        assert "Load the CSV" in result
+
+    @pytest.mark.asyncio
+    async def test_typo_credits_resolved_label_not_input(self, store: SkillStore):
+        session = _session_with(store)
+        await handle_recall_skill(session, {"label": "csv_sumary"})
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 1
+
+    @pytest.mark.asyncio
+    async def test_dash_to_underscore_recovered(self, store: SkillStore):
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {"label": "web-scraping"})
+        assert "web_scraping" in result
+        # Could match exactly via slugify, in which case there's no warning,
+        # or via fuzzy match. Either way the procedure should be returned.
+        assert "BeautifulSoup" in result
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Unknown / error paths
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestUnknownSlug:
+    @pytest.mark.asyncio
+    async def test_unrelated_returns_no_match_with_listing(
+        self, store: SkillStore
+    ):
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {"label": "xyzzy_quark"})
+        assert "NO MATCH" in result
+        # Should mention all available labels
+        assert "csv_summary" in result
+        assert "web_scraping" in result
+        assert "api_fetcher" in result
+
+    @pytest.mark.asyncio
+    async def test_unrelated_does_not_increment_counters(
+        self, store: SkillStore
+    ):
+        session = _session_with(store)
+        await handle_recall_skill(session, {"label": "xyzzy_quark"})
+        for label in ("csv_summary", "web_scraping", "api_fetcher"):
+            loaded = store.load(label)
+            assert loaded is not None
+            assert loaded.stats.stage_1.recommended == 0
+            assert loaded.stats.total_recalls == 0
+
+    @pytest.mark.asyncio
+    async def test_empty_label_returns_error(self, store: SkillStore):
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {"label": ""})
+        assert "ERROR" in result
+
+    @pytest.mark.asyncio
+    async def test_missing_label_returns_error(self, store: SkillStore):
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {})
+        assert "ERROR" in result
+
+    @pytest.mark.asyncio
+    async def test_no_store_on_session_returns_error(self):
+        session = SimpleNamespace()  # no _skill_store
+        result = await handle_recall_skill(session, {"label": "csv_summary"})
+        assert "ERROR" in result
+
+    @pytest.mark.asyncio
+    async def test_empty_store_unrelated_label(self, tmp_path: Path):
+        store = SkillStore(root=tmp_path / "empty_skills")
+        session = _session_with(store)
+        result = await handle_recall_skill(session, {"label": "anything"})
+        assert "NO MATCH" in result
+        assert "empty" in result.lower()
diff --git a/tests/test_scratchpad_observer_dispatch.py b/tests/test_scratchpad_observer_dispatch.py
new file mode 100644
index 00000000..1a99c4a5
--- /dev/null
+++ b/tests/test_scratchpad_observer_dispatch.py
@@ -0,0 +1,290 @@
+"""Tests for the scratchpad observer dispatch in `handle_scratchpad`.
+
+These tests verify that the observer hooks fire at the right moments
+around `pad.execute()` and that the runtime stays untouched. They use
+a fake observer (not the real Cerebellum) so we can assert exactly
+what was passed and in what order.
+
+The dispatcher pattern: observation is an orchestration concern that
+lives at the dispatcher layer. The runtime is a pure execution engine
+and never sees observers. These tests pin that contract.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from anton.core.backends.base import Cell
+from anton.core.tools.tool_handlers import (
+    _fire_post_execute,
+    _fire_pre_execute,
+    handle_scratchpad,
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fake observer that records the cells it sees
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class _RecordingObserver:
+    def __init__(self, *, pre_raises: Exception | None = None,
+                 post_raises: Exception | None = None) -> None:
+        self.pre_calls: list[Cell] = []
+        self.post_calls: list[Cell] = []
+        self._pre_raises = pre_raises
+        self._post_raises = post_raises
+
+    async def on_pre_execute(self, cell: Cell) -> None:
+        self.pre_calls.append(cell)
+        if self._pre_raises is not None:
+            raise self._pre_raises
+
+    async def on_post_execute(self, cell: Cell) -> None:
+        self.post_calls.append(cell)
+        if self._post_raises is not None:
+            raise self._post_raises
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def _fake_session(
+    observers: list | None = None,
+    cell_to_return: Cell | None = None,
+) -> MagicMock:
+    """Build a minimal session-like mock that handle_scratchpad can drive."""
+    session = MagicMock()
+    if observers is not None:
+        session._scratchpad_observers = observers
+    else:
+        session._scratchpad_observers = []
+    session._record_cell_explainability = MagicMock()
+
+    # The cell that pad.execute() will return when called
+    if cell_to_return is None:
+        cell_to_return = Cell(
+            code="print('hi')",
+            stdout="hi",
+            stderr="",
+            error=None,
+            description="say hi",
+        )
+    pad = MagicMock()
+    pad.execute = AsyncMock(return_value=cell_to_return)
+    pad.view = MagicMock(return_value="view output")
+    pad.render_notebook = MagicMock(return_value="notebook")
+    pad.reset = AsyncMock()
+    pad.install_packages = AsyncMock(return_value="installed")
+
+    session._scratchpads = MagicMock()
+    session._scratchpads.get_or_create = AsyncMock(return_value=pad)
+    session._scratchpads.pads = {"main": pad}
+    session._scratchpads.remove = AsyncMock(return_value="removed 'main'.")
+    return session, pad
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# _fire_pre_execute / _fire_post_execute helpers — direct unit tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestFireHelpers:
+    @pytest.mark.asyncio
+    async def test_no_observers_is_noop(self):
+        session = SimpleNamespace(_scratchpad_observers=[])
+        cell = Cell(code="x", stdout="", stderr="", error=None)
+        # Should not raise
+        await _fire_pre_execute(session, cell)
+        await _fire_post_execute(session, cell)
+
+    @pytest.mark.asyncio
+    async def test_missing_attribute_is_noop(self):
+        # Session that doesn't have _scratchpad_observers at all
+        session = SimpleNamespace()
+        cell = Cell(code="x", stdout="", stderr="", error=None)
+        await _fire_pre_execute(session, cell)
+        await _fire_post_execute(session, cell)
+
+    @pytest.mark.asyncio
+    async def test_observer_receives_cell(self):
+        obs = _RecordingObserver()
+        session = SimpleNamespace(_scratchpad_observers=[obs])
+        cell = Cell(code="x", stdout="", stderr="", error=None, description="set x")
+        await _fire_pre_execute(session, cell)
+        assert len(obs.pre_calls) == 1
+        assert obs.pre_calls[0].code == "x"
+        assert obs.pre_calls[0].description == "set x"
+
+    @pytest.mark.asyncio
+    async def test_multiple_observers_all_fire(self):
+        obs_a = _RecordingObserver()
+        obs_b = _RecordingObserver()
+        session = SimpleNamespace(_scratchpad_observers=[obs_a, obs_b])
+        cell = Cell(code="y", stdout="", stderr="", error=None)
+        await _fire_post_execute(session, cell)
+        assert len(obs_a.post_calls) == 1
+        assert len(obs_b.post_calls) == 1
+
+    @pytest.mark.asyncio
+    async def test_one_observer_raising_does_not_stop_others(self):
+        bad = _RecordingObserver(pre_raises=RuntimeError("boom"))
+        good = _RecordingObserver()
+        session = SimpleNamespace(_scratchpad_observers=[bad, good])
+        cell = Cell(code="x", stdout="", stderr="", error=None)
+        # Must not raise
+        await _fire_pre_execute(session, cell)
+        # Good observer still fired
+        assert len(good.pre_calls) == 1
+
+    @pytest.mark.asyncio
+    async def test_observer_without_method_is_skipped(self):
+        # Observer with no on_pre_execute/on_post_execute methods
+        bare = SimpleNamespace()
+        session = SimpleNamespace(_scratchpad_observers=[bare])
+        cell = Cell(code="x", stdout="", stderr="", error=None)
+        # Should silently skip — no AttributeError
+        await _fire_pre_execute(session, cell)
+        await _fire_post_execute(session, cell)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# handle_scratchpad: full dispatch flow with observers
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestHandleScratchpadObserverIntegration:
+    @pytest.mark.asyncio
+    async def test_exec_fires_pre_then_post(self):
+        obs = _RecordingObserver()
+        cell = Cell(
+            code="print('hi')",
+            stdout="hi",
+            stderr="",
+            error=None,
+            description="say hi",
+        )
+        session, pad = _fake_session(observers=[obs], cell_to_return=cell)
+
+        await handle_scratchpad(
+            session,
+            {
+                "action": "exec",
+                "name": "main",
+                "code": "print('hi')",
+                "one_line_description": "say hi",
+                "estimated_execution_time_seconds": 1,
+            },
+        )
+
+        # Pre-execute fired with the prelim cell (no outputs yet)
+        assert len(obs.pre_calls) == 1
+        prelim = obs.pre_calls[0]
+        assert prelim.code == "print('hi')"
+        assert prelim.description == "say hi"
+        assert prelim.stdout == ""  # not yet executed
+        assert prelim.error is None
+
+        # Post-execute fired with the actual cell
+        assert len(obs.post_calls) == 1
+        actual = obs.post_calls[0]
+        assert actual.stdout == "hi"
+        assert actual.description == "say hi"
+
+        # The order: pre fires before pad.execute, post fires after
+        pad.execute.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_exec_passes_error_cell_to_observer(self):
+        obs = _RecordingObserver()
+        cell = Cell(
+            code="x = 1/0",
+            stdout="",
+            stderr="",
+            error="ZeroDivisionError: division by zero",
+            description="divide by zero",
+        )
+        session, _ = _fake_session(observers=[obs], cell_to_return=cell)
+
+        await handle_scratchpad(
+            session,
+            {
+                "action": "exec",
+                "name": "main",
+                "code": "x = 1/0",
+                "one_line_description": "divide by zero",
+                "estimated_execution_time_seconds": 1,
+            },
+        )
+
+        assert len(obs.post_calls) == 1
+        assert obs.post_calls[0].error is not None
+        assert "ZeroDivisionError" in obs.post_calls[0].error
+
+    @pytest.mark.asyncio
+    async def test_exec_with_no_observers_works_unchanged(self):
+        # Empty observer list — exec should still work end-to-end
+        session, _ = _fake_session(observers=[])
+        result = await handle_scratchpad(
+            session,
+            {
+                "action": "exec",
+                "name": "main",
+                "code": "print('hi')",
+                "one_line_description": "say hi",
+                "estimated_execution_time_seconds": 1,
+            },
+        )
+        # Result is whatever format_cell_result returns — non-empty string
+        assert isinstance(result, str)
+        assert len(result) > 0
+
+    @pytest.mark.asyncio
+    async def test_observer_exception_does_not_break_exec(self):
+        # An observer that throws on every call
+        bad = _RecordingObserver(
+            pre_raises=RuntimeError("pre boom"),
+            post_raises=RuntimeError("post boom"),
+        )
+        session, _ = _fake_session(observers=[bad])
+
+        # Exec should still complete and return a result
+        result = await handle_scratchpad(
+            session,
+            {
+                "action": "exec",
+                "name": "main",
+                "code": "print('hi')",
+                "one_line_description": "say hi",
+                "estimated_execution_time_seconds": 1,
+            },
+        )
+        assert isinstance(result, str)
+        # Both observer methods were called even though they raised
+        assert len(bad.pre_calls) == 1
+        assert len(bad.post_calls) == 1
+
+    @pytest.mark.asyncio
+    async def test_non_exec_actions_do_not_fire_observers(self):
+        """view, reset, install, dump, remove should not trigger observers."""
+        obs = _RecordingObserver()
+        session, _ = _fake_session(observers=[obs])
+
+        for action in ("view", "reset", "dump"):
+            await handle_scratchpad(
+                session, {"action": action, "name": "main"}
+            )
+
+        await handle_scratchpad(
+            session,
+            {"action": "install", "name": "main", "packages": ["x"]},
+        )
+        await handle_scratchpad(session, {"action": "remove", "name": "main"})
+
+        assert obs.pre_calls == []
+        assert obs.post_calls == []
diff --git a/tests/test_session_skills_init.py b/tests/test_session_skills_init.py
new file mode 100644
index 00000000..f60a579a
--- /dev/null
+++ b/tests/test_session_skills_init.py
@@ -0,0 +1,128 @@
+"""Light wiring tests for skills integration in ChatSession.
+
+These tests don't construct a full ChatSession (it requires a live LLM
+client and many other dependencies). Instead they verify the *contract*
+points where session and skills meet:
+
+- The `recall_skill` tool is exported from its module and registers without error
+- The prompt builder accepts `skill_store` and renders the section
+- The store + tool dispatch round-trip works (this is what session.py wires)
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.memory.skills import Skill, SkillStore
+from anton.core.tools.recall_skill import RECALL_SKILL_TOOL, handle_recall_skill
+from anton.core.tools.registry import ToolRegistry
+
+
+@pytest.fixture()
+def store_with_one_skill(tmp_path: Path) -> SkillStore:
+    s = SkillStore(root=tmp_path / "skills")
+    s.save(
+        Skill(
+            label="csv_summary",
+            name="CSV Summary",
+            description="Load a CSV, infer schema, compute stats.",
+            when_to_use="User asks to explore or summarize a CSV file.",
+            declarative_md="1. Load CSV\n2. Describe\n3. Plot",
+            created_at="2026-04-10T12:00:00+00:00",
+            provenance="manual",
+        )
+    )
+    return s
+
+
+class TestRegistryRegistration:
+    def test_recall_skill_registers_without_collision(self):
+        registry = ToolRegistry()
+        registry.register_tool(RECALL_SKILL_TOOL)
+        names = [t.name for t in registry.get_tool_defs()]
+        assert "recall_skill" in names
+
+    def test_double_registration_is_idempotent(self):
+        registry = ToolRegistry()
+        registry.register_tool(RECALL_SKILL_TOOL)
+        registry.register_tool(RECALL_SKILL_TOOL)
+        names = [t.name for t in registry.get_tool_defs()]
+        assert names.count("recall_skill") == 1
+
+    def test_recall_skill_appears_in_dump(self):
+        registry = ToolRegistry()
+        registry.register_tool(RECALL_SKILL_TOOL)
+        dumped = registry.dump()
+        assert any(t["name"] == "recall_skill" for t in dumped)
+
+
+class TestPromptBuilderReceivesStore:
+    def test_section_appears_when_store_passed(
+        self, store_with_one_skill: SkillStore
+    ):
+        builder = ChatSystemPromptBuilder()
+        prompt = builder.build(
+            current_datetime="2026-04-10",
+            runtime_context="test",
+            proactive_dashboards=False,
+            output_dir="/tmp/x",
+            skill_store=store_with_one_skill,
+        )
+        assert "## Procedural memory" in prompt
+        assert "csv_summary" in prompt
+
+    def test_section_omitted_when_no_store(self):
+        builder = ChatSystemPromptBuilder()
+        prompt = builder.build(
+            current_datetime="2026-04-10",
+            runtime_context="test",
+            proactive_dashboards=False,
+            output_dir="/tmp/x",
+            skill_store=None,
+        )
+        assert "Procedural memory" not in prompt
+
+
+class TestDispatchRoundtrip:
+    """The end-to-end path: registry dispatches → handler reads store → counter bumps."""
+
+    @pytest.mark.asyncio
+    async def test_dispatch_recall_skill_through_registry(
+        self, store_with_one_skill: SkillStore
+    ):
+        registry = ToolRegistry()
+        registry.register_tool(RECALL_SKILL_TOOL)
+        # Minimal session-like object — only `_skill_store` is read by the handler.
+        session = SimpleNamespace(_skill_store=store_with_one_skill)
+
+        result = await registry.dispatch_tool(
+            session, "recall_skill", {"label": "csv_summary"}
+        )
+
+        assert "CSV Summary" in result
+        assert "Load CSV" in result
+        loaded = store_with_one_skill.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 1
+
+    @pytest.mark.asyncio
+    async def test_dispatch_unknown_label_through_registry(
+        self, store_with_one_skill: SkillStore
+    ):
+        registry = ToolRegistry()
+        registry.register_tool(RECALL_SKILL_TOOL)
+        session = SimpleNamespace(_skill_store=store_with_one_skill)
+
+        result = await registry.dispatch_tool(
+            session, "recall_skill", {"label": "nonexistent_xyz"}
+        )
+
+        assert "NO MATCH" in result
+        # Counter should NOT have moved
+        loaded = store_with_one_skill.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 0
diff --git a/tests/test_skill_commands.py b/tests/test_skill_commands.py
new file mode 100644
index 00000000..ee38d86f
--- /dev/null
+++ b/tests/test_skill_commands.py
@@ -0,0 +1,409 @@
+"""Tests for the /skill slash-command handlers."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from rich.console import Console
+
+from anton.commands.skills import (
+    _format_history_turns,
+    _format_scratchpad_cells,
+    _gather_session_scratchpad_cells,
+    _SkillDraft,
+    handle_skill_remove,
+    handle_skill_save,
+    handle_skill_show,
+    handle_skills_list,
+)
+from anton.core.memory.skills import Skill, SkillStore
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.fixture()
+def store(tmp_path: Path) -> SkillStore:
+    return SkillStore(root=tmp_path / "skills")
+
+
+@pytest.fixture()
+def console() -> Console:
+    # Capture-friendly console; record=True lets us read output if needed
+    return Console(record=True, width=120)
+
+
+def _fake_cell(code: str, stdout: str = "ok", stderr: str = "", error=None):
+    return SimpleNamespace(
+        code=code, stdout=stdout, stderr=stderr, error=error, description=""
+    )
+
+
+def _make_session(
+    *,
+    draft: _SkillDraft | None = None,
+    raises: Exception | None = None,
+    cells: list | None = None,
+    history: list | None = None,
+) -> MagicMock:
+    """Build a fake session whose `_llm.generate_object()` returns a known draft.
+
+    Pass `draft` to return a specific `_SkillDraft`, or `raises` to make
+    the call raise that exception.
+    """
+    session = MagicMock()
+    session._history = history or []
+    pad = SimpleNamespace(cells=cells or [])
+    session._scratchpads = SimpleNamespace(_pads={"main": pad})
+    session._llm = MagicMock()
+    if raises is not None:
+        session._llm.generate_object = AsyncMock(side_effect=raises)
+    else:
+        session._llm.generate_object = AsyncMock(return_value=draft)
+    return session
+
+
+def _draft(
+    *,
+    label: str = "csv_summary",
+    name: str = "CSV Summary",
+    description: str = "",
+    when_to_use: str = "When summarizing CSV files.",
+    declarative_md: str = "1. Load.\n2. Describe.",
+) -> _SkillDraft:
+    """Convenience constructor for test drafts."""
+    return _SkillDraft(
+        label=label,
+        name=name,
+        description=description,
+        when_to_use=when_to_use,
+        declarative_md=declarative_md,
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helper unit tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestFormatScratchpadCells:
+    def test_empty(self):
+        assert "no scratchpad" in _format_scratchpad_cells([])
+
+    def test_renders_code_and_output(self):
+        cells = [_fake_cell(code="print(1+1)", stdout="2")]
+        out = _format_scratchpad_cells(cells)
+        assert "print(1+1)" in out
+        assert "2" in out
+        assert "Cell 1" in out
+
+    def test_truncates_long_code(self):
+        long_code = "x = 1\n" * 1000
+        cells = [_fake_cell(code=long_code, stdout="")]
+        out = _format_scratchpad_cells(cells)
+        assert "[truncated]" in out
+
+
+class TestFormatHistoryTurns:
+    def test_empty(self):
+        assert "no conversation" in _format_history_turns([])
+
+    def test_string_content(self):
+        history = [
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "hello back"},
+        ]
+        out = _format_history_turns(history)
+        assert "hi" in out
+        assert "hello back" in out
+
+    def test_structured_content_with_text_blocks(self):
+        history = [
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "text", "text": "the answer"},
+                    {"type": "tool_use", "id": "x", "name": "y", "input": {}},
+                ],
+            }
+        ]
+        out = _format_history_turns(history)
+        assert "the answer" in out
+
+    def test_skips_empty_turns(self):
+        history = [
+            {"role": "user", "content": ""},
+            {"role": "assistant", "content": "hello"},
+        ]
+        out = _format_history_turns(history)
+        assert "hello" in out
+
+    def test_max_turns_limit(self):
+        history = [
+            {"role": "user", "content": f"msg {i}"} for i in range(20)
+        ]
+        out = _format_history_turns(history, max_turns=3)
+        # Should only include the last 3
+        assert "msg 19" in out
+        assert "msg 17" in out
+        assert "msg 15" not in out
+
+
+class TestGatherSessionCells:
+    def test_collects_from_multiple_pads(self):
+        pad_a = SimpleNamespace(cells=[_fake_cell("a")])
+        pad_b = SimpleNamespace(cells=[_fake_cell("b"), _fake_cell("c")])
+        session = SimpleNamespace(
+            _scratchpads=SimpleNamespace(_pads={"a": pad_a, "b": pad_b})
+        )
+        cells = _gather_session_scratchpad_cells(session)
+        assert len(cells) == 3
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /skill save
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestSkillSave:
+    @pytest.mark.asyncio
+    async def test_happy_path(self, console, store):
+        session = _make_session(
+            draft=_draft(
+                label="csv_summary",
+                name="CSV Summary",
+                description="Load and summarize a CSV.",
+                when_to_use="User asks to summarize a CSV file.",
+                declarative_md="1. Load.\n2. Describe.\n3. Plot.",
+            ),
+            cells=[_fake_cell("import pandas as pd; df = pd.read_csv('x.csv')")],
+            history=[{"role": "user", "content": "summarize x.csv"}],
+        )
+
+        await handle_skill_save(console, session, store=store)
+
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.name == "CSV Summary"
+        assert loaded.when_to_use == "User asks to summarize a CSV file."
+        assert "Load." in loaded.declarative_md
+        assert loaded.provenance == "manual"
+        assert loaded.created_at  # ISO timestamp set
+
+    @pytest.mark.asyncio
+    async def test_passes_skill_draft_schema_to_llm(self, console, store):
+        """generate_object should be called with the _SkillDraft Pydantic class."""
+        session = _make_session(
+            draft=_draft(),
+            cells=[_fake_cell("x")],
+            history=[{"role": "user", "content": "go"}],
+        )
+        await handle_skill_save(console, session, store=store)
+        session._llm.generate_object.assert_called_once()
+        call_args = session._llm.generate_object.call_args
+        assert call_args.args[0] is _SkillDraft
+
+    @pytest.mark.asyncio
+    async def test_label_collision_appends_number(self, console, store):
+        # Pre-existing skill with the label the LLM will return
+        store.save(
+            Skill(
+                label="csv_summary",
+                name="Existing",
+                description="",
+                when_to_use="",
+                declarative_md="prior",
+                created_at="2026-04-09T00:00:00+00:00",
+                provenance="manual",
+            )
+        )
+        session = _make_session(
+            draft=_draft(
+                label="csv_summary",
+                name="New CSV Summary",
+                description="",
+                when_to_use="User asks for CSV stats.",
+                declarative_md="step 1",
+            ),
+            cells=[_fake_cell("x")],
+            history=[{"role": "user", "content": "go"}],
+        )
+
+        await handle_skill_save(console, session, store=store)
+
+        # Original is unchanged
+        original = store.load("csv_summary")
+        assert original is not None
+        assert original.declarative_md == "prior"
+        # New one was saved with a unique label
+        new = store.load("csv_summary_2")
+        assert new is not None
+        assert new.declarative_md == "step 1"
+
+    @pytest.mark.asyncio
+    async def test_name_hint_is_passed_to_llm(self, console, store):
+        session = _make_session(
+            draft=_draft(
+                label="data_loader",
+                name="Data Loader",
+                when_to_use="User asks to load data.",
+            ),
+            cells=[_fake_cell("x")],
+            history=[{"role": "user", "content": "go"}],
+        )
+
+        await handle_skill_save(
+            console, session, name_hint="data loader", store=store
+        )
+
+        # The hint should appear in the prompt sent to the LLM
+        call_args = session._llm.generate_object.call_args
+        user_msg = call_args.kwargs["messages"][0]["content"]
+        assert "data loader" in user_msg
+
+    @pytest.mark.asyncio
+    async def test_empty_procedure_refuses_save(self, console, store):
+        session = _make_session(
+            draft=_draft(
+                label="empty",
+                name="Empty",
+                when_to_use="x",
+                declarative_md="",  # blank — refuse
+            ),
+            cells=[_fake_cell("x")],
+            history=[{"role": "user", "content": "go"}],
+        )
+        await handle_skill_save(console, session, store=store)
+        assert store.list_all() == []
+
+    @pytest.mark.asyncio
+    async def test_no_work_in_session_aborts(self, console, store):
+        session = _make_session(
+            draft=_draft(),
+            cells=[],
+            history=[],
+        )
+        await handle_skill_save(console, session, store=store)
+        # No LLM call was even made
+        session._llm.generate_object.assert_not_called()
+        assert store.list_all() == []
+
+    @pytest.mark.asyncio
+    async def test_llm_exception_is_caught(self, console, store):
+        session = _make_session(
+            raises=RuntimeError("network down"),
+            cells=[_fake_cell("x")],
+            history=[{"role": "user", "content": "go"}],
+        )
+        # Must not raise
+        await handle_skill_save(console, session, store=store)
+        assert store.list_all() == []
+
+    @pytest.mark.asyncio
+    async def test_validation_error_is_caught(self, console, store):
+        """If the LLM produces output that fails Pydantic validation
+        (rare with forced tool_choice but possible), we surface a
+        warning and don't save anything."""
+        from pydantic import ValidationError as _PVE
+
+        try:
+            _SkillDraft.model_validate({})  # missing required fields
+        except _PVE as exc:
+            session = _make_session(
+                raises=exc,
+                cells=[_fake_cell("x")],
+                history=[{"role": "user", "content": "go"}],
+            )
+        await handle_skill_save(console, session, store=store)
+        assert store.list_all() == []
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /skills list, /skill show, /skill remove
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestListShowRemove:
+    def test_list_empty(self, console, store):
+        # Should not raise
+        handle_skills_list(console, store=store)
+
+    def test_list_with_skills(self, console, store):
+        store.save(
+            Skill(
+                label="csv_summary",
+                name="CSV Summary",
+                description="",
+                when_to_use="When the user asks about a CSV",
+                declarative_md="step",
+                created_at="2026-04-10T00:00:00+00:00",
+                provenance="manual",
+            )
+        )
+        handle_skills_list(console, store=store)
+        # Sanity-check that the rendered output mentions the label
+        out = console.export_text()
+        assert "csv_summary" in out
+
+    def test_show_existing(self, console, store):
+        store.save(
+            Skill(
+                label="csv_summary",
+                name="CSV Summary",
+                description="A CSV utility.",
+                when_to_use="when needed",
+                declarative_md="1. Load\n2. Describe",
+                created_at="2026-04-10T00:00:00+00:00",
+                provenance="manual",
+            )
+        )
+        handle_skill_show(console, "csv_summary", store=store)
+        out = console.export_text()
+        assert "CSV Summary" in out
+        assert "Load" in out
+
+    def test_show_unknown_suggests_closest(self, console, store):
+        store.save(
+            Skill(
+                label="csv_summary",
+                name="CSV",
+                description="",
+                when_to_use="",
+                declarative_md="x",
+                created_at="2026-04-10T00:00:00+00:00",
+                provenance="manual",
+            )
+        )
+        handle_skill_show(console, "csv_sumary", store=store)  # typo
+        out = console.export_text()
+        assert "csv_summary" in out
+
+    def test_show_no_args(self, console, store):
+        handle_skill_show(console, "", store=store)
+        out = console.export_text()
+        assert "Usage" in out
+
+    def test_remove_existing(self, console, store):
+        store.save(
+            Skill(
+                label="zap",
+                name="Zap",
+                description="",
+                when_to_use="",
+                declarative_md="x",
+                created_at="2026-04-10T00:00:00+00:00",
+                provenance="manual",
+            )
+        )
+        handle_skill_remove(console, "zap", store=store)
+        assert store.load("zap") is None
+
+    def test_remove_unknown(self, console, store):
+        handle_skill_remove(console, "nope", store=store)
+        out = console.export_text()
+        assert "No skill" in out
diff --git a/tests/test_skills_e2e.py b/tests/test_skills_e2e.py
new file mode 100644
index 00000000..78f40615
--- /dev/null
+++ b/tests/test_skills_e2e.py
@@ -0,0 +1,187 @@
+"""End-to-end test of the skills loop.
+
+Simulates the full happy path with mocked LLM at each call site:
+
+1. Save a skill from recent work via `/skill save` (mock LLM drafts JSON)
+2. "Restart" by building a fresh prompt — the saved skill appears in the
+   procedural memory section
+3. Simulate the LLM calling `recall_skill(label)` via the tool registry
+4. Verify the tool result + counter increment
+5. Recall again with a typo — closest_match recovers, counter increments
+
+This is the test that proves the loop is wired correctly across all
+prior steps. If it passes, the v1 build is functional end-to-end.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from rich.console import Console
+
+from anton.commands.skills import _SkillDraft, handle_skill_save
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.memory.skills import SkillStore
+from anton.core.tools.recall_skill import RECALL_SKILL_TOOL
+from anton.core.tools.registry import ToolRegistry
+
+
+@pytest.fixture()
+def store_root(tmp_path: Path) -> Path:
+    return tmp_path / "skills"
+
+
+@pytest.fixture()
+def console() -> Console:
+    return Console(record=True, width=120)
+
+
+def _make_session_for_save(
+    store: SkillStore, draft: _SkillDraft, cells: list, history: list
+) -> MagicMock:
+    """A session-like mock with the LLM ready to return `draft`."""
+    session = MagicMock()
+    session._history = history
+    session._skill_store = store
+    pad = SimpleNamespace(cells=cells)
+    session._scratchpads = SimpleNamespace(_pads={"work": pad})
+    session._llm = MagicMock()
+    session._llm.generate_object = AsyncMock(return_value=draft)
+    return session
+
+
+def _fake_cell(code: str, stdout: str = "ok"):
+    return SimpleNamespace(
+        code=code, stdout=stdout, stderr="", error=None, description=""
+    )
+
+
+@pytest.mark.asyncio
+async def test_full_skills_loop(console, store_root):
+    # ── Setup: empty store, simulated session with some scratchpad work ─
+    store = SkillStore(root=store_root)
+    assert store.list_all() == []
+
+    cells = [
+        _fake_cell(
+            "import pandas as pd\n"
+            "df = pd.read_csv('sales_q3.csv')\n"
+            "print(df.shape)",
+            stdout="(12000, 8)",
+        ),
+        _fake_cell(
+            "print(df.describe())\n"
+            "print(df.dtypes)",
+            stdout="<summary statistics>",
+        ),
+        _fake_cell(
+            "df['amount'].plot.hist(bins=30)",
+            stdout="<plot>",
+        ),
+    ]
+    history = [
+        {"role": "user", "content": "take a quick look at sales_q3.csv"},
+        {
+            "role": "assistant",
+            "content": "Loaded the CSV and summarized it. 12k rows, 8 cols.",
+        },
+    ]
+
+    draft = _SkillDraft(
+        label="csv_summary",
+        name="CSV Summary",
+        description="Load a CSV with pandas, print shape/describe/dtypes, plot a histogram.",
+        when_to_use="User asks to explore, summarize, or describe a CSV file.",
+        declarative_md=(
+            "1. Use `pandas.read_csv()` to load the file.\n"
+            "2. Print `df.shape` and `df.dtypes`.\n"
+            "3. Run `df.describe()` for summary stats.\n"
+            "4. For numeric columns of interest, plot a histogram with "
+            "`df[col].plot.hist(bins=30)`."
+        ),
+    )
+    session = _make_session_for_save(store, draft, cells, history)
+
+    # ── Step 1: /skill save ─────────────────────────────────────────────
+    await handle_skill_save(console, session, store=store)
+
+    skills = store.list_all()
+    assert len(skills) == 1
+    saved = skills[0]
+    assert saved.label == "csv_summary"
+    assert saved.name == "CSV Summary"
+    assert "pandas.read_csv" in saved.declarative_md
+    assert saved.stats.stage_1.recommended == 0  # no recalls yet
+
+    # ── Step 2: "restart" — build a fresh system prompt ─────────────────
+    # This simulates a new session reading the same disk store
+    fresh_store = SkillStore(root=store_root)
+    builder = ChatSystemPromptBuilder()
+    prompt = builder.build(
+        current_datetime="2026-04-10T13:00:00+00:00",
+        runtime_context="test",
+        proactive_dashboards=False,
+        output_dir="/tmp/x",
+        skill_store=fresh_store,
+    )
+    assert "## Procedural memory" in prompt
+    assert "`csv_summary`" in prompt
+    assert "explore, summarize, or describe a CSV" in prompt
+
+    # ── Step 3: LLM "decides" to recall the skill via the tool registry ─
+    registry = ToolRegistry()
+    registry.register_tool(RECALL_SKILL_TOOL)
+    fresh_session = SimpleNamespace(_skill_store=fresh_store)
+
+    result = await registry.dispatch_tool(
+        fresh_session, "recall_skill", {"label": "csv_summary"}
+    )
+
+    assert "CSV Summary" in result
+    assert "pandas.read_csv" in result
+    assert "describe" in result
+
+    # Counter incremented to 1
+    after_recall_1 = fresh_store.load("csv_summary")
+    assert after_recall_1 is not None
+    assert after_recall_1.stats.stage_1.recommended == 1
+    assert after_recall_1.stats.total_recalls == 1
+    assert after_recall_1.stats.stage_1.last_used  # ISO timestamp present
+
+    # ── Step 4: Recall again with a typo — closest_match recovers ───────
+    typo_result = await registry.dispatch_tool(
+        fresh_session, "recall_skill", {"label": "csv_sumary"}  # missing 'm'
+    )
+    assert "⚠" in typo_result
+    assert "csv_summary" in typo_result
+    assert "pandas.read_csv" in typo_result  # full procedure still returned
+
+    # Counter is now 2 (typo credited to the resolved label)
+    after_recall_2 = fresh_store.load("csv_summary")
+    assert after_recall_2 is not None
+    assert after_recall_2.stats.stage_1.recommended == 2
+    assert after_recall_2.stats.total_recalls == 2
+
+    # ── Step 5: Disk verification ───────────────────────────────────────
+    skill_dir = store_root / "csv_summary"
+    assert skill_dir.is_dir()
+    assert (skill_dir / "meta.json").is_file()
+    assert (skill_dir / "declarative.md").is_file()
+    assert (skill_dir / "stats.json").is_file()
+
+    stats_on_disk = json.loads((skill_dir / "stats.json").read_text())
+    assert stats_on_disk["total_recalls"] == 2
+    assert stats_on_disk["stage_1"]["recommended"] == 2
+
+    meta_on_disk = json.loads((skill_dir / "meta.json").read_text())
+    assert meta_on_disk["label"] == "csv_summary"
+    assert meta_on_disk["stage_1_present"] is True
+    assert meta_on_disk["stage_2_present"] is False
+    assert meta_on_disk["stage_3_present"] is False
+
+    declarative_on_disk = (skill_dir / "declarative.md").read_text()
+    assert "pandas.read_csv" in declarative_on_disk
diff --git a/tests/test_skills_store.py b/tests/test_skills_store.py
new file mode 100644
index 00000000..f843ce1f
--- /dev/null
+++ b/tests/test_skills_store.py
@@ -0,0 +1,302 @@
+"""Storage-layer tests for `anton.core.memory.skills`."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from anton.core.memory.skills import (
+    Skill,
+    SkillStats,
+    SkillStore,
+    StageStats,
+    make_unique_label,
+    slugify,
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+@pytest.fixture()
+def store_root(tmp_path: Path) -> Path:
+    return tmp_path / "skills"
+
+
+@pytest.fixture()
+def store(store_root: Path) -> SkillStore:
+    return SkillStore(root=store_root)
+
+
+def _make_skill(label: str = "csv_summary", **overrides) -> Skill:
+    base = dict(
+        label=label,
+        name="CSV Summary",
+        description="Load a CSV, infer schema, compute summary stats.",
+        when_to_use="User asks to explore, summarize, or describe a CSV file.",
+        declarative_md="1. Load the CSV.\n2. Infer types.\n3. Print summary.\n",
+        created_at="2026-04-10T12:00:00+00:00",
+        provenance="manual",
+    )
+    base.update(overrides)
+    return Skill(**base)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# slugify / make_unique_label
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestSlugify:
+    def test_simple_lowercase(self):
+        assert slugify("CSV Summary") == "csv_summary"
+
+    def test_strips_special_chars(self):
+        assert slugify("Web Scraping (v2)!") == "web_scraping_v2"
+
+    def test_collapses_underscores(self):
+        assert slugify("foo___bar") == "foo_bar"
+
+    def test_trims_leading_trailing(self):
+        assert slugify("__hello__") == "hello"
+
+    def test_empty_input_falls_back(self):
+        assert slugify("") == "skill"
+        assert slugify("   ") == "skill"
+        assert slugify("!@#$") == "skill"
+
+    def test_dash_to_underscore(self):
+        assert slugify("api-data-fetcher") == "api_data_fetcher"
+
+
+class TestMakeUniqueLabel:
+    def test_returns_base_when_unique(self, store: SkillStore):
+        assert make_unique_label("Brand New", store) == "brand_new"
+
+    def test_appends_number_on_collision(self, store: SkillStore):
+        store.save(_make_skill(label="csv_summary"))
+        assert make_unique_label("CSV Summary", store) == "csv_summary_2"
+
+    def test_chains_numbers(self, store: SkillStore):
+        store.save(_make_skill(label="csv_summary"))
+        store.save(_make_skill(label="csv_summary_2"))
+        store.save(_make_skill(label="csv_summary_3"))
+        assert make_unique_label("CSV Summary", store) == "csv_summary_4"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Save / load round-trip
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestSaveLoadRoundtrip:
+    def test_save_creates_directory_with_required_files(
+        self, store: SkillStore, store_root: Path
+    ):
+        skill = _make_skill()
+        path = store.save(skill)
+        assert path == store_root / "csv_summary"
+        assert (path / "meta.json").is_file()
+        assert (path / "declarative.md").is_file()
+        assert (path / "stats.json").is_file()
+
+    def test_load_after_save_round_trip(self, store: SkillStore):
+        original = _make_skill()
+        store.save(original)
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.label == original.label
+        assert loaded.name == original.name
+        assert loaded.description == original.description
+        assert loaded.when_to_use == original.when_to_use
+        assert loaded.declarative_md == original.declarative_md
+        assert loaded.provenance == "manual"
+
+    def test_load_unknown_returns_none(self, store: SkillStore):
+        assert store.load("does_not_exist") is None
+
+    def test_load_with_corrupt_meta_returns_none(
+        self, store: SkillStore, store_root: Path
+    ):
+        store.save(_make_skill())
+        (store_root / "csv_summary" / "meta.json").write_text("not json{")
+        assert store.load("csv_summary") is None
+
+    def test_save_does_not_wipe_existing_stats(
+        self, store: SkillStore, store_root: Path
+    ):
+        # First save initializes stats.json with zeroes.
+        store.save(_make_skill())
+        # Simulate accumulated counters.
+        store.increment_recommended("csv_summary", stage=1)
+        store.increment_recommended("csv_summary", stage=1)
+        # Saving again (e.g., editing the procedure) must NOT zero them.
+        store.save(_make_skill(declarative_md="updated procedure"))
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 2
+        assert loaded.declarative_md == "updated procedure"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# list_all / list_summaries / delete
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestListing:
+    def test_list_all_empty(self, store: SkillStore):
+        assert store.list_all() == []
+
+    def test_list_summaries_empty(self, store: SkillStore):
+        assert store.list_summaries() == []
+
+    def test_list_all_returns_sorted(self, store: SkillStore):
+        store.save(_make_skill(label="zebra", name="Zebra"))
+        store.save(_make_skill(label="alpha", name="Alpha"))
+        store.save(_make_skill(label="mike", name="Mike"))
+        labels = [s.label for s in store.list_all()]
+        assert labels == ["alpha", "mike", "zebra"]
+
+    def test_list_summaries_skips_malformed(
+        self, store: SkillStore, store_root: Path
+    ):
+        store.save(_make_skill(label="good"))
+        # Create a directory with no meta.json — should be skipped.
+        (store_root / "broken").mkdir()
+        summaries = store.list_summaries()
+        assert [s["label"] for s in summaries] == ["good"]
+
+    def test_list_summaries_lightweight_shape(self, store: SkillStore):
+        store.save(_make_skill())
+        summaries = store.list_summaries()
+        assert len(summaries) == 1
+        assert summaries[0] == {
+            "label": "csv_summary",
+            "name": "CSV Summary",
+            "when_to_use": "User asks to explore, summarize, or describe a CSV file.",
+        }
+
+    def test_delete_removes_directory(self, store: SkillStore, store_root: Path):
+        store.save(_make_skill())
+        assert (store_root / "csv_summary").is_dir()
+        assert store.delete("csv_summary") is True
+        assert not (store_root / "csv_summary").exists()
+
+    def test_delete_unknown_returns_false(self, store: SkillStore):
+        assert store.delete("nope") is False
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Stats increments
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestStatsIncrement:
+    def test_increment_unknown_skill_is_noop(self, store: SkillStore):
+        # Should not raise.
+        store.increment_recommended("not_a_skill", stage=1)
+
+    def test_increment_stage_1(self, store: SkillStore):
+        store.save(_make_skill())
+        store.increment_recommended("csv_summary", stage=1)
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 1
+        assert loaded.stats.stage_1.last_used  # set to a timestamp
+        assert loaded.stats.total_recalls == 1
+
+    def test_increment_multiple_times(self, store: SkillStore):
+        store.save(_make_skill())
+        for _ in range(5):
+            store.increment_recommended("csv_summary", stage=1)
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 5
+        assert loaded.stats.total_recalls == 5
+
+    def test_increment_per_stage_independent(self, store: SkillStore):
+        store.save(_make_skill())
+        store.increment_recommended("csv_summary", stage=1)
+        store.increment_recommended("csv_summary", stage=1)
+        store.increment_recommended("csv_summary", stage=2)
+        loaded = store.load("csv_summary")
+        assert loaded is not None
+        assert loaded.stats.stage_1.recommended == 2
+        assert loaded.stats.stage_2.recommended == 1
+        assert loaded.stats.stage_3.recommended == 0
+        assert loaded.stats.total_recalls == 3
+
+    def test_invalid_stage_raises(self, store: SkillStore):
+        store.save(_make_skill())
+        with pytest.raises(ValueError):
+            store.increment_recommended("csv_summary", stage=4)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# closest_match — typo recovery
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestClosestMatch:
+    def test_empty_store_returns_none(self, store: SkillStore):
+        assert store.closest_match("anything") is None
+
+    def test_exact_match(self, store: SkillStore):
+        store.save(_make_skill(label="csv_summary"))
+        assert store.closest_match("csv_summary") == "csv_summary"
+
+    def test_typo_one_char(self, store: SkillStore):
+        store.save(_make_skill(label="csv_summary"))
+        assert store.closest_match("csv_sumary") == "csv_summary"
+
+    def test_dash_vs_underscore(self, store: SkillStore):
+        store.save(_make_skill(label="web_scraping"))
+        assert store.closest_match("web-scraping") == "web_scraping"
+
+    def test_completely_unrelated_returns_none(self, store: SkillStore):
+        store.save(_make_skill(label="csv_summary"))
+        assert store.closest_match("xyzzy_quark") is None
+
+    def test_picks_closer_of_two_candidates(self, store: SkillStore):
+        store.save(_make_skill(label="csv_summary"))
+        store.save(_make_skill(label="api_fetcher"))
+        # Closer to csv_summary
+        assert store.closest_match("csv_summery") == "csv_summary"
+        # Closer to api_fetcher
+        assert store.closest_match("api_fecher") == "api_fetcher"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Disk format sanity (catches accidental schema drift)
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class TestDiskFormat:
+    def test_meta_json_has_expected_keys(self, store: SkillStore, store_root: Path):
+        store.save(_make_skill())
+        meta = json.loads((store_root / "csv_summary" / "meta.json").read_text())
+        assert set(meta.keys()) >= {
+            "label",
+            "name",
+            "description",
+            "when_to_use",
+            "created_at",
+            "provenance",
+            "stage_1_present",
+            "stage_2_present",
+            "stage_3_present",
+        }
+        assert meta["label"] == "csv_summary"
+
+    def test_stats_json_initial_shape(self, store: SkillStore, store_root: Path):
+        store.save(_make_skill())
+        stats = json.loads((store_root / "csv_summary" / "stats.json").read_text())
+        assert stats["total_recalls"] == 0
+        for stage_key in ("stage_1", "stage_2", "stage_3"):
+            assert stage_key in stats
+            assert stats[stage_key]["recommended"] == 0
+            assert stats[stage_key]["used"] == 0

From 5cfaebfb99655c5231418d19ef4f4bff7e729f0c Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Sat, 11 Apr 2026 05:22:37 -0400
Subject: [PATCH 087/134] updates, tbd; neocortex, Striatum and autolearn
 skills

---
 anton/README.md | 405 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 349 insertions(+), 56 deletions(-)

diff --git a/anton/README.md b/anton/README.md
index badf5693..74abc5fa 100644
--- a/anton/README.md
+++ b/anton/README.md
@@ -11,19 +11,24 @@ And here we are: Like an adrenaline junkie eyeing at a bungee looking for anothe
 
 It is probably obvious now, but Anton has a brain-inspired architecture, and the more we build it the more it resembles/mirrors functional parts of the brain.  On the other hand we also understand that people don't need to know anything about the brain to play with Anton, so we mapped some of the places/files where users can have inputs, or investigate what's up, to names that make more sense than the scientific name of that function of the brain.
 
-The current implementation has three blocks:
+The current implementation has six blocks, mapping the major learning systems:
 
-| Brain Region                 | Function                                         | Anton Equivalent                                              |
-|------------------------------|--------------------------------------------------|---------------------------------------------------------------|
-| Prefrontal Cortex (PFC)      | Executive control, planning, the "inner voice"  | Orchestrator — decides what to work on, how, and when to stop |
-| Working Memory (dlPFC)       | Temporary reasoning space, ~4 slots             | Scratchpads — isolated reasoning environments                 |
-| Hippocampus                  | Episodic memory, records experiences            | Experience Store — logs of problem + context + solution       |
+| Brain Region                 | Function                                          | Anton Equivalent                                              |
+|------------------------------|---------------------------------------------------|---------------------------------------------------------------|
+| Prefrontal Cortex (PFC)      | Executive control, planning, the "inner voice"   | Orchestrator — decides what to work on, how, and when to stop |
+| Working Memory (dlPFC)       | Temporary reasoning space, ~4 slots              | Scratchpads — isolated reasoning environments                 |
+| Hippocampus                  | Episodic memory, records experiences             | Experience Store — logs of problem + context + solution       |
+| Cortex (semantic memory)     | Facts, rules, identity — the consolidated knowledge | Engrams — `lessons.md`, `rules.md`, `profile.md`            |
+| Striatum (procedural memory) | Habits and learned procedures — patterns of action | Skills — multi-stage reusable procedures with declarative + chunked + code representations |
+| Cerebellum (error learning)  | Supervised correction — "what I expected vs what happened" | Cerebellum — buffers errored scratchpad cells, extracts generalizable lessons via post-mortem |
+
+These six systems coexist the way they coexist in the brain: declarative and procedural memory are dissociable (a person with hippocampal damage like H.M. can lose new declarative memories but still learn motor skills), and the cerebellum operates in parallel with continued action rather than blocking it.
 
 
 
 ## Architecture of Anton
 
-These three parts work in a very simple way:
+The high-level flow — how the executive, scratchpads, and the long-term stores collaborate on every turn:
 
 ```
   ┌────────────────────────────────────────────────────┐
@@ -31,7 +36,7 @@ These three parts work in a very simple way:
   │                                                    │
   │  On new problem:                                   │
   │    1. Check SKILL LIBRARY → match?                 │
-  │       YES → deploy skill's template scratchpad     │
+  │       YES → recall_skill(label) → load procedure   │
   │       NO  → open fresh scratchpad                  │
   │    2. Monitor scratchpad progress                  │
   │    3. Detect stuck/failure → pivot strategy        │
@@ -49,27 +54,33 @@ These three parts work in a very simple way:
   │  - Can request sub-scratchpads (decomposition)       │
   │  - Can invoke the hypocampus in a loop               │
   │                                                      │
-  └────────────────────┬─────────────────────────────────┘
-                       │ on success
-                       ▼
-  ┌──────────────────────────────────────────────────────┐
-  │         EXPERIENCE STORE (hippocampus)               │
-  │                                                      │
-  │  Each entry:                                         │
-  │  {                                                   │
-  │    problem_signature: "...",                         │
-  │    context: { what tools, what domain, what input }, │
-  │    scratchpad_trace: [ step1, step2, ... ],          │
-  │    outcome: success | failure,                       │
-  │    cost: tokens/time spent,                          │
-  │    salience: how important/novel was this            │
-  │  }                                                   │
-  │                                                      │
-  │  Searchable by similarity (embeddings)               │
-  └──────────────────────────────────────────────────────┘
-
+  │  Every cell execution fires pre/post hooks observed  │
+  │  by the CEREBELLUM (post-mortem error learning).     │
+  └──────┬──────────────┬───────────────────┬────────────┘
+         │              │                   │
+         │ on success   │ on cell errors    │ on success
+         ▼              ▼                   ▼
+  ┌────────────┐  ┌──────────────┐  ┌─────────────────────┐
+  │ EXPERIENCE │  │  CEREBELLUM  │  │   SKILL LIBRARY     │
+  │   STORE    │  │              │  │                     │
+  │ (hipp.)    │  │ Buffers bad  │  │ /skill save → LLM   │
+  │            │  │ cells, runs  │  │ drafts a procedure  │
+  │ Episodes — │  │ post-mortem  │  │ with label + name + │
+  │ JSONL log  │  │ via LLM,     │  │ when_to_use +       │
+  │ of every   │  │ encodes new  │  │ declarative_md.     │
+  │ turn.      │  │ lessons via  │  │                     │
+  │            │  │ Cortex.      │  │ Future turns recall │
+  │ Recall via │  │              │  │ the procedure via   │
+  │ `recall`   │  │ Lessons feed │  │ recall_skill tool.  │
+  │ tool.      │  │ next code    │  │                     │
+  │            │  │ generation   │  │ Stored at           │
+  │            │  │ (procedural  │  │ ~/.anton/skills/    │
+  │            │  │ priming).    │  │   <label>/          │
+  └────────────┘  └──────────────┘  └─────────────────────┘
 ```
 
+The brain analog: the executive (PFC) plans and delegates to working memory (scratchpads), which can pull on procedural memory (striatum/skills) for known recipes and on declarative memory (hippocampus/cortex/engrams) for facts. The cerebellum runs in parallel with continued action — it never blocks the agent, it just refines future cells through supervised error learning.
+
 And the Hipocampus also is controlled as follows:
 
 ```
@@ -122,6 +133,8 @@ And the Hipocampus also is controlled as follows:
 | **Prefrontal Cortex** (dlPFC/vmPFC) | `cortex.py` | The executive coordinator. Manages two hippocampi (global + project), decides which memories to load into the LLM's context window, gates whether new memories need confirmation. |
 | **Medial Temporal Lobe** (episodic) | `episodes.py` | Raw episodic memory. Logs every conversation turn as timestamped JSONL — user input, assistant responses, tool calls, scratchpad output. Searchable via the `recall` tool. Like HSAM: never forgets. |
 | **Hippocampal Replay** (SWS consolidation) | `consolidator.py` | After a scratchpad session ends, replays what happened in compressed form and extracts durable lessons via a fast LLM call. Like sleep — offline, post-hoc, selective. |
+| **Striatum** (procedural memory) | `skills.py` | Long-term procedural memory. Stores reusable skills as multi-stage directories (declarative → chunks → code). The LLM retrieves skills on demand via the `recall_skill` tool, the way the basal ganglia activates a learned action sequence in response to a familiar context. |
+| **Cerebellum** (supervised error learning) | `cerebellum.py` | Forward-model + error correction. Observes every scratchpad cell via pre/post execute hooks, buffers errored/warning cells across the turn, and runs a post-mortem LLM diff to extract generalizable lessons. Lessons flow through the existing wisdom-injection pipeline into future code generation. Operates in parallel with the agent — never blocks. |
 | **Reconsolidation** (Nader et al.) | `reconsolidator.py` | One-time migration. When old memory formats are reactivated, they enter a labile state and get re-encoded in the new format. Preserves content, updates structure. |
 | **Medial PFC / Default Mode Network** | `profile.md` | Always-on self-model. Identity facts (name, timezone, preferences) that contextualize all processing — you don't "look up" your own name. |
 | **Basal Ganglia + OFC** | `rules.md` | Go/No-Go behavioral gates. The direct pathway enables ("always"), the indirect pathway suppresses ("never"), the OFC handles conditions ("when X → do Y"). |
@@ -134,17 +147,26 @@ And the Hipocampus also is controlled as follows:
 
 ```
 ~/.anton/                              GLOBAL scope (cross-project)
-└── memory/
-    ├── profile.md                     Identity — who the user is
-    ├── rules.md                       Always/never/when behavioral rules
-    ├── lessons.md                     Semantic facts from experience
-    └── topics/                        Deep domain expertise
-        └── *.md
+├── memory/
+│   ├── profile.md                     Identity — who the user is
+│   ├── rules.md                       Always/never/when behavioral rules
+│   ├── lessons.md                     Semantic facts from experience
+│   └── topics/                        Deep domain expertise
+│       └── *.md
+└── skills/                            PROCEDURAL MEMORY (striatum)
+    └── <label>/                       One directory per skill
+        ├── meta.json                  label, name, when_to_use, provenance, presence flags
+        ├── declarative.md             Stage 1 — step-by-step procedure (always present)
+        ├── chunks.md                  Stage 2 — higher-level recipes/macros (optional, v2+)
+        ├── code/                      Stage 3 — runnable helper modules (optional, v2+)
+        │   └── __init__.py
+        ├── requirements.txt           Stage 3 dependencies (optional)
+        └── stats.json                 Per-stage usage counters (recommended/used)
 
 <project>/.anton/                      PROJECT scope (workspace-specific)
 ├── memory/
 │   ├── rules.md                       Project-specific rules
-│   ├── lessons.md                     Project-specific knowledge
+│   ├── lessons.md                     Project-specific knowledge (cerebellum writes here)
 │   └── topics/
 │       └── *.md
 ├── episodes/                          EPISODIC MEMORY (conversation archive)
@@ -154,7 +176,7 @@ And the Hipocampus also is controlled as follows:
 └── .env                               Secrets (unchanged)
 ```
 
-Profile (`profile.md`) is global-only — identity is singular. Rules and lessons exist at both scopes. `anton.md` stays as the user-written instruction file and is not managed by the memory system.
+Profile (`profile.md`) is global-only — identity is singular. Rules and lessons exist at both scopes. Skills live globally (one library across projects) at `~/.anton/skills/`. `anton.md` stays as the user-written instruction file and is not managed by the memory system.
 
 ## Memory Entry Format
 
@@ -442,6 +464,192 @@ On first run after upgrading, Anton automatically migrates old memory formats:
 - Old files are preserved — nothing is deleted.
 - Runs synchronously at startup (fast, no LLM calls needed).
 
+## Procedural Memory — The Skills System
+
+Skills are Anton's **procedural memory** — reusable workflows the user has marked as worth remembering. Brain analog: the **striatum** stores motor programs and habits, learned action sequences that fire when a familiar context is recognized. Anton's skill system mirrors this: when the LLM sees a request that matches a stored skill, it pulls the procedure into working memory and follows it instead of reasoning from scratch.
+
+Skills are intentionally distinct from engrams. **Engrams hold facts** ("CoinGecko rate-limits at 50 req/min"), are loaded into every prompt unconditionally because they're cheap, and live in `lessons.md` / `rules.md` / `profile.md`. **Skills hold whole procedures** ("how to summarize a CSV end-to-end"), are NOT loaded into every prompt, and the LLM explicitly retrieves them via the `recall_skill` tool when it recognizes a match. Both systems coexist in the brain — declarative and procedural memory are dissociable — and both coexist in Anton.
+
+### Skill Directory Format
+
+Each skill is a directory at `~/.anton/skills/<label>/` containing multi-stage representations that coexist (rather than graduating between stages):
+
+```
+~/.anton/skills/csv_summary/
+├── meta.json          ← label, name, description, when_to_use, provenance, presence flags
+├── declarative.md     ← Stage 1: step-by-step procedure the LLM reads (always present)
+├── chunks.md          ← Stage 2: higher-level recipes/macros (emerges with use, v2+)
+├── code/              ← Stage 3: runnable helper modules (emerges with reliability, v2+)
+│   └── __init__.py
+├── requirements.txt   ← Stage 3 dependencies (optional)
+└── stats.json         ← per-stage usage counters
+```
+
+The three stages mirror the cortico-striatal-cerebellar gradient:
+- **Stage 1 (declarative)** — what the prefrontal cortex reads when first learning a skill. Slow, deliberate, fully flexible.
+- **Stage 2 (chunks)** — chunked sub-procedures (associative striatum). Faster than Stage 1, still LLM-mediated.
+- **Stage 3 (code)** — runnable helpers (sensorimotor striatum). Cheapest, fastest, used when context is highly familiar.
+
+The executive picks the highest stage that's reliable enough for the current context. v1 only ships Stage 1; the directory format pre-allocates the other slots so consolidation can fill them later without a migration.
+
+### Naming: `label`, not `slug`
+
+Each skill's unique identifier is its `label`. In cognitive psychology, a *label* is the declarative handle by which a procedural memory is addressed in working memory — the verbal token the executive holds when deciding to invoke a stored procedure. It's deliberately distinct from `name` (the human-readable display like "CSV Summary") and `when_to_use` (the retrieval cue describing the matching context).
+
+### How Skills Get Created
+
+Skills are created manually in v1 via the `/skill save` command. The user runs it after a successful task; the LLM reads the recent scratchpad cells + chat history and drafts the skill via `LLMClient.generate_object` with a `_SkillDraft` Pydantic schema:
+
+```
+you> Take a quick look at sales_q3.csv
+
+anton> [opens scratchpad, loads pandas, infers schema, prints describe(), plots distributions]
+       Here's what I found...
+
+you> /skill save csv summary
+anton> Drafting a skill from recent work…
+       Saved skill csv_summary → ~/.anton/skills/csv_summary/
+       Name: CSV Summary
+       When to use: User asks to explore, summarize, or describe a CSV file.
+```
+
+Automatic skill extraction (the consolidator promoting recurring scratchpad patterns into skills) is a v2/v3 feature. v1 deliberately uses manual curation to learn what "good" skills look like before automating.
+
+### How Skills Get Used
+
+On every turn, the system prompt includes a compact `## Procedural memory` section listing every available skill as one line: `- <label> — <when_to_use>`. The full procedures stay on disk. When the LLM recognizes a match, it calls the `recall_skill` tool:
+
+```
+{"name": "recall_skill", "input": {"label": "csv_summary"}}
+```
+
+The tool reads `declarative.md` and returns it as the tool result, which the LLM follows as guidance for the rest of the turn. Each successful recall increments `stats.json::stage_1::recommended` — that's the classifier signal, mechanically captured without any LLM compliance dance.
+
+Brain analog: the prefrontal cortex doesn't keep every skill loaded. It has fast pattern recognition that flags "I might need skill X" and *retrieves* the skill into working memory only when it actually needs it. The `recall_skill` tool is exactly this retrieval operation.
+
+### Skill Slash Commands
+
+| Command | What it does |
+|---|---|
+| `/skill save [name hint]` | LLM drafts a new skill from recent work and saves it |
+| `/skills list` (or `/skill list`) | Show all saved skills with usage counters |
+| `/skill show <label>` | Print one skill's procedure + stats (typo-tolerant via closest_match) |
+| `/skill remove <label>` | Delete a skill from disk |
+
+### Typo Recovery
+
+When the LLM passes a label that doesn't exist (typos, guesses), `recall_skill` uses `closest_match()` to find the nearest existing slug via difflib and returns that skill's procedure with a warning. The `recommended` counter is credited to the *resolved* label, not the input — so `recall_skill('csv_sumary')` still increments `csv_summary` in the stats. The LLM gets useful behavior even when it gets the spelling wrong.
+
+## Cerebellum — Supervised Error Learning
+
+The Cerebellum is Anton's **supervised error-correction system**. It observes every scratchpad cell and learns from the ones that diverge from intent. Brain analog: the cerebellum's classical role is *forward modeling and error correction* — when a motor command is issued, the cerebellum predicts the expected sensory consequences, and when actual feedback arrives, it computes the prediction error and uses it to refine future commands.
+
+For Anton, the "motor command" is a scratchpad cell. Before the cell runs, the LLM declares its intent via the `one_line_description` field on the scratchpad tool. That description IS the forward model — the prediction of what the cell should do. After the cell runs, we have its actual outcome (stdout, stderr, error). The Cerebellum compares the two and, when they diverge meaningfully, encodes a generalizable lesson that future code-generating LLM calls will see.
+
+### Decoupling: Hooks Live in the Dispatcher, Not the Runtime
+
+The Cerebellum operates via two observer hooks called from the scratchpad tool dispatcher (`handle_scratchpad`), NOT from the runtime backend itself:
+
+```
+handle_scratchpad (orchestration layer)
+  ├─ build prelim Cell from tool input
+  ├─ FIRE pre-execute observers ──→ Cerebellum.on_pre_execute (counter)
+  ├─ pad.execute(code, ...)        (pure execution — runtime never sees observers)
+  ├─ FIRE post-execute observers ─→ Cerebellum.on_post_execute (buffer if errored)
+  └─ return formatted result
+```
+
+This decoupling is intentional. `LocalScratchpadRuntime`, `ScratchpadManager`, and any future `RemoteScratchpadRuntime` are **completely hook-agnostic** — they don't import the Cerebellum, they don't have hook attributes, they never call observers. When a remote runtime backend is added, it inherits zero hook code because there is none to inherit. The orchestration layer is the only place where execution and observation meet.
+
+### Cheap Path
+
+Most cells succeed cleanly. The Cerebellum's `on_post_execute` hook checks `cell.error is None and not cell.stderr.strip()` and returns immediately for clean cells — they're never buffered, no LLM call is ever made for them. Only cells that errored or warned trigger the buffer. The cost of running the Cerebellum on a happy-path turn is **zero LLM calls**.
+
+### Batched Per-Turn Diff
+
+When errored cells exist, they accumulate in a buffer across the turn. At end-of-turn, `_schedule_cerebellum_flush()` fires `Cerebellum.flush()` as a fire-and-forget background task. The user gets their reply immediately while the diff runs in parallel:
+
+1. The buffered cells get formatted into a compact post-mortem prompt
+2. One LLM call via `LLMClient.generate_object_code` (the cheap coding model) returns a `_DiffPassResult` Pydantic model with extracted lessons
+3. Each lesson is wrapped as an `Engram` with `kind="lesson"`, `topic="scratchpad"`, `source="consolidation"`, and routed through `Cortex.encode()` — the same path manual lessons and the consolidator already use
+4. Future scratchpad cells see those lessons via the existing `recall_scratchpad_wisdom()` injection into the scratchpad tool description
+
+The cerebellum is a **producer** only — it generates new lesson entries for the existing storage and retrieval pipeline. There's no parallel storage system, no separate `corrections.md` file. Whatever the consolidator and `/memorize` write to, the cerebellum also writes to.
+
+Brain analog: cerebellar plasticity (LTD at parallel-fiber → Purkinje cell synapses) operates in parallel with continued action, never blocking it. Lessons compound silently across turns; future cells avoid traps that earlier cells fell into.
+
+### The Generated Lessons Look Like
+
+```markdown
+- For CSV files with mixed column types, pass low_memory=False to pd.read_csv. <!-- topic:scratchpad source:consolidation ts:2026-04-11 -->
+- Wrap pd.to_datetime() calls in errors='coerce' when the input may contain malformed strings. <!-- topic:scratchpad source:consolidation ts:2026-04-11 -->
+```
+
+These appear in `lessons.md` like any other engram, carry the same metadata, and get pruned by the same compaction loop when memory grows past threshold.
+
+## Structured Output — `LLMClient.generate_object`
+
+Anton has a single primitive for getting structured data out of the LLM, used by the cerebellum, the consolidator, the cortex's identity/compaction passes, the connect collector, the skill drafter, and the custom-datasource flow. It lives at `anton/llm/client.py`:
+
+```python
+async def generate_object(
+    self,
+    schema_class,        # A Pydantic BaseModel subclass, or list[Model]
+    *,
+    system: str,
+    messages: list[dict],
+    max_tokens: int | None = None,
+):
+    """Forced-tool-call structured output via the planning provider."""
+```
+
+There's also a paired `generate_object_code(...)` that uses the cheap *coding* provider — appropriate for fast/cheap structured tasks like the cerebellum's post-mortem and the cortex's identity extraction.
+
+### How It Works
+
+1. The Pydantic model is converted to a JSON schema via `model_json_schema()`
+2. A synthetic tool is built whose `input_schema` is that JSON schema
+3. The LLM provider is called with `tool_choice={"type": "tool", "name": tool_name}` — this *forces* the LLM to call the tool rather than returning text
+4. The tool's input dict is validated through `model_validate()` and returned as a typed instance
+
+### Why It Beats Asking for JSON in Text
+
+| Old pattern (text JSON) | New pattern (`generate_object`) |
+|---|---|
+| "Return ONLY valid JSON, no commentary, no markdown fences" | Forced tool_choice — the LLM cannot return text |
+| Manual `json.loads()` with try/except | Pydantic `model_validate()` with structural validation |
+| Strip markdown fences with regex (`_strip_json_fences`) | Never needed — there's no text response to strip |
+| Defensive `if not isinstance(data, dict): return` checks | Pydantic catches type errors at the schema layer |
+| Field-by-field `.get(key, default)` extraction | Typed attribute access on the validated instance |
+
+### The Shared Helper
+
+The schema-derivation and validation logic lives in exactly one place — `anton/llm/structured.py` — and is shared by both `LLMClient.generate_object` (main process, async) and `_ScratchpadLLM.generate_object` (subprocess bridge, sync). Two pure helper functions:
+
+```python
+def build_structured_tool(schema_class) -> tuple[dict, type, bool]:
+    """Pydantic model → (tool_dict, validator_class, is_list)."""
+
+def unwrap_structured_response(tool_call_input, validator_class, is_list):
+    """LLM tool call input → validated typed Pydantic instance."""
+```
+
+This pattern is what every extraction call site uses. Adding a new one is mechanical: define a Pydantic model with `Field(description=...)` on each field, call `await session._llm.generate_object(MySchema, ...)`, wrap in try/except for graceful degradation. The field descriptions on the Pydantic model double as the LLM's instructions — there's no separate prompt explaining the schema.
+
+### Where It's Used
+
+| Module | Schema | Provider | Purpose |
+|---|---|---|---|
+| `connect_collector.py::extract_variables` | `_ExtractionResult` | planning | Parse free-form credential input into structured fields |
+| `commands/skills.py::handle_skill_save` | `_SkillDraft` | planning | LLM drafts a skill from recent scratchpad work |
+| `commands/datasource.py::handle_add_custom_datasource` | `_CustomDatasourceSpec` | planning | LLM identifies a custom datasource's auth fields |
+| `cortex.py::_compact_file` | `_CompactionResult` | **coding** | Memory deduplication during synaptic homeostasis |
+| `cortex.py::maybe_update_identity` | `_IdentityFacts` | **coding** | Default-mode identity extraction every 5 turns |
+| `consolidator.py::replay_and_extract` | `_ConsolidatedLessons` | **coding** | Sleep-replay extraction of lessons from scratchpad sessions |
+| `cerebellum.py::_run_diff` | `_DiffPassResult` | **coding** | Post-mortem error learning from cell failures |
+
+The split between *planning* and *coding* providers preserves the original intent of each call site — anything that previously used `_llm.code()` now uses `generate_object_code` (cheap, fast model), and anything that previously used `_llm.plan()` now uses `generate_object` (planning model).
+
 ## Concurrency Safety
 
 | Operation | Scope | Strategy |
@@ -486,12 +694,24 @@ Source (user/LLM/consolidation)
 ```
 anton/memory/
 ├── hippocampus.py      Engram + Hippocampus class
-├── cortex.py           Cortex class
+├── cortex.py           Cortex class (executive declarative-memory coordinator)
 ├── episodes.py         Episode + EpisodicMemory class
-├── consolidator.py     Consolidator class
+├── consolidator.py     Consolidator class (sleep-replay → Engrams)
+├── cerebellum.py       Cerebellum class (supervised error learning over scratchpad cells)
+├── skills.py           Skill, SkillStore, SkillStats — procedural memory storage layer
 ├── reconsolidator.py   needs_reconsolidation() + reconsolidate() functions
 ├── learnings.py        [legacy] LearningStore — replaced by Hippocampus
 └── store.py            SessionStore — session history (orthogonal to long-term memory)
+
+anton/llm/
+├── client.py           LLMClient with plan/code/generate_object/generate_object_code
+├── structured.py       build_structured_tool + unwrap_structured_response (shared helper)
+└── ...                 anthropic.py, openai.py, provider.py, prompt_builder.py
+
+anton/tools/
+├── recall_skill.py     RECALL_SKILL_TOOL — the LLM's procedural memory retrieval primitive
+├── tool_handlers.py    handle_scratchpad with pre/post-execute observer firing
+└── ...                 registry.py, tool_defs.py
 ```
 
 ### `hippocampus.py` — Storage Engine
@@ -516,17 +736,17 @@ The Hippocampus handles one scope (global OR project). It doesn't decide what to
 
 ### `cortex.py` — Executive Coordinator
 
-The Cortex manages two Hippocampus instances and orchestrates all memory operations.
+The Cortex manages two Hippocampus instances and orchestrates all declarative memory operations. It is also the encoding endpoint that the cerebellum and consolidator route their generated lessons through.
 
 | Method | Purpose |
 |---|---|
 | `build_memory_context()` | Assemble memories for system prompt injection (~5800 token budget) |
-| `get_scratchpad_context()` | Combine scratchpad wisdom from both scopes for tool description injection |
-| `encode(engrams)` | Route engrams to correct hippocampus by scope. Returns action log. |
+| `get_scratchpad_context()` | Combine scratchpad wisdom from both scopes for tool description injection. **This is the channel the cerebellum's lessons flow through into future code generation.** |
+| `encode(engrams)` | Route engrams to correct hippocampus by scope. Returns action log. Called by `/memorize`, the consolidator, and the cerebellum. |
 | `encoding_gate(engram)` | Check if an engram needs user confirmation (mode-dependent) |
-| `needs_compaction()` | Check if any file exceeds 50 entries |
-| `compact_all()` | LLM-assisted deduplication + merge on all oversized files |
-| `maybe_update_identity(message)` | Extract identity facts from user message (fast model, background) |
+| `needs_compaction()` | Check if any file exceeds the threshold |
+| `compact_all()` | LLM-assisted deduplication + merge on all oversized files. Uses `generate_object_code(_CompactionResult, ...)`. |
+| `maybe_update_identity(message)` | Extract identity facts from user message via `generate_object_code(_IdentityFacts, ...)`. Background, fires every 5 turns. |
 
 ### `episodes.py` — Episodic Memory
 
@@ -546,7 +766,51 @@ The EpisodicMemory handles raw conversation logging and recall.
 | Method | Purpose |
 |---|---|
 | `should_replay(cells)` | Heuristic check: errors, 5+ cells, or cancellations → True |
-| `replay_and_extract(cells, llm)` | Compress cells → fast LLM call → parse JSON → return Engrams |
+| `replay_and_extract(cells, llm)` | Compress cells → `generate_object_code(_ConsolidatedLessons, ...)` → return Engrams |
+
+### `cerebellum.py` — Supervised Error Learning
+
+| Method | Purpose |
+|---|---|
+| `on_pre_execute(cell)` | Pre-execute hook called by `handle_scratchpad`. Counter only in v1. |
+| `on_post_execute(cell)` | Post-execute hook. Cheap path skips clean cells; errored/warning cells get buffered. |
+| `flush()` | Run the batched diff pass on all buffered cells, encode lessons via Cortex, clear buffer. Fire-and-forget at end-of-turn. |
+| `reset()` | Drop the buffer without encoding (used when a turn is cancelled mid-flight). |
+| `buffered_count` | Number of cells waiting for the next flush. |
+| `_run_diff(cells)` | Internal: send buffered cells to `generate_object_code(_DiffPassResult, ...)` and return validated lessons. |
+| `_encode_lessons(lessons)` | Internal: wrap lessons as Engrams (`kind="lesson"`, `topic="scratchpad"`, `source="consolidation"`) and route through `Cortex.encode()`. |
+
+### `skills.py` — Procedural Memory Store
+
+| Method | Purpose |
+|---|---|
+| `SkillStore.list_all()` | Return every loadable skill, sorted by label. |
+| `SkillStore.list_summaries()` | Lightweight listing — `[{"label": "...", "name": "...", "when_to_use": "..."}]`. Used by the prompt builder to inject the procedural-memory section without loading any declarative content. |
+| `SkillStore.load(label)` | Read a single skill by label. Returns None if absent or malformed. |
+| `SkillStore.save(skill)` | Write the skill directory. Creates `meta.json`, `declarative.md`, `stats.json`. Never wipes accumulated counters. |
+| `SkillStore.delete(label)` | Remove a skill directory. |
+| `SkillStore.increment_recommended(label, *, stage)` | Atomic-ish bump of the per-stage `recommended` counter (called by `recall_skill`). |
+| `SkillStore.closest_match(bad_label, *, cutoff=0.6)` | Difflib-based fuzzy match for typo recovery. |
+| `make_unique_label(base, store)` | Generate a slug that doesn't collide with any existing skill (`csv_summary`, `csv_summary_2`, ...). |
+| `slugify(text)` | Normalize arbitrary text into a snake_case identifier. |
+
+### `tools/recall_skill.py` — Procedural Memory Retrieval Tool
+
+The LLM-facing tool that pulls a skill into working memory. Lives alongside the other tool defs but is the only tool whose handler reads `session._skill_store`.
+
+| Element | Purpose |
+|---|---|
+| `RECALL_SKILL_TOOL` | The `ToolDef` registered with the session — name, description, input_schema, handler. |
+| `handle_recall_skill(session, tc_input)` | Resolve the label (with closest_match fallback for typos), increment the per-stage `recommended` counter, return a formatted procedure to the LLM as the tool result. |
+
+### `llm/structured.py` — Shared Schema Helper
+
+Two pure helper functions for forced-tool-call structured output. Used by both `LLMClient.generate_object` (main process, async) and `_ScratchpadLLM.generate_object` (subprocess bridge, sync) — they share this code via lazy imports so neither runtime forces pydantic at module load time.
+
+| Function | Purpose |
+|---|---|
+| `build_structured_tool(schema_class)` | Pydantic model (or `list[Model]`) → `(tool_dict, validator_class, is_list)`. The `tool_dict` is ready to pass as `tools=[...]` with `tool_choice={"type": "tool", "name": tool_dict["name"]}`. |
+| `unwrap_structured_response(tool_call_input, validator_class, is_list)` | Validate the LLM's tool call input via Pydantic and unwrap the wrapper if it was a list. Raises `pydantic.ValidationError` on schema drift. |
 
 ### `reconsolidator.py` — Legacy Migration
 
@@ -557,7 +821,7 @@ The EpisodicMemory handles raw conversation logging and recall.
 
 ## Integration Points in chat.py
 
-The memory system is wired into `ChatSession` and `_chat_loop()`:
+The memory + skills + cerebellum systems are wired into `ChatSession` and `_chat_loop()`:
 
 ```
 1. _chat_loop() startup:
@@ -570,49 +834,74 @@ The memory system is wired into `ChatSession` and `_chat_loop()`:
 2. ChatSession.__init__():
    → Stores cortex as self._cortex
    → Stores episodic as self._episodic
+   → Initializes self._skill_store = SkillStore() (procedural memory)
+   → Initializes self._cerebellum = Cerebellum(cortex=self._cortex, llm=self._llm)
+   → Initializes self._scratchpad_observers = [self._cerebellum]
    → Initializes self._pending_memory_confirmations = []
 
 3. ChatSession._build_system_prompt():
    → Calls cortex.build_memory_context()  →  injected before anton.md
+   → Passes self._skill_store to prompt builder
+   → Builder appends "## Procedural memory" section listing all available skills
 
 4. ChatSession._build_tools():
    → Calls cortex.get_scratchpad_context()  →  appended to scratchpad tool desc
    → Includes MEMORIZE_TOOL in tool list
    → Includes RECALL_TOOL when episodic memory is enabled
+   → Includes RECALL_SKILL_TOOL (always available — no-op if no skills saved)
 
 5. Tool dispatch (tools.py):
    → "memorize" → handle_memorize() → cortex.encode()
    → "recall" → handle_recall() → episodic.recall_formatted()
+   → "recall_skill" → handle_recall_skill() → SkillStore.load() + increment_recommended()
+   → "scratchpad" exec → handle_scratchpad() fires pre/post observers around pad.execute()
 
-6. turn_stream():
+6. handle_scratchpad (tool_handlers.py) — observer dispatch:
+   → Build prelim Cell from tool input (code + description + estimated_time)
+   → _fire_pre_execute(session, prelim_cell) → cerebellum.on_pre_execute (counter)
+   → pad.execute(...) — pure execution, runtime never sees observers
+   → _fire_post_execute(session, cell) → cerebellum.on_post_execute (buffer if errored)
+
+7. turn_stream():
    → Logs user input to episodic memory (before LLM call)
    → Logs assistant response to episodic memory (after LLM call)
 
-7. _stream_and_handle_tools() tool loop:
+8. _stream_and_handle_tools() tool loop:
    → Logs each tool_call to episodic memory
    → Logs each tool_result to episodic memory
    → Logs scratchpad cell output to episodic memory
    → _maybe_consolidate_scratchpads() → background asyncio.create_task
 
-8. After turn (turn_stream):
+9. End of turn (turn / turn_stream):
    → Every 5 turns → cortex.maybe_update_identity() as background task
+   → _schedule_cerebellum_flush() → fire-and-forget background task
+     → Runs cerebellum diff on all buffered cells
+     → Encodes extracted lessons via cortex.encode()
+     → Lessons appear in next turn's scratchpad tool description automatically
+
+10. Before user prompt (_chat_loop):
+    → Show pending memory confirmations → user approves/rejects/picks
 
-9. Before user prompt (_chat_loop):
-   → Show pending memory confirmations → user approves/rejects/picks
+11. Slash commands for skills (chat.py):
+    → /skill save [name hint] → handle_skill_save() → drafts via generate_object → SkillStore.save()
+    → /skills or /skill list → handle_skills_list() → tabular display of skills + counters
+    → /skill show <label> → handle_skill_show() → full procedure + stats
+    → /skill remove <label> → handle_skill_remove() → SkillStore.delete()
 
-10. /setup wizard (sub-menu):
+12. /setup wizard (sub-menu):
     → Option 1: Models — provider, API key, planning & coding models
     → Option 2: Memory — memory mode (autopilot/copilot/off) + episodic toggle
     → Persisted to ANTON_MEMORY_MODE and ANTON_EPISODIC_MEMORY in .anton/.env
 
-11. /memory (read-only dashboard):
+13. /memory (read-only dashboard):
     → Shows semantic memory counts (global/project rules, lessons, topics)
     → Shows episodic memory status (ON/OFF) and session count
     → No configuration prompts — directs to /setup > Memory
 
-12. _rebuild_session():
+14. _rebuild_session():
     → Updates cortex._llm and cortex.mode when settings change
     → Propagates episodic memory instance
+    → Re-creates cerebellum if llm or cortex changed
 ```
 
 ## Context Budget Summary
@@ -624,7 +913,11 @@ The memory system is wired into `ChatSession` and `_chat_loop()`:
 | Project rules | Basal Ganglia | ~1500 tokens | Always (system prompt) |
 | Global lessons | ATL semantics | ~1000 tokens | Always (most recent first) |
 | Project lessons | ATL semantics | ~1000 tokens | Always (most recent first) |
-| Scratchpad wisdom | Procedural memory | ~500 tokens | Scratchpad active (tool desc) |
+| Scratchpad wisdom | Procedural priming | ~500 tokens | Scratchpad active (tool desc). Cerebellum-generated lessons flow through here. |
+| Procedural memory list | Striatum (skill labels) | ~50 tokens per skill (compact list) | Always — when any skills are saved. Full procedures NOT loaded; only labels + when_to_use. |
 | Topic files | Cortical association | Unlimited | On demand |
-| Episodic recall | MTL episodic | Variable | On demand (recall tool) |
-| **Total in prompt** | **Working memory** | **~5800 tokens** | ~3% of 200K context |
+| Skill procedures | Striatum (full skills) | Variable per skill | On demand (`recall_skill` tool) — only when the LLM recognizes a match |
+| Episodic recall | MTL episodic | Variable | On demand (`recall` tool) |
+| **Total in prompt** | **Working memory** | **~5800 tokens + ~50/skill** | ~3% of 200K context |
+
+The procedural memory list scales linearly with the number of saved skills but stays cheap (~50 tokens each — slug + one-line `when_to_use`). The full skill procedures are *paid for only when retrieved*, the same way the prefrontal cortex doesn't keep every procedural memory loaded — it has fast pattern recognition that flags relevance and pulls the full procedure from storage on demand.

From 6bccb7b370dd768f80304278d60d91ec6fce3b87 Mon Sep 17 00:00:00 2001
From: Hamish Fagg <hamish@mindsdb.com>
Date: Mon, 13 Apr 2026 09:59:56 +1200
Subject: [PATCH 088/134] move shares to anton.mindsdb.com

---
 anton/config/settings.py | 2 +-
 anton/publisher.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/anton/config/settings.py b/anton/config/settings.py
index c12d9d29..9cfb3809 100644
--- a/anton/config/settings.py
+++ b/anton/config/settings.py
@@ -67,7 +67,7 @@ class AntonSettings(BaseSettings):
     minds_ssl_verify: bool = True
 
     # Publish service (anton-services API Gateway)
-    publish_url: str = "https://4nton.ai"
+    publish_url: str = "https://anton.mindsdb.com"
 
     @field_validator("minds_ssl_verify", mode="before")
     @classmethod
diff --git a/anton/publisher.py b/anton/publisher.py
index b6bf9738..bcd0b2aa 100644
--- a/anton/publisher.py
+++ b/anton/publisher.py
@@ -24,7 +24,7 @@
 _TEXT_EXTENSIONS = {".html", ".htm", ".js", ".css"}
 
 
-DEFAULT_PUBLISH_URL = "https://4nton.ai"
+DEFAULT_PUBLISH_URL = "https://anton.mindsdb.com"
 
 # Patterns that capture relative paths from HTML attributes and CSS url()
 _REF_PATTERNS = [

From 02c6ff76d87a4012ff9e7fb74434fe137e5d432f Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Mon, 13 Apr 2026 00:04:25 -0500
Subject: [PATCH 089/134] better timers and messaging

---
 anton/chat_ui.py      | 148 ++++++++++++++++++++++++++++++++++--------
 anton/core/session.py |  32 ++++++++-
 tests/test_chat_ui.py |  31 +++++----
 3 files changed, 169 insertions(+), 42 deletions(-)

diff --git a/anton/chat_ui.py b/anton/chat_ui.py
index a50c5e59..11aad507 100644
--- a/anton/chat_ui.py
+++ b/anton/chat_ui.py
@@ -28,14 +28,55 @@ class _ToolActivity:
     eta_str: str = ""
     printed: bool = False  # whether the activity line has been printed
     done: bool = False  # whether execution is complete
-
-
-_TOOL_LABELS: dict[str, str] = {
-    "scratchpad": "Scratchpad",
-    "memorize": "Memory",
-    "recall": "Recall",
+    start_time: float = 0.0  # monotonic timestamp when execution began
+
+
+# Witty one-liners for non-scratchpad tool display. One is picked at
+# random each time the tool fires, so the UI never feels repetitive.
+_TOOL_PHRASES: dict[str, list[str]] = {
+    "recall_skill": [
+        "Pulling up the procedure\u2026",
+        "Recalling the recipe\u2026",
+        "Loading the playbook\u2026",
+        "Reaching into procedural memory\u2026",
+        "Activating muscle memory\u2026",
+    ],
+    "memorize": [
+        "Jotting this down\u2026",
+        "Committing to memory\u2026",
+        "Filing away for later\u2026",
+        "Encoding a new engram\u2026",
+        "Stashing that in long-term storage\u2026",
+    ],
+    "recall": [
+        "Digging through the archives\u2026",
+        "Searching episodic memory\u2026",
+        "Rewinding the tape\u2026",
+        "Scanning past conversations\u2026",
+        "Activating hippocampal recall\u2026",
+    ],
+    "publish_or_preview": [
+        "Preparing the preview\u2026",
+        "Rendering your dashboard\u2026",
+        "Getting things ready to show\u2026",
+        "Spinning up the preview\u2026",
+    ],
+    "connect_new_datasource": [
+        "Setting up the connection\u2026",
+        "Wiring up the datasource\u2026",
+        "Establishing the link\u2026",
+    ],
 }
 
+# Fallback for tools without their own phrase list
+_GENERIC_TOOL_PHRASES = [
+    "On it\u2026",
+    "Working on that\u2026",
+    "Running the tool\u2026",
+    "Processing\u2026",
+    "Executing\u2026",
+]
+
 _MAX_DESC = 60
 
 _REFRESH_FPS = 6
@@ -45,24 +86,27 @@ class _ToolActivity:
 
 
 def _tool_display_text(name: str, input_json: str) -> str:
-    """Map tool name + raw JSON input to a human-readable description."""
-    label = _TOOL_LABELS.get(name, name)
+    """Map tool name + raw JSON input to a human-readable description.
+
+    For scratchpad: return just the description text (no wrapper).
+    For other tools: return a witty random phrase from _TOOL_PHRASES.
+    """
     try:
         data = json.loads(input_json)
     except (json.JSONDecodeError, TypeError):
-        return label
+        data = {}
 
-    desc = ""
     if name == "scratchpad":
         desc = data.get("one_line_description") or data.get("action", "")
-    elif name == "memorize":
-        entries = data.get("entries", [])
-        desc = f"{len(entries)} entry/entries"
-    if desc:
-        if len(desc) > _MAX_DESC:
-            desc = desc[: _MAX_DESC - 1] + "\u2026"
-        return f"{label}({desc})"
-    return label
+        if desc:
+            if len(desc) > _MAX_DESC:
+                desc = desc[: _MAX_DESC - 1] + "\u2026"
+            return desc
+        return "Running code"
+
+    # Non-scratchpad: pick a witty phrase
+    phrases = _TOOL_PHRASES.get(name, _GENERIC_TOOL_PHRASES)
+    return random.choice(phrases)  # noqa: S311
 
 
 THINKING_MESSAGES = [
@@ -270,11 +314,15 @@ def show_tool_execution(self, task: str) -> None:
 
     def on_tool_use_start(self, tool_id: str, name: str) -> None:
         """Track a new tool use."""
+        import time as _time
+
         if not self._active:
             return
         self._in_tool_phase = True
         self._last_was_tool = True
-        activity = _ToolActivity(tool_id=tool_id, name=name)
+        activity = _ToolActivity(
+            tool_id=tool_id, name=name, start_time=_time.monotonic()
+        )
         self._activities.append(activity)
 
     def on_tool_use_delta(self, tool_id: str, json_delta: str) -> None:
@@ -315,11 +363,10 @@ def update_progress(
             return
 
         if phase == "scratchpad_start":
-            # Print the scratchpad activity line NOW (before execution)
+            # Print the scratchpad description line NOW (no estimate — just
+            # the description, since LLM estimates are unreliable).
             for act in reversed(self._activities):
                 if act.name == "scratchpad" and not act.printed:
-                    if eta:
-                        act.eta_str = f"~{int(eta)}s"
                     self._stop_spinner()
                     self._print_activity_line(act)
                     act.printed = True
@@ -358,6 +405,43 @@ def update_progress(
             self._stop_spinner()
             return
 
+        if phase == "tool_start":
+            # Non-scratchpad tool started execution — spinner shows the
+            # witty phrase; the activity line was already printed at
+            # on_tool_use_end with the description.
+            self._line2_status = message
+            self._update_spinner()
+            return
+
+        if phase == "tool_done":
+            # Non-scratchpad tool finished — print ✔ + actual elapsed
+            elapsed = eta if eta else 0
+            for act in reversed(self._activities):
+                if act.name == message and act.printed and not act.done:
+                    act.done = True
+                    self._stop_spinner()
+                    self._print_done_line(act, elapsed)
+                    self._start_spinner()
+                    break
+            return
+
+        if phase == "reasoning_start":
+            # LLM is thinking between tool rounds. Spinner shows a
+            # fresh witty message. The elapsed time will be printed
+            # when reasoning_done arrives.
+            self._line1_fun = random.choice(THINKING_MESSAGES)  # noqa: S311
+            self._line2_status = random.choice(WORKING_FOOTER_MESSAGES)  # noqa: S311
+            self._line3_peek = ""
+            self._update_spinner()
+            return
+
+        if phase == "reasoning_done":
+            elapsed = eta if eta else 0
+            self._stop_spinner()
+            self._print_reasoning_line(elapsed)
+            self._start_spinner()
+            return
+
         label = PHASE_LABELS.get(phase, phase)
         eta_str = f"  ~{int(eta)}s" if eta else ""
         self._line2_status = f"{label}  {message}{eta_str}"
@@ -424,14 +508,18 @@ def _extract_peek(self, text: str) -> str:
         return last
 
     def _print_activity_line(self, act: _ToolActivity) -> None:
-        """Print a single activity line permanently (before execution)."""
+        """Print a single activity line permanently (before execution).
+
+        For scratchpad: just the description text.
+        For other tools: the witty phrase from _tool_display_text.
+        No estimate is shown — only the actual elapsed time is printed
+        later by _print_done_line.
+        """
         line = Text()
-        label = act.description or _TOOL_LABELS.get(act.name, act.name)
+        label = act.description or act.name
         prefix = "\u23bf " if act is self._activities[0] else "  "
         line.append(prefix)
         line.append(label, style="bold")
-        if act.eta_str:
-            line.append(f" {act.eta_str}", style="anton.muted")
         self._console.print(line)
 
     def _print_done_line(self, act: _ToolActivity, elapsed: float) -> None:
@@ -442,6 +530,14 @@ def _print_done_line(self, act: _ToolActivity, elapsed: float) -> None:
         line.append(elapsed_str, style="anton.muted")
         self._console.print(line)
 
+    def _print_reasoning_line(self, elapsed: float) -> None:
+        """Print the LLM's reasoning time between tool rounds."""
+        line = Text()
+        elapsed_str = f"{elapsed:.1f}s" if elapsed >= 1 else f"{int(elapsed * 1000)}ms"
+        line.append("  Reasoning: ", style="anton.muted")
+        line.append(elapsed_str, style="anton.muted")
+        self._console.print(line)
+
 
 class EscapeWatcher:
     """Detect Escape keypress during streaming via cbreak terminal mode."""
diff --git a/anton/core/session.py b/anton/core/session.py
index 2be23167..27cb6dfe 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -949,6 +949,8 @@ async def _stream_and_handle_tools(
                 )
 
                 # Process each tool call
+                import time as _time
+
                 tool_results: list[dict] = []
                 for tc in llm_response.tool_calls:
                     if self._episodic is not None:
@@ -959,6 +961,8 @@ async def _stream_and_handle_tools(
                             tool=tc.name,
                         )
 
+                    _tool_t0 = _time.monotonic()
+
                     try:
                         if tc.name == "scratchpad" and tc.input.get("action") == "exec":
                             # Inline streaming exec — yields progress events
@@ -978,7 +982,6 @@ async def _stream_and_handle_tools(
                                     message=description or "Running code",
                                     eta_seconds=estimated_seconds,
                                 )
-                                import time as _time
 
                                 _sp_t0 = _time.monotonic()
                                 from anton.core.backends.base import Cell
@@ -1044,9 +1047,20 @@ async def _stream_and_handle_tools(
                                 message="Analyzing results...",
                             )
                         else:
+                            # Non-scratchpad, non-interactive tool — track elapsed
+                            yield StreamTaskProgress(
+                                phase="tool_start",
+                                message=tc.name,
+                            )
                             result_text = await self.tool_registry.dispatch_tool(
                                 self, tc.name, tc.input
                             )
+                            _tool_elapsed = _time.monotonic() - _tool_t0
+                            yield StreamTaskProgress(
+                                phase="tool_done",
+                                message=tc.name,
+                                eta_seconds=_tool_elapsed,
+                            )
                             if (
                                 tc.name == "scratchpad"
                                 and tc.input.get("action") == "dump"
@@ -1081,9 +1095,10 @@ async def _stream_and_handle_tools(
 
                 self._history.append({"role": "user", "content": tool_results})
 
-                # Signal that tools are done and LLM is now analyzing
+                # Signal that tools are done and LLM is now reasoning
+                _reasoning_t0 = _time.monotonic()
                 yield StreamTaskProgress(
-                    phase="analyzing", message="Analyzing results..."
+                    phase="reasoning_start", message="Thinking..."
                 )
 
                 # Stream follow-up
@@ -1094,6 +1109,17 @@ async def _stream_and_handle_tools(
                         messages=self._history,
                         tools=tools,
                     ):
+                        # Capture reasoning elapsed on first text or tool event
+                        if _reasoning_t0 and isinstance(
+                            event, (StreamTextDelta, StreamComplete)
+                        ):
+                            _reasoning_elapsed = _time.monotonic() - _reasoning_t0
+                            _reasoning_t0 = 0  # only fire once
+                            yield StreamTaskProgress(
+                                phase="reasoning_done",
+                                message="",
+                                eta_seconds=_reasoning_elapsed,
+                            )
                         yield event
                         if isinstance(event, StreamComplete):
                             response = event
diff --git a/tests/test_chat_ui.py b/tests/test_chat_ui.py
index 539ab1b3..4ed6d141 100644
--- a/tests/test_chat_ui.py
+++ b/tests/test_chat_ui.py
@@ -2,7 +2,7 @@
 
 from unittest.mock import MagicMock, patch
 
-from anton.chat_ui import PHASE_LABELS, StreamDisplay, _tool_display_text
+from anton.chat_ui import PHASE_LABELS, StreamDisplay, _MAX_DESC, _tool_display_text
 
 
 
@@ -111,7 +111,7 @@ def test_json_delta_accumulation(self, MockLive):
         display.on_tool_use_end("tool_1")
 
         act = display._activities[0]
-        assert act.description == "Scratchpad(exec)"
+        assert act.description == "exec"  # no Scratchpad() wrapper
 
     @patch("anton.chat_ui.Live")
     def test_finish_prints_activity_summary(self, MockLive):
@@ -163,39 +163,44 @@ def test_multiple_tool_calls(self, MockLive):
         display.on_tool_use_end("tool_2")
 
         assert len(display._activities) == 2
-        assert display._activities[0].description == "Scratchpad(exec)"
-        assert display._activities[1].description == "Memory(1 entry/entries)"
+        # Scratchpad now shows just the description (no wrapper)
+        assert display._activities[0].description == "exec"
+        # Memorize now shows a witty phrase (random, so just check it's a string)
+        assert display._activities[1].description  # non-empty
 
     def test_malformed_json_fallback(self):
-        # Bad JSON should not crash, falls back to just the label
+        # Bad JSON should not crash — falls back to a default
         result = _tool_display_text("scratchpad", "{broken json")
-        assert result == "Scratchpad"
+        assert result == "Running code"
 
     def test_tool_display_text_truncation(self):
         long_desc = "a" * 100
         result = _tool_display_text("scratchpad", f'{{"one_line_description": "{long_desc}"}}')
-        assert len(result) <= len("Scratchpad()") + 60
-        assert result.endswith("\u2026)")
+        # No wrapper — just the truncated description
+        assert len(result) <= _MAX_DESC
+        assert result.endswith("\u2026")
 
     def test_tool_display_text_unknown_tool(self):
         result = _tool_display_text("some_new_tool", '{"foo": "bar"}')
-        assert result == "some_new_tool"
+        # Unknown tools get a generic phrase from _GENERIC_TOOL_PHRASES
+        assert isinstance(result, str)
+        assert len(result) > 0
 
     def test_scratchpad_display_uses_one_line_description(self):
-        """one_line_description should be preferred over action for scratchpad."""
+        """one_line_description should be used directly (no Scratchpad() wrapper)."""
         result = _tool_display_text(
             "scratchpad",
             '{"action": "exec", "name": "pad", "one_line_description": "Install packages"}',
         )
-        assert result == "Scratchpad(Install packages)"
+        assert result == "Install packages"
 
     def test_scratchpad_display_falls_back_to_action(self):
-        """Without one_line_description, scratchpad should show action."""
+        """Without one_line_description, scratchpad shows the action."""
         result = _tool_display_text(
             "scratchpad",
             '{"action": "exec", "name": "pad"}',
         )
-        assert result == "Scratchpad(exec)"
+        assert result == "exec"
 
     @patch("anton.chat_ui.Live")
     def test_text_routes_to_initial_before_tools(self, MockLive):

From 78bcd52998808ee35ed058ebf6d6b9f69cb2d0cd Mon Sep 17 00:00:00 2001
From: andrew <elkin.andr@gmail.com>
Date: Mon, 13 Apr 2026 11:45:58 +0300
Subject: [PATCH 090/134] replace space at the end of a prompt with invisible
 symbol

---
 anton/chat.py         |  2 +-
 anton/cli.py          |  3 ++-
 anton/utils/prompt.py | 15 ++++++++-------
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 71251d2c..1642ddb0 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -2309,7 +2309,7 @@ def _bottom_toolbar():
                 from anton.channel.theme import get_palette as _gp
                 _you_color = _gp().user_prompt
                 user_input = await prompt_session.prompt_async(
-                    [(f"bold fg:{_you_color}", "you>"), ("", " ")]
+                    [(f"bold fg:{_you_color}", "you>"), ("", '\u2009')]
                 )
             except EOFError:
                 break
diff --git a/anton/cli.py b/anton/cli.py
index 7a26df6a..bb4b31b7 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -569,7 +569,7 @@ def _on_esc(event):
     def _toolbar():
         return HTML("<style fg='#ff69b4'>\u23f5\u23f5 Esc to go back</style>")
 
-    suffix = f" ({default}): " if default else ": "
+    suffix = f" ({default}):" if default else ":"
     session: PromptSession[str] = PromptSession(
         mouse_support=False,
         bottom_toolbar=_toolbar,
@@ -585,6 +585,7 @@ def _toolbar():
     except RuntimeError:
         in_async = False
 
+    suffix = suffix + '\u2009'
     if in_async:
         # We're inside an async context (e.g. /setup from chat loop)
         # Run prompt_toolkit in a thread to avoid nested event loop conflict
diff --git a/anton/utils/prompt.py b/anton/utils/prompt.py
index dbb6fa79..14f91bd6 100644
--- a/anton/utils/prompt.py
+++ b/anton/utils/prompt.py
@@ -87,18 +87,18 @@ def _toolbar():
     opts_text = choices_display or ("/".join(choices) if choices else "")
 
     if password:
-        suffix = " (hidden): "
+        suffix = " (hidden):"
     elif opts_text and default:
         suffix = (
             f" <b><ansimagenta>[{opts_text}]</ansimagenta></b>"
-            f" <b><ansicyan>({default})</ansicyan></b>: "
+            f" <b><ansicyan>({default})</ansicyan></b>:"
         )
     elif opts_text:
-        suffix = f" <b><ansimagenta>[{opts_text}]</ansimagenta></b>: "
+        suffix = f" <b><ansimagenta>[{opts_text}]</ansimagenta></b>:"
     elif default:
-        suffix = f" <b><ansicyan>({default})</ansicyan></b>: "
+        suffix = f" <b><ansicyan>({default})</ansicyan></b>:"
     else:
-        suffix = ": "
+        suffix = ":"
 
     pt_session: PromptSession[str] = PromptSession(
         mouse_support=False,
@@ -111,11 +111,12 @@ def _toolbar():
     from anton.channel.theme import get_palette as _get_palette
     _prompt_color = _get_palette().prompt
 
+    space = '\u2009'
     if label.startswith("(anton) "):
         body = label[len("(anton) "):]
-        message = HTML(f"<b><style fg='{_prompt_color}'>(anton)</style></b> {body}{suffix}")
+        message = HTML(f"<b><style fg='{_prompt_color}'>(anton)</style></b> {body}{suffix}{space}")
     else:
-        message = HTML(f"{label}{suffix}")
+        message = HTML(f"{label}{suffix}{space}")
 
     while True:
         _esc = False

From b96354297e8ff5eb20f9d8d7cd187ba44d416d67 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Thu, 9 Apr 2026 12:51:43 -0700
Subject: [PATCH 091/134] added the interface for accessing coding creds

---
 anton/core/llm/anthropic.py |  7 +++++++
 anton/core/llm/openai.py    | 15 +++++++++++++++
 anton/core/llm/provider.py  | 24 ++++++++++++++++++++++++
 3 files changed, 46 insertions(+)

diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
index 264e4f1f..6c6114d8 100644
--- a/anton/core/llm/anthropic.py
+++ b/anton/core/llm/anthropic.py
@@ -9,6 +9,7 @@
     ContextOverflowError,
     LLMProvider,
     LLMResponse,
+    ProviderConnectionInfo,
     StreamComplete,
     StreamEvent,
     StreamTextDelta,
@@ -22,12 +23,18 @@
 
 
 class AnthropicProvider(LLMProvider):
+    name: str = "anthropic"
+
     def __init__(self, api_key: str | None = None) -> None:
+        self._api_key = api_key
         kwargs = {}
         if api_key:
             kwargs["api_key"] = api_key
         self._client = anthropic.AsyncAnthropic(**kwargs)
 
+    def export_connection_info(self) -> ProviderConnectionInfo:
+        return ProviderConnectionInfo(provider=self.name, api_key=self._api_key)
+
     async def complete(
         self,
         *,
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
index c9425a3f..29c08313 100644
--- a/anton/core/llm/openai.py
+++ b/anton/core/llm/openai.py
@@ -9,6 +9,7 @@
     ContextOverflowError,
     LLMProvider,
     LLMResponse,
+    ProviderConnectionInfo,
     StreamComplete,
     StreamEvent,
     StreamTextDelta,
@@ -194,12 +195,18 @@ def build_chat_completion_kwargs(
 
 
 class OpenAIProvider(LLMProvider):
+    name: str = "openai"
+
     def __init__(
         self,
         api_key: str | None = None,
         base_url: str | None = None,
         ssl_verify: bool = True,
     ) -> None:
+        self._api_key = api_key
+        self._base_url = base_url
+        self._ssl_verify = ssl_verify
+
         import httpx
 
         kwargs = {}
@@ -211,6 +218,14 @@ def __init__(
             kwargs["http_client"] = httpx.AsyncClient(verify=False)
         self._client = openai.AsyncOpenAI(**kwargs)
 
+    def export_connection_info(self) -> ProviderConnectionInfo:
+        return ProviderConnectionInfo(
+            provider=self.name,
+            api_key=self._api_key,
+            base_url=self._base_url,
+            ssl_verify=self._ssl_verify,
+        )
+
     async def complete(
         self,
         *,
diff --git a/anton/core/llm/provider.py b/anton/core/llm/provider.py
index 02bf93de..ef22692e 100644
--- a/anton/core/llm/provider.py
+++ b/anton/core/llm/provider.py
@@ -134,7 +134,23 @@ class TokenLimitExceeded(Exception):
     """Raised when the LLM returns 429 due to billing/token limits."""
 
 
+@dataclass
+class ProviderConnectionInfo:
+    """Serializable provider connection details.
+
+    `api_key` is marked repr=False to reduce accidental leakage via logs/debugging.
+    """
+
+    provider: str
+    api_key: str | None = field(default=None, repr=False)
+    base_url: str | None = None
+    ssl_verify: bool | None = None
+
+
 class LLMProvider(ABC):
+    # Human-readable provider id (e.g. "anthropic", "openai-compatible").
+    name: str = ""
+
     @abstractmethod
     async def complete(
         self,
@@ -147,6 +163,14 @@ async def complete(
         max_tokens: int = 4096,
     ) -> LLMResponse: ...
 
+    def export_connection_info(self) -> ProviderConnectionInfo:
+        """Return provider connection details for other runtimes (e.g. scratchpad).
+
+        Providers should override this to expose the minimal needed configuration
+        without relying on SDK client internals.
+        """
+        return ProviderConnectionInfo(provider=self.name)
+
     async def stream(
         self,
         *,

From 723008d7350c603abc6c7265fd5750b7f1d9be7f Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Thu, 9 Apr 2026 12:52:41 -0700
Subject: [PATCH 092/134] removed extra coding params from session

---
 anton/chat.py         |  3 ---
 anton/chat_session.py |  3 ---
 anton/core/session.py | 15 ++++++++-------
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 62976f01..de1c70af 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -998,9 +998,6 @@ async def _chat_loop(
         runtime_context=runtime_context,
         workspace=workspace,
         console=console,
-        coding_provider=settings.coding_provider,
-        coding_api_key=coding_api_key,
-        coding_base_url=settings.openai_base_url or "",
         history_store=history_store,
         session_id=current_session_id,
         proactive_dashboards=settings.proactive_dashboards,
diff --git a/anton/chat_session.py b/anton/chat_session.py
index 9c8b5e5d..257a4f8b 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -93,9 +93,6 @@ def rebuild_session(
         runtime_context=runtime_context,
         workspace=workspace,
         console=console,
-        coding_provider=settings.coding_provider,
-        coding_api_key=api_key,
-        coding_base_url=settings.openai_base_url or "",
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
diff --git a/anton/core/session.py b/anton/core/session.py
index 27cb6dfe..8ebc8bb8 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -67,9 +67,6 @@ class ChatSessionConfig:
     runtime_context: str = ""
     workspace: Workspace | None = None
     console: Console | None = None
-    coding_provider: str = "anthropic"
-    coding_api_key: str = ""
-    coding_base_url: str = ""
     initial_history: list[dict] | None = None
     history_store: HistoryStore | None = None
     session_id: str | None = None
@@ -113,13 +110,17 @@ def __init__(self, config: ChatSessionConfig) -> None:
         self._cancel_event = asyncio.Event()
         self._escape_watcher: EscapeWatcher | None = None
         self._active_datasource: str | None = None
+
+        coding_provider = config.llm_client.coding_provider
+        coding_conn = coding_provider.export_connection_info()
         self._scratchpads = ScratchpadManager(
-            coding_provider=config.coding_provider,
-            coding_model=getattr(config.llm_client, "coding_model", ""),
-            coding_api_key=config.coding_api_key,
-            coding_base_url=config.coding_base_url,
+            coding_provider=coding_conn.provider,
+            coding_model=config.llm_client.coding_model,
+            coding_api_key=coding_conn.api_key or "",
+            coding_base_url=coding_conn.base_url or "",
             workspace_path=config.workspace.base if config.workspace else None,
         )
+
         self.tool_registry = ToolRegistry()
         # Procedural memory: brain-inspired skills (Stage 1 = declarative).
         # Lives at ~/.anton/skills/<label>/. The recall_skill tool retrieves

From 5b1a5c36e87f9d59b27ec68b59ce67f978a5c330 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Thu, 9 Apr 2026 12:54:48 -0700
Subject: [PATCH 093/134] removed unused code

---
 anton/chat.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index de1c70af..3d291ca7 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -985,11 +985,6 @@ async def _chat_loop(
     # Build runtime context so the LLM knows what it's running on
     runtime_context = build_runtime_context(settings)
 
-    coding_api_key = (
-        settings.anthropic_api_key
-        if settings.coding_provider == "anthropic"
-        else settings.openai_api_key
-    ) or ""
     session = ChatSession(ChatSessionConfig(
         llm_client=state["llm_client"],
         self_awareness=self_awareness,

From 06ce4221d8d8468899b33eeae8303c84fd1a22c3 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Thu, 9 Apr 2026 22:42:45 -0700
Subject: [PATCH 094/134] fixed broken import

---
 anton/minds_client.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/anton/minds_client.py b/anton/minds_client.py
index a0df7372..3215e4cc 100644
--- a/anton/minds_client.py
+++ b/anton/minds_client.py
@@ -13,13 +13,12 @@
 import ssl
 import urllib.error
 import urllib.request
-from pathlib import Path
 from typing import TYPE_CHECKING
 
 from anton.core.llm.openai import build_chat_completion_kwargs
 
 if TYPE_CHECKING:
-    from anton.settings import AntonSettings
+    from anton.config.settings import AntonSettings
 
 
 def minds_request(

From eb2ab19ba6eb0faa581eb3075f09334d6cecfc55 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Fri, 10 Apr 2026 00:01:58 -0700
Subject: [PATCH 095/134] moved data vault and registry to core

---
 anton/chat.py                                       | 4 ++--
 anton/commands/datasource.py                        | 4 ++--
 anton/connect_collector.py                          | 7 +++++--
 anton/core/datasources/__init__.py                  | 0
 anton/{ => core/datasources}/data_vault.py          | 0
 anton/{ => core/datasources}/datasource_registry.py | 2 +-
 anton/{config => core/datasources}/datasources.md   | 0
 anton/tools.py                                      | 2 +-
 anton/utils/datasources.py                          | 6 +++---
 tests/test_connect_collector.py                     | 6 +++++-
 tests/test_datasource.py                            | 4 ++--
 11 files changed, 21 insertions(+), 14 deletions(-)
 create mode 100644 anton/core/datasources/__init__.py
 rename anton/{ => core/datasources}/data_vault.py (100%)
 rename anton/{ => core/datasources}/datasource_registry.py (98%)
 rename anton/{config => core/datasources}/datasources.md (100%)

diff --git a/anton/chat.py b/anton/chat.py
index 3d291ca7..189118af 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -69,11 +69,11 @@
     list_datasources,
     test_llm,
 )
-from anton.data_vault import DataVault
+from anton.core.datasources.data_vault import DataVault
 from anton.utils.datasources import (
     register_secret_vars,
 )
-from anton.datasource_registry import (
+from anton.core.datasources.datasource_registry import (
     DatasourceRegistry,
 )
 
diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
index cc606dff..24ec65a8 100644
--- a/anton/commands/datasource.py
+++ b/anton/commands/datasource.py
@@ -15,8 +15,8 @@
 from rich.padding import Padding
 
 from anton.connect_collector import ConnectionCollector, extract_variables
-from anton.data_vault import DataVault
-from anton.datasource_registry import (
+from anton.core.datasources.data_vault import DataVault
+from anton.core.datasources.datasource_registry import (
     AuthMethod,
     DatasourceEngine,
     DatasourceField,
diff --git a/anton/connect_collector.py b/anton/connect_collector.py
index d351c1a5..c209138f 100644
--- a/anton/connect_collector.py
+++ b/anton/connect_collector.py
@@ -20,10 +20,13 @@
 
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
-
 from pydantic import BaseModel, Field
 
-from anton.datasource_registry import AuthMethod, DatasourceEngine, DatasourceField
+from anton.core.datasources.datasource_registry import (
+    AuthMethod,
+    DatasourceEngine,
+    DatasourceField,
+)
 
 if TYPE_CHECKING:
     from rich.console import Console
diff --git a/anton/core/datasources/__init__.py b/anton/core/datasources/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/anton/data_vault.py b/anton/core/datasources/data_vault.py
similarity index 100%
rename from anton/data_vault.py
rename to anton/core/datasources/data_vault.py
diff --git a/anton/datasource_registry.py b/anton/core/datasources/datasource_registry.py
similarity index 98%
rename from anton/datasource_registry.py
rename to anton/core/datasources/datasource_registry.py
index e932807e..1b0206b2 100644
--- a/anton/datasource_registry.py
+++ b/anton/core/datasources/datasource_registry.py
@@ -123,7 +123,7 @@ def _parse_file(
 class DatasourceRegistry:
     """Parsed registry of all available data source engines."""
 
-    _BUILTIN_PATH: Path = Path(__file__).parent / "config" / "datasources.md"
+    _BUILTIN_PATH: Path = Path(__file__).resolve().parent / "datasources.md"
     _USER_PATH: Path = Path("~/.anton/datasources.md").expanduser()
 
     def __init__(self) -> None:
diff --git a/anton/config/datasources.md b/anton/core/datasources/datasources.md
similarity index 100%
rename from anton/config/datasources.md
rename to anton/core/datasources/datasources.md
diff --git a/anton/tools.py b/anton/tools.py
index bd1f2505..f3c97692 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -31,7 +31,7 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     )
 
     from anton.commands.datasource import handle_connect_datasource
-    from anton.data_vault import DataVault
+    from anton.core.datasources.data_vault import DataVault
 
     # Check which connections exist before
     vault = DataVault()
diff --git a/anton/utils/datasources.py b/anton/utils/datasources.py
index 7dd7943d..fd18a933 100644
--- a/anton/utils/datasources.py
+++ b/anton/utils/datasources.py
@@ -5,11 +5,11 @@
 import yaml
 from typing import TYPE_CHECKING
 
-from anton.data_vault import DataVault, _slug_env_prefix
-from anton.datasource_registry import DatasourceRegistry, _YAML_BLOCK_RE
+from anton.core.datasources.data_vault import DataVault, _slug_env_prefix
+from anton.core.datasources.datasource_registry import DatasourceRegistry, _YAML_BLOCK_RE
 
 if TYPE_CHECKING:
-    from anton.datasource_registry import DatasourceEngine
+    from anton.core.datasources.datasource_registry import DatasourceEngine
 
 # DS_* var names whose values are known to be secret (passwords, tokens, keys).
 # Populated at startup and after each successful connect.
diff --git a/tests/test_connect_collector.py b/tests/test_connect_collector.py
index 5a426907..cb938366 100644
--- a/tests/test_connect_collector.py
+++ b/tests/test_connect_collector.py
@@ -10,7 +10,11 @@
     _ExtractionResult,
     extract_variables,
 )
-from anton.datasource_registry import AuthMethod, DatasourceEngine, DatasourceField
+from anton.core.datasources.datasource_registry import (
+    AuthMethod,
+    DatasourceEngine,
+    DatasourceField,
+)
 
 
 def _postgres_engine() -> DatasourceEngine:
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index b26e4205..9d2da8b1 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -32,8 +32,8 @@
     parse_connection_slug,
 )
 from anton.cli import app as cli_app
-from anton.data_vault import DataVault, _slug_env_prefix
-from anton.datasource_registry import (
+from anton.core.datasources.data_vault import DataVault, _slug_env_prefix
+from anton.core.datasources.datasource_registry import (
     DatasourceEngine,
     DatasourceRegistry,
     _parse_file,

From e3e6b267f6c2982e90f9629f80d8a5fc16034cb8 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Fri, 10 Apr 2026 00:02:21 -0700
Subject: [PATCH 096/134] fixed handling non-string env var injection

---
 anton/core/datasources/data_vault.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/core/datasources/data_vault.py b/anton/core/datasources/data_vault.py
index 61f55cc3..e5e75229 100644
--- a/anton/core/datasources/data_vault.py
+++ b/anton/core/datasources/data_vault.py
@@ -115,7 +115,7 @@ def inject_env(self, engine: str, name: str, *, flat: bool = False) -> list[str]
             prefix = _slug_env_prefix(engine, name)
             for key, value in fields.items():
                 var = f"{prefix}__{key.upper()}"
-                os.environ[var] = value
+                os.environ[var] = value if isinstance(value, str) else str(value)
                 var_names.append(var)
         return var_names
 

From 025a41d1057e9bd1c3280f5de25811b24bca9eb2 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Fri, 10 Apr 2026 00:21:51 -0700
Subject: [PATCH 097/134] enabled data_vault to be passed to session

---
 anton/core/session.py      | 5 ++++-
 anton/utils/datasources.py | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index 8ebc8bb8..fecc4be2 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -5,6 +5,7 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
+from anton.core.datasources.data_vault import DataVault
 from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
 from anton.core.memory.cerebellum import Cerebellum
 from anton.core.memory.skills import SkillStore
@@ -66,6 +67,7 @@ class ChatSessionConfig:
     episodic: EpisodicMemory | None = None
     runtime_context: str = ""
     workspace: Workspace | None = None
+    data_vault: DataVault | None = None
     console: Console | None = None
     initial_history: list[dict] | None = None
     history_store: HistoryStore | None = None
@@ -95,6 +97,7 @@ def __init__(self, config: ChatSessionConfig) -> None:
         self._extra_tools = config.tools
         self._output_dir = config.output_dir
         self._workspace = config.workspace
+        self._data_vault = config.data_vault
         self._console = config.console
         self._history: list[dict] = (
             list(config.initial_history) if config.initial_history else []
@@ -289,7 +292,7 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
             md_context = self._workspace.build_anton_md_context()
 
         # Inject connected datasource context without credentials
-        ds_ctx = build_datasource_context(active_only=self._active_datasource)
+        ds_ctx = build_datasource_context(self._data_vault, active_only=self._active_datasource)
 
         # Ensure the registry is populated before we extract tool prompts.
         self._build_tools()
diff --git a/anton/utils/datasources.py b/anton/utils/datasources.py
index fd18a933..1767bd11 100644
--- a/anton/utils/datasources.py
+++ b/anton/utils/datasources.py
@@ -105,7 +105,7 @@ def scrub_credentials(text: str) -> str:
     return text
 
 
-def build_datasource_context(active_only: str | None = None) -> str:
+def build_datasource_context(vault: DataVault, active_only: str | None = None) -> str:
     """Build a system-prompt section listing available DS_* env vars by name.
 
     Shows the LLM what data sources are connected and which environment
@@ -114,7 +114,7 @@ def build_datasource_context(active_only: str | None = None) -> str:
     If active_only is set, only the matching slug is included.
     """
     try:
-        vault = DataVault()
+        vault = vault or DataVault()
         conns = vault.list_connections()
     except Exception:
         return ""

From b6da9eed63601f77d141fca9f33ad5d90b572f58 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Fri, 10 Apr 2026 00:25:29 -0700
Subject: [PATCH 098/134] updated path given to vault dir

---
 anton/utils/datasources.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/utils/datasources.py b/anton/utils/datasources.py
index 1767bd11..63ebe1d5 100644
--- a/anton/utils/datasources.py
+++ b/anton/utils/datasources.py
@@ -125,7 +125,7 @@ def build_datasource_context(vault: DataVault, active_only: str | None = None) -
         "Credentials are pre-injected as namespaced DS_<ENGINE_NAME>__<FIELD> "
         "environment variables. Use them directly in scratchpad code "
         "(e.g. DS_POSTGRES_PROD_DB__HOST). "
-        "Never read ~/.anton/data_vault/ files directly.\n"
+        f"Never read {str(vault.vault_dir)} files directly.\n"
     )
     for c in conns:
         slug = f"{c['engine']}-{c['name']}"

From 103995ea85c51345719fa313fec2f9673a3270cb Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 16:52:19 -0700
Subject: [PATCH 099/134] fixed the default jungle for scratchpad runtimes

---
 anton/core/backends/base.py  | 13 +++++++------
 anton/core/backends/local.py | 12 ++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/anton/core/backends/base.py b/anton/core/backends/base.py
index 51a267de..93f57bbf 100644
--- a/anton/core/backends/base.py
+++ b/anton/core/backends/base.py
@@ -6,9 +6,8 @@
 
 from __future__ import annotations
 
-import json
 from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 
 
@@ -37,17 +36,19 @@ def __init__(
         self,
         name: str,
         *,
+        coding_provider: str,
+        coding_model: str,
+        coding_api_key: str,
+        coding_base_url: str,
         cells: list[Cell] | None = None,
-        coding_provider: str = "anthropic",
-        coding_model: str = "",
-        coding_api_key: str = "",
         workspace_path: Path | None = None,
     ) -> None:
         self.name = name
-        self.cells: list[Cell] = cells if cells is not None else []
         self._coding_provider = coding_provider
         self._coding_model = coding_model
         self._coding_api_key = coding_api_key
+        self._coding_base_url = coding_base_url
+        self.cells: list[Cell] = cells if cells is not None else []
         self._workspace_path = workspace_path or Path("~/.anton").expanduser()
         self._installed_packages: set[str] = set()
 
diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index bed8801e..e3c7bed1 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -46,23 +46,23 @@ def __init__(
         self,
         name: str,
         *,
+        coding_provider: str,
+        coding_model: str,
+        coding_api_key: str,
+        coding_base_url: str,
         cells: list[Cell] | None = None,
-        coding_provider: str = "anthropic",
-        coding_model: str = "",
-        coding_api_key: str = "",
-        coding_base_url: str = "",
         workspace_path: Path | None = None,
         _venvs_base: Path | None = None,
     ) -> None:
         super().__init__(
             name,
-            cells=cells,
             coding_provider=coding_provider,
             coding_model=coding_model,
             coding_api_key=coding_api_key,
+            coding_base_url=coding_base_url,
+            cells=cells,
             workspace_path=workspace_path,
         )
-        self._coding_base_url = coding_base_url
         self._proc: asyncio.subprocess.Process | None = None
         self._boot_path: str | None = None
         self._venv_dir: str | None = None

From 966a4f86a4fc21601db1e531c57ea4411a0b9f58 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 17:05:05 -0700
Subject: [PATCH 100/134] introduced the runtime factory protocol

---
 anton/core/backends/base.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/anton/core/backends/base.py b/anton/core/backends/base.py
index 93f57bbf..c45a1147 100644
--- a/anton/core/backends/base.py
+++ b/anton/core/backends/base.py
@@ -9,6 +9,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Protocol
 
 
 @dataclass
@@ -234,3 +235,17 @@ def _compact_cells(self) -> bool:
         )
         self.cells = [summary_cell] + recent
         return True
+
+
+class ScratchpadRuntimeFactory(Protocol):
+    def __call__(
+        self,
+        *,
+        name: str,
+        cells: list[Cell] | None,
+        coding_provider: str,
+        coding_model: str,
+        coding_api_key: str,
+        coding_base_url: str,
+        workspace_path: Path | None,
+    ) -> ScratchpadRuntime: ...
\ No newline at end of file

From d66e7761d0ffd5b2cea2e88943fc9b4cb5a59534 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 17:16:17 -0700
Subject: [PATCH 101/134] incorporated runtime factory to backend
 initialization

---
 anton/core/backends/local.py   | 21 +++++++++++++++++++++
 anton/core/backends/manager.py | 18 +++++++++++-------
 anton/core/session.py          |  5 +++++
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index e3c7bed1..67562716 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -682,3 +682,24 @@ def _kill_tree(self) -> None:
                     pass
         else:
             self._proc.kill()
+
+
+def local_scratchpad_runtime_factory(
+    *,
+    name: str,
+    cells: list[Cell] | None,
+    coding_provider: str,
+    coding_model: str,
+    coding_api_key: str,
+    coding_base_url: str,
+    workspace_path: Path | None,
+) -> ScratchpadRuntime:
+    return LocalScratchpadRuntime(
+        name=name,
+        cells=cells,
+        coding_provider=coding_provider,
+        coding_model=coding_model,
+        coding_api_key=coding_api_key,
+        coding_base_url=coding_base_url,
+        workspace_path=workspace_path,
+    )
diff --git a/anton/core/backends/manager.py b/anton/core/backends/manager.py
index eff9334c..f1d7d7fe 100644
--- a/anton/core/backends/manager.py
+++ b/anton/core/backends/manager.py
@@ -4,8 +4,7 @@
 
 from pathlib import Path
 
-from anton.core.backends.base import ScratchpadRuntime
-from anton.core.backends.local import LocalScratchpadRuntime
+from anton.core.backends.base import Cell, ScratchpadRuntime, ScratchpadRuntimeFactory
 
 
 class ScratchpadManager:
@@ -13,17 +12,21 @@ class ScratchpadManager:
 
     def __init__(
         self,
-        coding_provider: str = "anthropic",
-        coding_model: str = "",
-        coding_api_key: str = "",
-        coding_base_url: str = "",
+        runtime_factory: ScratchpadRuntimeFactory,
+        coding_provider: str,
+        coding_model: str,
+        coding_api_key: str,
+        coding_base_url: str,
+        cells: list[Cell] | None = None,
         workspace_path: Path | None = None,
     ) -> None:
         self._pads: dict[str, ScratchpadRuntime] = {}
+        self._runtime_factory = runtime_factory
         self._coding_provider = coding_provider
         self._coding_model = coding_model
         self._coding_api_key = coding_api_key
         self._coding_base_url = coding_base_url
+        self._cells = cells
         self._workspace_path = workspace_path
         self._available_packages: list[str] = self.probe_packages()
 
@@ -47,8 +50,9 @@ def probe_packages() -> list[str]:
     async def get_or_create(self, name: str) -> ScratchpadRuntime:
         """Return existing pad or create + start a new one."""
         if name not in self._pads:
-            pad = LocalScratchpadRuntime(
+            pad = self._runtime_factory(
                 name=name,
+                cells=self._cells,
                 coding_provider=self._coding_provider,
                 coding_model=self._coding_model,
                 coding_api_key=self._coding_api_key,
diff --git a/anton/core/session.py b/anton/core/session.py
index fecc4be2..ac8bc3f5 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -5,6 +5,8 @@
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING
 
+from anton.core.backends.base import Cell, ScratchpadRuntimeFactory
+from anton.core.backends.local import local_scratchpad_runtime_factory
 from anton.core.datasources.data_vault import DataVault
 from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
 from anton.core.memory.cerebellum import Cerebellum
@@ -61,6 +63,8 @@ class ChatSessionConfig:
     """
 
     llm_client: LLMClient
+    runtime_factory: ScratchpadRuntimeFactory = field(default_factory=local_scratchpad_runtime_factory)
+    cells: list[Cell] | None = None
     settings: CoreSettings | None = None
     self_awareness: SelfAwarenessContext | None = None
     cortex: Cortex | None = None
@@ -117,6 +121,7 @@ def __init__(self, config: ChatSessionConfig) -> None:
         coding_provider = config.llm_client.coding_provider
         coding_conn = coding_provider.export_connection_info()
         self._scratchpads = ScratchpadManager(
+            runtime_factory=config.runtime_factory,
             coding_provider=coding_conn.provider,
             coding_model=config.llm_client.coding_model,
             coding_api_key=coding_conn.api_key or "",

From 8813ac632387eea9856a95fd00587121608f8838 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 19:00:36 -0700
Subject: [PATCH 102/134] introduced utilities for backends

---
 anton/core/backends/local.py | 16 ++--------------
 anton/core/backends/utils.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 14 deletions(-)
 create mode 100644 anton/core/backends/utils.py

diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index 67562716..0416f6b7 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -19,24 +19,12 @@
     RESULT_START,
 )
 from anton.core.settings import CoreSettings
+from anton.core.backends.utils import compute_timeouts
 
 _BOOT_SCRIPT_PATH = Path(__file__).parent / "scratchpad_boot.py"
 _MAX_OUTPUT = 10_000
 
 
-def _compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
-    """Compute (total_timeout, inactivity_timeout) from an estimated run time.
-
-    Reads defaults from CoreSettings so they're tunable via env vars.
-    """
-    s = CoreSettings()
-    if estimated_seconds <= 0:
-        return float(s.cell_timeout_default), float(s.cell_inactivity_timeout)
-    total = max(estimated_seconds * 2, estimated_seconds + 30)
-    inactivity = max(estimated_seconds * 0.5, 30)
-    return float(total), float(inactivity)
-
-
 class LocalScratchpadRuntime(ScratchpadRuntime):
     """Runs scratchpad cells in a persistent per-named venv subprocess."""
 
@@ -444,7 +432,7 @@ async def execute_streaming(
         self._proc.stdin.write(payload.encode())  # type: ignore[union-attr]
         await self._proc.stdin.drain()  # type: ignore[union-attr]
 
-        total_timeout, inactivity_timeout = _compute_timeouts(estimated_seconds)
+        total_timeout, inactivity_timeout = compute_timeouts(estimated_seconds)
 
         try:
             result_data: dict | None = None
diff --git a/anton/core/backends/utils.py b/anton/core/backends/utils.py
new file mode 100644
index 00000000..07cd1796
--- /dev/null
+++ b/anton/core/backends/utils.py
@@ -0,0 +1,14 @@
+from anton.core.settings import CoreSettings
+
+
+def compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
+    """Compute (total_timeout, inactivity_timeout) from an estimated run time.
+
+    Reads defaults from CoreSettings so they're tunable via env vars.
+    """
+    s = CoreSettings()
+    if estimated_seconds <= 0:
+        return float(s.cell_timeout_default), float(s.cell_inactivity_timeout)
+    total = max(estimated_seconds * 2, estimated_seconds + 30)
+    inactivity = max(estimated_seconds * 0.5, 30)
+    return float(total), float(inactivity)
\ No newline at end of file

From 3c2413325fb942252b0f64bfa672ebbf9a25e74f Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 19:11:28 -0700
Subject: [PATCH 103/134] fixed local runtime factory

---
 anton/core/backends/local.py | 4 ++--
 anton/core/session.py        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/anton/core/backends/local.py b/anton/core/backends/local.py
index 0416f6b7..66020742 100644
--- a/anton/core/backends/local.py
+++ b/anton/core/backends/local.py
@@ -675,19 +675,19 @@ def _kill_tree(self) -> None:
 def local_scratchpad_runtime_factory(
     *,
     name: str,
-    cells: list[Cell] | None,
     coding_provider: str,
     coding_model: str,
     coding_api_key: str,
     coding_base_url: str,
+    cells: list[Cell] | None,
     workspace_path: Path | None,
 ) -> ScratchpadRuntime:
     return LocalScratchpadRuntime(
         name=name,
-        cells=cells,
         coding_provider=coding_provider,
         coding_model=coding_model,
         coding_api_key=coding_api_key,
         coding_base_url=coding_base_url,
+        cells=cells,
         workspace_path=workspace_path,
     )
diff --git a/anton/core/session.py b/anton/core/session.py
index ac8bc3f5..1325d03e 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -63,7 +63,7 @@ class ChatSessionConfig:
     """
 
     llm_client: LLMClient
-    runtime_factory: ScratchpadRuntimeFactory = field(default_factory=local_scratchpad_runtime_factory)
+    runtime_factory: ScratchpadRuntimeFactory = field(default=local_scratchpad_runtime_factory)
     cells: list[Cell] | None = None
     settings: CoreSettings | None = None
     self_awareness: SelfAwarenessContext | None = None

From 914f873fde7c430b577e9f244d2943a949b387d7 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 19:31:35 -0700
Subject: [PATCH 104/134] fixed vault dir error

---
 anton/utils/datasources.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/utils/datasources.py b/anton/utils/datasources.py
index 63ebe1d5..70a63141 100644
--- a/anton/utils/datasources.py
+++ b/anton/utils/datasources.py
@@ -125,7 +125,7 @@ def build_datasource_context(vault: DataVault, active_only: str | None = None) -
         "Credentials are pre-injected as namespaced DS_<ENGINE_NAME>__<FIELD> "
         "environment variables. Use them directly in scratchpad code "
         "(e.g. DS_POSTGRES_PROD_DB__HOST). "
-        f"Never read {str(vault.vault_dir)} files directly.\n"
+        "Never read the data vault files directly.\n"
     )
     for c in conns:
         slug = f"{c['engine']}-{c['name']}"

From 6c2e9f70f86697bcb15be63e7b439dbfd5c5f9cb Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Mon, 13 Apr 2026 22:17:54 -0700
Subject: [PATCH 105/134] added structured system prompt ctx instead of run ctx

---
 anton/chat.py                    |  3 ++-
 anton/chat_session.py            |  3 ++-
 anton/core/llm/prompt_builder.py | 34 ++++++++++++++++++++++++++++----
 anton/core/session.py            |  8 ++++----
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 189118af..84a95e02 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -18,6 +18,7 @@
     save_clipboard_image,
 )
 from anton.core.session import ChatSession, ChatSessionConfig
+from anton.core.llm.prompt_builder import SystemPromptContext
 from anton.core.llm.provider import (
     TokenLimitExceeded,
     StreamComplete,
@@ -990,7 +991,7 @@ async def _chat_loop(
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
-        runtime_context=runtime_context,
+        system_prompt_context=SystemPromptContext(runtime_context=runtime_context),
         workspace=workspace,
         console=console,
         history_store=history_store,
diff --git a/anton/chat_session.py b/anton/chat_session.py
index 257a4f8b..4ee6e387 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -8,6 +8,7 @@
 from rich.console import Console
 
 from anton.config.settings import AntonSettings
+from anton.core.llm.prompt_builder import SystemPromptContext
 from anton.minds_client import refresh_knowledge
 
 if TYPE_CHECKING:
@@ -90,7 +91,7 @@ def rebuild_session(
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
-        runtime_context=runtime_context,
+        system_prompt_context=SystemPromptContext(runtime_context=runtime_context),
         workspace=workspace,
         console=console,
         history_store=history_store,
diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index 93d179cb..4fda0843 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -15,6 +16,21 @@
     from anton.core.tools.tool_defs import ToolDef
 
 
+@dataclass(frozen=True)
+class SystemPromptContext:
+    """Bundled prompt-injection points for the system prompt.
+
+    Three levels with increasing importance (later = stronger influence):
+      1. ``prefix``  — prepended before the base prompt
+      2. ``runtime_context`` — interpolated into the RUNTIME IDENTITY section
+      3. ``suffix``  — appended after all other sections
+    """
+
+    runtime_context: str = ""
+    prefix: str = ""
+    suffix: str = ""
+
+
 class ChatSystemPromptBuilder:
     """
     Build Anton's chat system prompt from core components.
@@ -109,7 +125,7 @@ def build(
         self,
         *,
         current_datetime: str,
-        runtime_context: str,
+        system_prompt_context: SystemPromptContext,
         proactive_dashboards: bool,
         output_dir: str,
         tool_defs: list["ToolDef"] | None = None,
@@ -126,8 +142,14 @@ def build(
             output_path=output_path,
         )
 
-        prompt = CHAT_SYSTEM_PROMPT.format(
-            runtime_context=runtime_context,
+        prompt = ""
+
+        prefix = system_prompt_context.prefix.strip()
+        if prefix:
+            prompt += f"{prefix}\n\n"
+
+        prompt += CHAT_SYSTEM_PROMPT.format(
+            runtime_context=system_prompt_context.runtime_context,
             visualizations_section=visualizations_section,
             current_datetime=current_datetime,
         )
@@ -149,7 +171,11 @@ def build(
         if procedural_memory:
             prompt += procedural_memory
 
+        suffix = system_prompt_context.suffix.strip()
+        if suffix:
+            prompt += f"\n\n{suffix}"
+
         return prompt
 
 
-__all__ = ["ChatSystemPromptBuilder"]
+__all__ = ["ChatSystemPromptBuilder", "SystemPromptContext"]
diff --git a/anton/core/session.py b/anton/core/session.py
index 1325d03e..af0f156d 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -8,7 +8,7 @@
 from anton.core.backends.base import Cell, ScratchpadRuntimeFactory
 from anton.core.backends.local import local_scratchpad_runtime_factory
 from anton.core.datasources.data_vault import DataVault
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.cerebellum import Cerebellum
 from anton.core.memory.skills import SkillStore
 from anton.core.tools.recall_skill import RECALL_SKILL_TOOL
@@ -69,7 +69,7 @@ class ChatSessionConfig:
     self_awareness: SelfAwarenessContext | None = None
     cortex: Cortex | None = None
     episodic: EpisodicMemory | None = None
-    runtime_context: str = ""
+    system_prompt_context: SystemPromptContext = field(default_factory=SystemPromptContext)
     workspace: Workspace | None = None
     data_vault: DataVault | None = None
     console: Console | None = None
@@ -96,7 +96,7 @@ def __init__(self, config: ChatSessionConfig) -> None:
         self._self_awareness = config.self_awareness
         self._cortex = config.cortex
         self._episodic = config.episodic
-        self._runtime_context = config.runtime_context
+        self._system_prompt_context = config.system_prompt_context
         self._proactive_dashboards = config.proactive_dashboards
         self._extra_tools = config.tools
         self._output_dir = config.output_dir
@@ -306,7 +306,7 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
         prompt = prompt_builder.build(
             output_dir=self._output_dir,
             current_datetime=_current_datetime,
-            runtime_context=self._runtime_context,
+            system_prompt_context=self._system_prompt_context,
             proactive_dashboards=self._proactive_dashboards,
             tool_defs=self.tool_registry.get_tool_defs(),
             memory_context=memory_section,

From b597619f1bf6147f95d5d807297f96b81c5f62e8 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:53:41 +0200
Subject: [PATCH 106/134] Rmv old ds

---
 anton/commands/datasource.py | 1493 ----------------------------------
 1 file changed, 1493 deletions(-)
 delete mode 100644 anton/commands/datasource.py

diff --git a/anton/commands/datasource.py b/anton/commands/datasource.py
deleted file mode 100644
index 24ec65a8..00000000
--- a/anton/commands/datasource.py
+++ /dev/null
@@ -1,1493 +0,0 @@
-"""Slash-command handlers for datasource commands."""
-
-from __future__ import annotations
-
-import json
-import os
-import re
-import uuid
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from pydantic import BaseModel, Field
-from rich.console import Console
-from rich.markdown import Markdown
-from rich.padding import Padding
-
-from anton.connect_collector import ConnectionCollector, extract_variables
-from anton.core.datasources.data_vault import DataVault
-from anton.core.datasources.datasource_registry import (
-    AuthMethod,
-    DatasourceEngine,
-    DatasourceField,
-    DatasourceRegistry,
-)
-from anton.utils.datasources import (
-    register_secret_vars,
-    remove_engine_block,
-    restore_namespaced_env,
-    parse_connection_slug,
-)
-from anton.utils.prompt import prompt_or_cancel
-from anton.core.backends.manager import ScratchpadManager
-
-if TYPE_CHECKING:
-    from anton.chat import ChatSession
-
-
-# ─────────────────────────────────────────────────────────────────────────────
-# LLM-facing schema (Pydantic) for handle_add_custom_datasource
-# ─────────────────────────────────────────────────────────────────────────────
-
-
-class _CustomDatasourceField(BaseModel):
-    """One credential field in a custom-datasource spec."""
-
-    name: str = Field(
-        ...,
-        description=(
-            "snake_case field name (e.g. 'host', 'api_key'). Must be a "
-            "valid Python identifier; this becomes both the on-disk key "
-            "and the env var suffix (DS_<NAME>)."
-        ),
-    )
-    value: str = Field(
-        default="",
-        description=(
-            "Inline value if the user already provided one in their "
-            "description, otherwise empty string."
-        ),
-    )
-    secret: bool = Field(
-        default=False,
-        description=(
-            "True if the field is sensitive (passwords, API keys, "
-            "tokens) — affects how it's stored and prompted for."
-        ),
-    )
-    required: bool = Field(
-        default=True,
-        description="True if the connection cannot be tested without this field.",
-    )
-    description: str = Field(
-        default="",
-        description=(
-            "One-line description shown to the user when prompting "
-            "for this field."
-        ),
-    )
-
-
-class _CustomDatasourceSpec(BaseModel):
-    """Structured output of the LLM call in handle_add_custom_datasource."""
-
-    display_name: str = Field(
-        ...,
-        description="Human-readable name for the service (e.g. 'GitHub API').",
-    )
-    pip: str = Field(
-        default="",
-        description=(
-            "pip-installable package name (or space-separated names) "
-            "needed to interact with this service. Empty string if no "
-            "extra package is required (e.g. plain HTTPS via stdlib)."
-        ),
-    )
-    test_snippet: str = Field(
-        default="",
-        description=(
-            "Python code that tests the connection using os.environ "
-            "vars DS_FIELDNAME (uppercase field name with DS_ prefix) "
-            "and prints 'ok' on success. Empty string if untestable."
-        ),
-    )
-    fields: list[_CustomDatasourceField] = Field(
-        default_factory=list,
-        description=(
-            "Credential fields the user will need to provide. List in "
-            "the order they should be prompted."
-        ),
-    )
-
-_PROMPT_RECONNECT_CANCEL = "(reconnect/cancel)"
-
-
-def handle_list_data_sources(console: Console) -> None:
-    """Print all saved Local Vault connections in a table with status."""
-    from rich.table import Table
-
-    vault = DataVault()
-    registry = DatasourceRegistry()
-    conns = vault.list_connections()
-    console.print()
-    if not conns:
-        console.print("[anton.muted]No data sources connected yet.[/]")
-        console.print("[anton.muted]Use /connect to add one.[/]")
-        console.print()
-        return
-
-    table = Table(title="Local Vault — Saved Connections", show_lines=False)
-    table.add_column("Name", style="bold")
-    table.add_column("Source")
-    table.add_column("Status")
-
-    for c in conns:
-        slug = f"{c['engine']}-{c['name']}"
-        engine_def = registry.get(c["engine"])
-        source = engine_def.display_name if engine_def else c["engine"]
-        fields = vault.load(c["engine"], c["name"]) or {}
-
-        if not fields:
-            status = "[yellow]incomplete[/]"
-        elif engine_def and engine_def.auth_method != "choice":
-            required = [f.name for f in engine_def.fields if f.required]
-            missing = [name for name in required if name not in fields]
-            status = "[yellow]incomplete[/]" if missing else "[green]saved[/]"
-        else:
-            status = "[green]saved[/]"
-
-        table.add_row(slug, source, status)
-
-    console.print(table)
-    console.print()
-
-
-async def handle_remove_data_source(console: Console, slug: str) -> None:
-    """Delete a connection from the Local Vault by slug (engine-name)."""
-    vault = DataVault()
-    registry = DatasourceRegistry()
-
-    if not slug:
-        connections = vault.list_connections()
-        if not connections:
-            console.print("[anton.muted]No saved connections to remove.[/]")
-            console.print()
-            return
-        console.print()
-        console.print("[anton.cyan](anton)[/] Which connection do you want to remove?\n")
-        for i, c in enumerate(connections, 1):
-            conn_slug = f"{c['engine']}-{c['name']}"
-            engine_def = registry.get(c["engine"])
-            label = engine_def.display_name if engine_def else c["engine"]
-            console.print(f"          [bold]{i:>2}.[/bold] {conn_slug} [dim]({label})[/]")
-        console.print()
-        choices = [str(i) for i in range(1, len(connections) + 1)]
-        pick = await prompt_or_cancel("(anton) Enter a number", choices=choices)
-        if pick is None:
-            console.print("[anton.muted]Cancelled.[/]")
-            console.print()
-            return
-        picked = connections[int(pick) - 1]
-        slug = f"{picked['engine']}-{picked['name']}"
-
-    parsed = parse_connection_slug(slug, [e.engine for e in registry.all_engines()], vault=vault)
-    if parsed is None:
-        console.print(
-            f"[anton.warning]Invalid name '{slug}'. Use engine-name format.[/]"
-        )
-        console.print()
-        return
-    engine, name = parsed
-    if vault.load(engine, name) is None:
-        console.print(f"[anton.warning]No connection '{slug}' found.[/]")
-        console.print()
-        return
-
-    confirm = await prompt_or_cancel(
-        f"(anton) Remove '{slug}' from Local Vault?",
-        choices=["y", "n"], default="n",
-    )
-    if confirm is not None and confirm.strip().lower() == "y":
-        vault.delete(engine, name)
-        restore_namespaced_env(vault)
-        engine_def = registry.get(engine)
-        if engine_def is not None and engine_def.custom:
-            remaining = [
-                c for c in vault.list_connections() if c["engine"] == engine
-            ]
-            if not remaining:
-                user_path = DatasourceRegistry._USER_PATH
-                if user_path.is_file():
-                    updated = remove_engine_block(
-                        user_path.read_text(encoding="utf-8"), engine
-                    )
-                    user_path.write_text(updated, encoding="utf-8")
-                    registry.reload()
-        console.print(f"[anton.success]Removed {slug}.[/]")
-    else:
-        console.print("[anton.muted]Cancelled.[/]")
-    console.print()
-
-
-async def show_credential_help(
-    console: Console,
-    session: "ChatSession",
-    service_name: str,
-    current_field,
-    all_fields: list,
-) -> None:
-    """Use the LLM to explain how to obtain credentials."""
-    field_descriptions = ", ".join(
-        f"{f.name} ({f.description})" for f in all_fields
-    )
-    storage_note = (
-        "The credentials will be stored securely in Anton's Local Vault — "
-        "do NOT suggest storage tips, password managers, or safe-keeping advice."
-    )
-    if current_field is not None:
-        prompt = (
-            f"I'm connecting to {service_name} and need to provide: {field_descriptions}\n\n"
-            f"I need help with the '{current_field.name}' field"
-            f" ({current_field.description}).\n\n"
-            "Give me a brief step-by-step guide on where and how to get this credential. "
-            f"Be concise — numbered steps, no fluff. {storage_note}"
-        )
-        heading = f"[anton.cyan](anton)[/] How to get [bold]{current_field.name}[/]:"
-    else:
-        prompt = (
-            f"I'm connecting to {service_name} and need these credentials: {field_descriptions}\n\n"
-            "Give me a brief step-by-step guide on where and how to obtain each of these. "
-            f"Be concise — numbered steps, no fluff. {storage_note}"
-        )
-        heading = f"[anton.cyan](anton)[/] How to get credentials for [bold]{service_name}[/]:"
-
-    console.print()
-    console.print("[anton.muted]        Looking up instructions…[/]")
-
-    try:
-        resp = await session._llm.plan(
-            system="You are a helpful assistant that guides users through obtaining credentials for services.",
-            messages=[
-                {
-                    "role": "user",
-                    "content": prompt,
-                }
-            ],
-            max_tokens=512,
-        )
-        help_text = (resp.content or "").strip()
-    except Exception:
-        help_text = "Sorry, couldn't fetch help right now. Try checking the service's documentation."
-
-    console.print()
-    console.print(heading)
-    console.print()
-    console.print(Padding(Markdown(help_text), (0, 0, 0, 8)))
-    console.print()
-
-
-async def run_connection_test(
-    console: "Console",
-    scratchpads: "ScratchpadManager",
-    vault: "DataVault",
-    engine_def: "DatasourceEngine",
-    credentials: dict[str, str],
-    retry_fields: "list[DatasourceField]",
-) -> bool:
-    """Inject flat DS_* vars, run engine_def.test_snippet, restore env.
-
-    Returns True on success, False if the user declines retry after failure.
-    Mutates credentials in-place when the user re-enters secrets on retry.
-    """
-    while True:
-        console.print()
-        console.print("[anton.cyan](anton)[/] Got it. Testing connection…")
-
-        vault.clear_ds_env()
-        for key, value in credentials.items():
-            os.environ[f"DS_{key.upper()}"] = value
-        register_secret_vars(engine_def)  # flat mode, for scrubbing during test
-
-        try:
-            pad = await scratchpads.get_or_create("__datasource_test__")
-            await pad.reset()
-            if engine_def.pip:
-                if isinstance(engine_def.pip, list):
-                    pip_pkgs = engine_def.pip
-                else:
-                    # Split space-separated package strings into individual packages
-                    pip_pkgs = engine_def.pip.split()
-                install_result = await pad.install_packages(pip_pkgs)
-                if "failed" in (install_result or "").lower():
-                    console.print()
-                    console.print(f"[anton.warning](anton)[/] Package install issue: {install_result[:200]}")
-
-            # Run the test, retry up to 2 times on ModuleNotFoundError
-            cell = None
-            for attempt in range(3):
-                cell = await pad.execute(engine_def.test_snippet)
-                if cell.error and "ModuleNotFoundError" in cell.error:
-                    # Extract the missing module and try to install it
-                    match = re.search(r"No module named '([^']+)'", cell.error)
-                    if match:
-                        missing = match.group(1).split(".")[0]
-                        await pad.install_packages([missing])
-                        continue
-                break
-        finally:
-            restore_namespaced_env(vault)
-
-        if cell.error or (cell.stdout.strip() != "ok" and cell.stderr.strip()):
-            error_text = cell.error or cell.stderr.strip() or cell.stdout.strip()
-            last_line = next(
-                (ln for ln in reversed(error_text.splitlines()) if ln.strip()), error_text
-            )
-            console.print()
-            console.print("[anton.warning](anton)[/] ✗ Connection failed.")
-            console.print()
-            console.print(f"        Error: {last_line}")
-            console.print()
-            retry = await prompt_or_cancel(
-                "(anton) Would you like to re-enter your credentials?",
-                choices=["y", "n"], default="n",
-            )
-            if retry is None or retry.strip().lower() != "y":
-                return False
-            console.print()
-            for f in retry_fields:
-                if not f.secret:
-                    continue
-                value = await prompt_or_cancel(f"(anton) {f.name}", password=True)
-                if value is None:
-                    return False
-                if value:
-                    credentials[f.name] = value
-            continue
-
-        console.print("[anton.success]        ✓ Connected successfully![/]")
-        return True
-
-
-async def handle_add_custom_datasource(
-    console: Console,
-    name: str,
-    registry,
-    session: "ChatSession",
-    *,
-    known_service: bool = False,
-):
-    """Ask for the tool name, use the LLM to identify required fields, then collect credentials."""
-
-    console.print()
-    if name:
-        tool_name = name
-    else:
-        tool_name = await prompt_or_cancel(
-            "(anton) What is the name of the tool or service?",
-        )
-        if not tool_name or not tool_name.strip():
-            return None
-        tool_name = tool_name.strip()
-
-    if known_service:
-        # LLM already recognised this service — skip the auth question
-        user_answer = ""
-        console.print("[anton.muted]        Working out the connection details…[/]")
-    else:
-        user_answer = await prompt_or_cancel(
-            f"(anton) How do you authenticate with {tool_name}? "
-            "Describe what credentials you have (don't paste actual values)",
-        )
-        if not user_answer or not user_answer.strip():
-            return None
-        console.print()
-        console.print("[anton.muted]    Got it — working out the connection details…[/]")
-
-    llm_prompt = f"The user wants to connect to {repr(tool_name)}."
-    if user_answer:
-        llm_prompt += f" They said: {user_answer}"
-    else:
-        llm_prompt += " Determine the standard authentication fields for this service."
-    llm_prompt += (
-        "\n\nReturn the connection spec following the schema you've been given. "
-        "For test_snippet, write Python that uses os.environ['DS_<FIELDNAME>'] "
-        "vars (uppercase, DS_ prefix) and prints 'ok' on success."
-    )
-
-    try:
-        spec: _CustomDatasourceSpec = await session._llm.generate_object(
-            _CustomDatasourceSpec,
-            system="You are a data source connection expert.",
-            messages=[{"role": "user", "content": llm_prompt}],
-            max_tokens=1024,
-        )
-    except Exception:
-        console.print(
-            "[anton.warning]        Couldn't identify connection details. Try again.[/]"
-        )
-        console.print()
-        return None
-
-    test_snippet = spec.test_snippet.strip()
-    fields: list[DatasourceField] = []
-    for f in spec.fields:
-        if not f.name:
-            continue
-        fields.append(
-            DatasourceField(
-                name=f.name,
-                required=f.required,
-                secret=f.secret,
-                description=f.description,
-            )
-        )
-
-    if not fields:
-        console.print("[anton.warning]    Couldn't identify any connection fields.[/]")
-        console.print()
-        return None
-
-    display_name = spec.display_name or name
-    pip_pkg = spec.pip
-
-    # Show summary
-    console.print()
-    console.print("      [bold]── What I'll save ──────────────────────────[/]")
-    credentials: dict[str, str] = {}
-    for f, raw in zip(fields, spec.fields):
-        inline_value = (raw.value or "").strip()
-        if f.secret and inline_value:
-            console.print(
-                f"        • [bold]{f.name:<14}[/] (secret — provided, stored securely)"
-            )
-            credentials[f.name] = inline_value
-        elif f.secret:
-            console.print(
-                f"        • [bold]{f.name:<14}[/] (secret — I'll ask for this)"
-            )
-        else:
-            val_display = inline_value or "[anton.muted]<to be collected>[/]"
-            console.print(f"        • [bold]{f.name:<14}[/] {val_display}")
-            if inline_value:
-                credentials[f.name] = inline_value
-    console.print()
-
-    # Offer help before collecting credentials
-    help_answer = await prompt_or_cancel(
-        "(anton) Do you need instructions on how to obtain these credentials?",
-        choices=["y", "n"], default="n",
-    )
-    if help_answer is None:
-        return None
-    if help_answer.strip().lower() == "y":
-        await show_credential_help(
-            console, session, display_name, None, fields,
-        )
-
-    # Prompt for any secret fields not provided inline
-    for f, raw in zip(fields, spec.fields):
-        if not f.secret:
-            continue
-        if (raw.value or "").strip():
-            continue
-        value = await prompt_or_cancel(f"(anton) {f.name}", password=True)
-        if value is None:
-            return None
-        if value:
-            credentials[f.name] = value
-
-    # Prompt for any required non-secret fields not provided inline
-    for f, raw in zip(fields, spec.fields):
-        if f.secret:
-            continue
-        if not f.required:
-            continue
-        if f.name in credentials:
-            continue
-        value = await prompt_or_cancel(f"(anton) {f.name}")
-        if value is None:
-            return None
-        if value:
-            credentials[f.name] = value
-
-    # Offer to collect optional non-secret fields
-    for f, raw in zip(fields, spec.fields):
-        if f.secret or f.required or f.name in credentials:
-            continue
-        value = await prompt_or_cancel(f"(anton) {f.name} (optional — press Enter to skip)")
-        if value is None:
-            return None
-        if value:
-            credentials[f.name] = value
-
-    if not credentials:
-        console.print("[anton.warning]        No credentials collected. Aborting.[/]")
-        console.print()
-        return None
-
-    # Build engine slug and write definition to ~/.anton/datasources.md
-    slug = re.sub(r"[^\w]", "_", display_name.lower()).strip("_")
-    field_lines = "\n".join(
-        f"  - {{ name: {f.name}, required: {str(f.required).lower()}, "
-        f'secret: {str(f.secret).lower()}, description: "{f.description}" }}'
-        for f in fields
-    )
-    test_snippet_yaml = ""
-    if test_snippet:
-        indented = "\n".join(f"  {line}" for line in test_snippet.splitlines())
-        test_snippet_yaml = f"test_snippet: |\n{indented}\n"
-
-    yaml_block = (
-        f"\n---\n\n## {display_name}\n"
-        "```yaml\n"
-        f"engine: {slug}\n"
-        f"display_name: {display_name}\n"
-        + (f"pip: {pip_pkg}\n" if pip_pkg else "")
-        + f"fields:\n{field_lines}\n"
-        + test_snippet_yaml
-        + "```\n"
-    )
-    user_ds_path = Path("~/.anton/datasources.md").expanduser()
-    tmp_path = user_ds_path.with_suffix(".tmp")
-
-    # Write to temp, validate it parses, then rename atomically
-    existing = (
-        user_ds_path.read_text(encoding="utf-8") if user_ds_path.is_file() else ""
-    )
-
-    existing = remove_engine_block(existing, slug)
-
-    tmp_path.write_text(existing + yaml_block, encoding="utf-8")
-
-    parsed = registry.validate_file(tmp_path)
-    if slug in parsed:
-        import shutil
-
-        shutil.move(str(tmp_path), str(user_ds_path))
-    else:
-        tmp_path.unlink(missing_ok=True)
-        console.print(
-            "[anton.warning]Could not validate engine definition — "
-            "credentials saved but engine not written to datasources.md.[/]"
-        )
-
-    registry.reload()
-    engine_def = registry.get(slug)
-    if engine_def is None:
-        # Fallback: construct inline so the flow can continue even if parse failed
-        engine_def = DatasourceEngine(
-            engine=slug,
-            display_name=display_name,
-            pip=pip_pkg,
-            fields=fields,
-            test_snippet=test_snippet,
-        )
-
-    # All required fields must be present before the caller saves credentials
-    missing_required = [f.name for f in fields if f.required and f.name not in credentials]
-    if missing_required:
-        console.print(
-            "[anton.warning]    Cannot save — missing required fields: "
-            f"{', '.join(missing_required)}. Aborting.[/]"
-        )
-        console.print()
-        return None
-
-    return engine_def, credentials
-
-
-async def _reconnect_to_saved(
-    console: Console,
-    session: "ChatSession",
-    vault: "DataVault",
-    registry: "DatasourceRegistry",
-    slug: str,
-    conn: dict,
-    *,
-    from_tool_call: bool = False,
-) -> "ChatSession":
-    """Inject env for a saved connection and mark it as the active datasource."""
-    restore_namespaced_env(vault)
-    session._active_datasource = slug
-    recon_engine_def = registry.get(conn["engine"])
-    if recon_engine_def:
-        register_secret_vars(recon_engine_def, engine=conn["engine"], name=conn["name"])
-        engine_label = recon_engine_def.display_name
-    else:
-        engine_label = conn["engine"]
-    console.print()
-    console.print(
-        f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
-    )
-    console.print()
-    if not from_tool_call:
-        # When invoked via the LLM tool call, we must not append to
-        # session._history here — it would land between a tool_use and
-        # its tool_result. The tool wrapper returns a fresh message.
-        session._history.append(
-            {
-                "role": "assistant",
-                "content": (
-                    f'I\'ve reconnected to the {engine_label} connection "{slug}" '
-                    f"in the Local Vault. I can now query this data source when needed."
-                ),
-            }
-        )
-    return session
-
-
-def _build_redirect_message(
-    collector: ConnectionCollector,
-    user_message: str,
-    target_engine: str | None = None,
-) -> str:
-    """Build a structured REDIRECT message for the main agent.
-
-    Returns a string describing what was collected so far, what's still
-    missing, and what the user said. The caller decides where to put it
-    (session history for slash-command path, or tool-result return for
-    the LLM tool-call path — never both, to keep tool_use/tool_result
-    ordering intact).
-    """
-    collector.redirect_message = user_message.strip()
-    payload = collector.to_redirect_result()
-    parts = [
-        f"REDIRECT during {payload['engine_display']} connection setup.",
-        f"Collected so far: {json.dumps(payload['collected_variables'])}.",
-    ]
-    if payload["missing_required"]:
-        parts.append(
-            f"Still missing: {', '.join(payload['missing_required'])}."
-        )
-    if target_engine:
-        parts.append(f"User wants to switch to: {target_engine}.")
-    parts.append(f'User said: "{collector.redirect_message}".')
-    parts.append(
-        "Decide what to do next — you may call connect_new_datasource "
-        "again with the correct engine and pass known_variables to "
-        "pre-fill what's already collected."
-    )
-    return " ".join(parts)
-
-
-async def handle_connect_datasource(
-    console: Console,
-    scratchpads: ScratchpadManager,
-    session: "ChatSession",
-    datasource_name: str | None = None,
-    prefill: str | None = None,
-    known_variables: dict[str, str] | None = None,
-    from_tool_call: bool = False,
-) -> "ChatSession":
-    """
-    Connect a data source by entering credentials, either for a new name or re-entering for an existing one.
-
-    `known_variables` may pre-fill credential fields (e.g. when called as a
-    tool by the LLM, which may have already extracted host/port/etc. from
-    the conversation).
-
-    `from_tool_call=True` when invoked via the LLM `connect_new_datasource`
-    tool. In that case we must NOT append assistant messages to
-    `session._history` — we are sitting between a `tool_use` block and its
-    `tool_result` block, and appending messages there violates the
-    Anthropic API invariant. The tool wrapper builds its own return
-    message from the vault diff instead.
-    """
-
-    vault = DataVault()
-    registry = DatasourceRegistry()
-
-    if datasource_name is not None:
-        parsed = parse_connection_slug(
-            datasource_name, [e.engine for e in registry.all_engines()], vault=vault
-        )
-        if parsed is None:
-            console.print(
-                f"[anton.warning]Invalid slug '{datasource_name}'. "
-                "Expected format: engine-name.[/]"
-            )
-            console.print()
-            return session
-        edit_engine, edit_name = parsed
-        existing = vault.load(edit_engine, edit_name)
-        if existing is None:
-            console.print(
-                f"[anton.warning]No connection '{datasource_name}' found in Local Vault.[/]"
-            )
-            console.print()
-            return session
-        engine_def = registry.get(edit_engine)
-        if engine_def is None:
-            console.print(
-                f"[anton.warning]Unknown engine '{edit_engine}'. "
-                "Cannot update credentials.[/]"
-            )
-            console.print()
-            return session
-
-        console.print()
-        console.print(
-            f"[anton.cyan](anton)[/] Editing [bold]\"{datasource_name}\"[/bold]"
-            f" ({engine_def.display_name})."
-        )
-        console.print("[anton.muted]        Press Enter to keep the current value.[/]")
-        console.print()
-
-        # Detect which fields to present (handle auth_method=choice)
-        active_fields = engine_def.fields
-        if engine_def.auth_method == "choice" and engine_def.auth_methods:
-            for am in engine_def.auth_methods:
-                am_field_names = {af.name for af in am.fields}
-                if any(k in am_field_names for k in existing):
-                    active_fields = am.fields
-                    break
-            if not active_fields:
-                active_fields = engine_def.auth_methods[0].fields
-
-        # Start from existing values; let user update field-by-field
-        credentials: dict[str, str] = dict(existing)
-        for f in active_fields:
-            current = existing.get(f.name, "")
-            field_label = f"(anton) {f.name}"
-            if not f.required:
-                field_label += " (optional)"
-
-            if f.secret:
-                masked = "••••••••" if current else ""
-                label = f"{field_label} [{masked}]" if masked else field_label
-                value = await prompt_or_cancel(label, password=True)
-                if value is None:
-                    return session
-                if value:
-                    credentials[f.name] = value
-                # else: keep existing (already in credentials)
-            elif current:
-                value = await prompt_or_cancel(
-                    f"{field_label}",
-                    default=current,
-                )
-                if value is None:
-                    return session
-                credentials[f.name] = value if value else current
-            elif f.default:
-                value = await prompt_or_cancel(
-                    f"{field_label}",
-                    default=f.default,
-                )
-                if value is None:
-                    return session
-                if value:
-                    credentials[f.name] = value
-            else:
-                value = await prompt_or_cancel(field_label)
-                if value is None:
-                    return session
-                if value:
-                    credentials[f.name] = value
-
-        if engine_def.test_snippet:
-            if not await run_connection_test(
-                console, scratchpads, vault, engine_def, credentials, active_fields
-            ):
-                return session
-
-        vault.save(edit_engine, edit_name, credentials)
-        restore_namespaced_env(vault)
-        register_secret_vars(engine_def, engine=edit_engine, name=edit_name)
-        console.print()
-        console.print(
-            f'        Credentials updated for [bold]"{datasource_name}"[/bold].'
-        )
-        console.print()
-        console.print(
-            "[anton.muted]        You can now ask me questions about your data.[/]"
-        )
-        console.print()
-        if not from_tool_call:
-            session._history.append(
-                {
-                    "role": "assistant",
-                    "content": (
-                        f"I've updated the credentials for the {engine_def.display_name} connection "
-                        f'"{datasource_name}" in the Local Vault.'
-                    ),
-                }
-            )
-        return session
-
-    console.print()
-    all_engines = registry.all_engines()
-    popular_engines = [e for e in all_engines if e.popular and not e.custom]
-    other_engines = [e for e in all_engines if not e.popular and not e.custom]
-    custom_engines = [e for e in all_engines if e.custom]
-    display_engines = popular_engines + other_engines + custom_engines
-
-    saved_connections = vault.list_connections()
-    # Build deduplicated list of engine types from saved connections (one per engine)
-    seen_engines: set[str] = set()
-    recent_engine_entries: list[tuple[str, str]] = []  # (engine_slug, display_name)
-    for c in saved_connections:
-        if c["engine"] not in seen_engines:
-            seen_engines.add(c["engine"])
-            engine_obj = registry.get(c["engine"])
-            label = engine_obj.display_name if engine_obj else c["engine"]
-            recent_engine_entries.append((c["engine"], label))
-
-    def print_sections() -> None:
-        console.print(
-            "[anton.cyan](anton)[/] Select a data source to create a new connection:\n"
-        )
-        console.print("       [bold]  Primary")
-        console.print(
-            "         [bold]  0.[/bold] Custom datasource"
-            " (connect anything via API, SQL, or MCP)\n"
-        )
-        if popular_engines:
-            console.print("       [bold]  Most popular")
-            for i, e in enumerate(popular_engines, 1):
-                console.print(f"          [bold]{i:>2}.[/bold] {e.display_name}")
-            console.print()
-        if recent_engine_entries:
-            start = len(popular_engines) + 1
-            console.print("       [bold]  Recently used data sources")
-            for i, (_, label) in enumerate(recent_engine_entries, start):
-                console.print(f"          [bold]{i:>2}.[/bold] {label}")
-            console.print()
-
-    def print_all() -> None:
-        console.print(
-            "[anton.cyan](anton)[/] All data sources (★ = popular):\n"
-        )
-        console.print("       [bold]  Primary")
-        console.print(
-            "         [bold]  0.[/bold] Custom datasource"
-            " (connect anything via API, SQL, or MCP)\n"
-        )
-        for i, e in enumerate(display_engines, 1):
-            star = " ★" if e.popular else ""
-            console.print(f"          [bold]{i:>2}.[/bold] {e.display_name}{star}")
-        console.print()
-
-    async def get_create_new_answer() -> str | None:
-        print_sections()
-        console.print(
-            "       [anton.muted]Don't see yours? Type a datasource name (e.g., GitHub, Gmail, Jira, ...)\n"
-            "       It can be virtually any datasource — we'll figure out the details together.[/]"
-        )
-        console.print()
-        ans = await prompt_or_cancel(
-            "(anton) Enter a number or type a datasource name",
-        )
-        if ans is None:
-            return None
-        if ans.strip().lower() == "all":
-            console.print()
-            print_all()
-            ans = await prompt_or_cancel(
-                "(anton) Enter a number or type a name",
-            )
-        return ans
-
-    if prefill:
-        answer = prefill
-    elif saved_connections:
-        console.print()
-        console.print("[anton.cyan](anton)[/] What would you like to do?\n")
-        console.print("          [bold]  1.[/bold] Use an existing connection")
-        console.print("          [bold]  2.[/bold] Create a new connection")
-        console.print()
-        top_choice = await prompt_or_cancel(
-            "(anton) Enter a number", choices=["1", "2"]
-        )
-        if top_choice is None:
-            return session
-
-        if top_choice == "1":
-            console.print()
-            console.print("[anton.cyan](anton)[/] Your saved connections:\n")
-            for i, c in enumerate(saved_connections, 1):
-                conn_slug = f"{c['engine']}-{c['name']}"
-                engine_obj = registry.get(c["engine"])
-                engine_label = engine_obj.display_name if engine_obj else c["engine"]
-                console.print(
-                    f"          [bold]{i:>2}.[/bold] {conn_slug}"
-                    f" [dim]— {engine_label}[/]"
-                )
-            console.print()
-            pick = await prompt_or_cancel(
-                "(anton) Enter a number",
-                choices=[str(i) for i in range(1, len(saved_connections) + 1)],
-            )
-            if pick is None:
-                return session
-            picked_conn = saved_connections[int(pick) - 1]
-            picked_slug = f"{picked_conn['engine']}-{picked_conn['name']}"
-            return await _reconnect_to_saved(
-                console, session, vault, registry, picked_slug, picked_conn,
-                from_tool_call=from_tool_call,
-            )
-
-        # top_choice == "2": create new connection
-        answer = await get_create_new_answer()
-        if answer is None:
-            return session
-    else:
-        answer = await get_create_new_answer()
-        if answer is None:
-            return session
-
-    stripped_answer = answer.strip()
-    known_slugs = {
-        f"{c['engine']}-{c['name']}": c for c in vault.list_connections()
-    }
-    if stripped_answer in known_slugs:
-        conn = known_slugs[stripped_answer]
-        return await _reconnect_to_saved(
-            console, session, vault, registry, stripped_answer, conn,
-            from_tool_call=from_tool_call,
-        )
-
-    engine_def: DatasourceEngine | None = None
-    custom_source = False
-    llm_recognised = False
-    # Recently used data sources are numbered after popular engines
-    saved_start = len(popular_engines) + 1
-    max_num = len(popular_engines) + len(recent_engine_entries)
-
-    if stripped_answer.isdigit() or (stripped_answer.lstrip("-").isdigit()):
-        pick_num = int(stripped_answer)
-        if pick_num == 0:
-            custom_source = True
-        elif 1 <= pick_num <= len(popular_engines):
-            engine_def = popular_engines[pick_num - 1]
-        elif recent_engine_entries and saved_start <= pick_num <= max_num:
-            # User picked a recently used data source — start a new connection of that engine
-            picked_engine_slug, _ = recent_engine_entries[pick_num - saved_start]
-            engine_def = registry.get(picked_engine_slug)
-            if engine_def is None:
-                custom_source = True
-        else:
-            console.print(
-                f"[anton.warning](anton)[/] '{stripped_answer}' is out of range. "
-                f"Please enter 0\u2013{max_num}.[/]"
-            )
-            console.print()
-            return session
-
-    if engine_def is None and not custom_source:
-        engine_def = registry.find_by_name(stripped_answer)
-        # if exact match not found, try substring match against display and engine names
-        if engine_def is None:
-            needle = stripped_answer.lower()
-            candidates = [
-                e
-                for e in all_engines
-                if needle in e.display_name.lower() or needle in e.engine.lower()
-            ]
-            if len(candidates) == 1:
-                engine_def = candidates[0]
-            elif len(candidates) > 1:
-                console.print()
-                console.print(
-                    f"[anton.warning](anton)[/] '{stripped_answer}' matches multiple engines — "
-                    "which one did you mean?"
-                )
-                console.print()
-                for i, e in enumerate(candidates, 1):
-                    console.print(f"        {i}. {e.display_name}")
-                console.print()
-                pick = await prompt_or_cancel("(anton) Enter a number")
-                if pick is None:
-                    return session
-                pick = (pick or "").strip()
-                try:
-                    engine_def = candidates[int(pick) - 1]
-                except (ValueError, IndexError):
-                    console.print("[anton.warning]Invalid choice. Aborting.[/]")
-                    console.print()
-                    return session
-        # Ask the LLM to identify the datasource
-        if engine_def is None:
-            engine_names = [e.display_name for e in all_engines]
-            try:
-                console.print()
-                console.print("[anton.muted]        Looking up datasource…[/]")
-                llm_resp = await session._llm.plan(
-                    system="You are a datasource identification assistant.",
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": (
-                                f"The user typed: {stripped_answer!r}\n"
-                                f"Known datasources: {engine_names!r}\n\n"
-                                "If the user input clearly matches one of the known datasources, "
-                                "reply with EXACTLY: MATCH:<display_name>\n"
-                                "If it does NOT match any known datasource but you recognise it "
-                                "as a real service/tool, reply with EXACTLY: CUSTOM\n"
-                                "If you don't recognise it at all, reply with EXACTLY: UNKNOWN\n"
-                                "Reply with only one of those three forms, nothing else."
-                            ),
-                        }
-                    ],
-                    max_tokens=64,
-                )
-                llm_text = (llm_resp.content or "").strip()
-            except Exception:
-                llm_text = "UNKNOWN"
-
-            llm_recognised = llm_text == "CUSTOM" or llm_text.startswith("MATCH:")
-
-            if llm_text.startswith("MATCH:"):
-                matched_name = llm_text[len("MATCH:"):].strip()
-                matched_engine = next(
-                    (e for e in all_engines if e.display_name == matched_name), None
-                )
-                if matched_engine is not None:
-                    if matched_name.lower() != stripped_answer.lower():
-                        confirm = await prompt_or_cancel(
-                            f'(anton) Did you mean "{matched_name}"?',
-                            choices=["y", "n"], default="y",
-                        )
-                        if confirm is not None and confirm.strip().lower() == "y":
-                            engine_def = matched_engine
-                    else:
-                        engine_def = matched_engine
-
-            if engine_def is None:
-                custom_source = True
-
-    if custom_source:
-        result = await handle_add_custom_datasource(
-            console, stripped_answer if not stripped_answer.isdigit() else "", registry, session,
-            known_service=llm_recognised,
-        )
-        if result is None:
-            return session
-        engine_def, credentials = result
-        if engine_def.test_snippet:
-            if not await run_connection_test(
-                console, scratchpads, vault, engine_def, credentials, engine_def.fields
-            ):
-                return session
-        conn_name = uuid.uuid4().hex[:8]
-        vault.save(engine_def.engine, conn_name, credentials)
-        slug = f"{engine_def.engine}-{conn_name}"
-        restore_namespaced_env(vault)
-        session._active_datasource = slug
-        register_secret_vars(engine_def, engine=engine_def.engine, name=conn_name)
-        console.print(
-            f'        Credentials saved to Local Vault as [bold]"{slug}"[/bold].'
-        )
-        console.print()
-        console.print(
-            "[anton.muted]        You can now ask me questions about your data.[/]"
-        )
-        console.print()
-        if not from_tool_call:
-            session._history.append(
-                {
-                    "role": "assistant",
-                    "content": (
-                        f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
-                        f"to the Local Vault. I can now query this data source when needed."
-                    ),
-                }
-            )
-        return session
-
-    assert engine_def is not None  # custom_source path always returns before this line
-    active_fields = engine_def.fields
-    chosen_method: "AuthMethod | None" = None
-    if engine_def.auth_method == "choice" and engine_def.auth_methods:
-        console.print()
-        console.print(
-            f"[anton.cyan](anton)[/] How would you like to authenticate with "
-            f"[bold]{engine_def.display_name}[/]?"
-        )
-        console.print()
-        for i, am in enumerate(engine_def.auth_methods, 1):
-            console.print(f"        {i}. {am.display}")
-        console.print()
-        choice_str = await prompt_or_cancel("(anton) Enter a number")
-        if choice_str is None:
-            return session
-        choice_str = (choice_str or "").strip()
-        try:
-            choice_idx = int(choice_str) - 1
-            chosen_method = engine_def.auth_methods[choice_idx]
-        except (ValueError, IndexError):
-            console.print("[anton.warning]Invalid choice. Aborting.[/]")
-            console.print()
-            return session
-        active_fields = chosen_method.fields
-
-    # ── Smart credential collection ────────────────────────────────────
-    # Track filled vs. missing fields as a puzzle. Each user response is
-    # parsed via the LLM to extract any variables mentioned, so users can
-    # fill multiple fields at once, paste a connection string, or change
-    # direction mid-flow.
-    collector = ConnectionCollector(
-        engine_def=engine_def,
-        auth_method=chosen_method,
-    )
-    if known_variables:
-        collector.fill_many(known_variables)
-
-    known_engine_slugs = [e.engine for e in registry.all_engines()]
-    partial = False
-    required_fields = [f for f in active_fields if f.required]
-    optional_fields = [f for f in active_fields if not f.required]
-
-    if collector.is_complete:
-        # Pre-fill already covered everything — skip the field list and
-        # the help prompt and go straight to testing + saving. Show a
-        # brief confirmation of what was received.
-        filled_names = [
-            f.name for f in active_fields if collector.collected.get(f.name)
-        ]
-        console.print()
-        console.print(
-            f"[anton.cyan](anton)[/] Got everything for [bold]"
-            f"{engine_def.display_name}[/] from context: "
-            f"{', '.join(filled_names)}."
-        )
-        console.print()
-    else:
-        # Show the field list so the user sees what's expected.
-        console.print()
-        console.print(
-            f"[anton.cyan](anton)[/] To connect [bold]"
-            f"{engine_def.display_name}[/], I'll need the following:"
-        )
-        console.print()
-
-        if required_fields:
-            console.print("        [bold]Required[/]      " + "─" * 39)
-            for f in required_fields:
-                marker = (
-                    "[green]✓[/] " if collector.collected.get(f.name) else "• "
-                )
-                console.print(
-                    f"        {marker}[bold]{f.name:<12}[/] "
-                    f"[anton.muted]— {f.description}[/]"
-                )
-
-        if optional_fields:
-            console.print()
-            console.print("        [bold]Optional[/]      " + "─" * 39)
-            for f in optional_fields:
-                marker = (
-                    "[green]✓[/] " if collector.collected.get(f.name) else "• "
-                )
-                console.print(
-                    f"        {marker}[bold]{f.name:<12}[/] "
-                    f"[anton.muted]— {f.description}[/]"
-                )
-
-        console.print()
-
-        # Offer instructions — but only if nothing has been pre-filled.
-        # If the user already provided some credentials (via the tool's
-        # `known_variables` or a paste), they clearly know what they're
-        # doing and don't need guidance — just prompt for what's missing.
-        if not collector.collected:
-            help_answer = await prompt_or_cancel(
-                "(anton) Do you need instructions on how to obtain these credentials?",
-                choices_display="y/n", default="n",
-            )
-            if help_answer is None:
-                return session
-            normalized = help_answer.strip().lower()
-            if normalized == "y":
-                await show_credential_help(
-                    console, session, engine_def.display_name, None, active_fields,
-                )
-            elif normalized and normalized != "n":
-                # Non-y/n answer — maybe the user pasted credentials here.
-                extracted = await extract_variables(
-                    help_answer,
-                    expected_fields=collector.active_fields,
-                    current_engine=engine_def.engine,
-                    current_engine_display=engine_def.display_name,
-                    known_engine_slugs=known_engine_slugs,
-                    session=session,
-                )
-                if extracted.is_redirect:
-                    redirect_text = _build_redirect_message(
-                        collector, help_answer, extracted.redirect_engine
-                    )
-                    session._pending_connect_redirect = redirect_text
-                    if not from_tool_call:
-                        session._history.append(
-                            {"role": "assistant", "content": redirect_text}
-                        )
-                    return session
-                if extracted.variables:
-                    filled = collector.fill_many(extracted.variables)
-                    if filled:
-                        console.print(
-                            f"[anton.muted]        Got: {', '.join(filled)}[/]"
-                        )
-                        console.print()
-
-    while not collector.is_complete:
-        collector.format_status(console)
-        console.print()
-
-        next_field = collector.next_field
-        # When only one required field remains, ask for it directly with
-        # the matching prompt style (password masking, default value,
-        # etc.). No LLM extraction needed — the answer IS the value.
-        only_one_required = (
-            next_field is not None
-            and next_field.required
-            and len(collector.missing_required) == 1
-        )
-
-        if only_one_required and next_field is not None:
-            label = f"(anton) {next_field.name}"
-            if next_field.secret:
-                value = await prompt_or_cancel(label, password=True)
-            elif next_field.default:
-                value = await prompt_or_cancel(label, default=next_field.default)
-            else:
-                value = await prompt_or_cancel(label)
-            if value is None:
-                return session
-            if not value:
-                # Empty answer for the only missing required field —
-                # treat as a partial save signal.
-                partial = True
-                break
-            collector.fill(next_field.name, value)
-            continue
-
-        # Multiple fields remain — open prompt that accepts bulk input
-        missing_names = ", ".join(f.name for f in collector.missing_required)
-        prompt_label = (
-            f"(anton) Provide values for {missing_names} "
-            f"(one at a time, or 'key=value key2=value2', or 'skip')"
-        )
-        value = await prompt_or_cancel(prompt_label)
-        if value is None:
-            return session
-        if value.strip().lower() == "skip":
-            partial = True
-            break
-        if not value.strip():
-            continue
-
-        extracted = await extract_variables(
-            value,
-            expected_fields=collector.active_fields,
-            current_engine=engine_def.engine,
-            current_engine_display=engine_def.display_name,
-            known_engine_slugs=known_engine_slugs,
-            session=session,
-        )
-
-        if extracted.is_redirect:
-            redirect_text = _build_redirect_message(
-                collector, value, extracted.redirect_engine
-            )
-            # Stash for the tool wrapper; also mirror to history only if
-            # we're NOT inside a tool_use/tool_result pair.
-            session._pending_connect_redirect = redirect_text
-            if not from_tool_call:
-                session._history.append(
-                    {"role": "assistant", "content": redirect_text}
-                )
-            return session
-
-        if extracted.variables:
-            filled = collector.fill_many(extracted.variables)
-            if filled:
-                console.print(
-                    f"[anton.muted]        Got: {', '.join(filled)}[/]"
-                )
-                console.print()
-                continue
-
-        # LLM returned nothing structured — fall back to treating the
-        # input as the value for the next missing required field.
-        if next_field is not None:
-            collector.fill(next_field.name, value.strip())
-        else:
-            console.print(
-                "[anton.warning]        Couldn't parse that. "
-                "Try 'key=value' or one value at a time.[/]"
-            )
-            console.print()
-
-    credentials: dict[str, str] = dict(collector.collected)
-
-    if partial:
-        auto_name = uuid.uuid4().hex[:8]
-        vault.save(engine_def.engine, auto_name, credentials)
-        slug = f"{engine_def.engine}-{auto_name}"
-        console.print()
-        console.print(
-            f"[anton.muted]Partial connection saved to Local Vault as "
-            f'[bold]"{slug}"[/bold]. '
-            f"Run [bold]/edit {slug}[/bold] to complete it when you're ready.[/]"
-        )
-        console.print()
-        return session
-
-    if engine_def.test_snippet:
-        if not await run_connection_test(
-            console, scratchpads, vault, engine_def, credentials, active_fields
-        ):
-            # Either the test failed and the user declined to re-enter
-            # credentials, or the user pressed Escape during the retry
-            # prompt. Mark this so the tool wrapper can return an
-            # accurate (non-misleading) message to the LLM.
-            session._pending_connect_status = "test_failed"
-            return session
-
-    conn_name = registry.derive_name(engine_def, credentials)
-    if not conn_name:
-        conn_name = uuid.uuid4().hex[:8]
-
-    slug = f"{engine_def.engine}-{conn_name}"
-
-    if vault.load(engine_def.engine, conn_name) is not None:
-        console.print()
-        console.print(
-            f'[anton.warning](anton)[/] A connection [bold]"{slug}"[/bold] already exists.'
-        )
-        console.print()
-        choice = await prompt_or_cancel(
-            f"(anton) {_PROMPT_RECONNECT_CANCEL}",
-        )
-        if choice is None or choice.strip().lower() != "reconnect":
-            console.print("[anton.muted]Cancelled.[/]")
-            console.print()
-            return session
-        restore_namespaced_env(vault)
-        register_secret_vars(engine_def, engine=engine_def.engine, name=conn_name)
-        console.print()
-        console.print(
-            f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
-        )
-        console.print()
-        if not from_tool_call:
-            session._history.append(
-                {
-                    "role": "assistant",
-                    "content": (
-                        f'I\'ve reconnected to the {engine_def.display_name} connection "{slug}" '
-                        f"in the Local Vault. I can now query this data source when needed."
-                    ),
-                }
-            )
-        return session
-
-    vault.save(engine_def.engine, conn_name, credentials)
-    restore_namespaced_env(vault)
-    session._active_datasource = slug
-    register_secret_vars(engine_def, engine=engine_def.engine, name=conn_name)
-    console.print(f'        Credentials saved to Local Vault as [bold]"{slug}"[/bold].')
-
-    console.print()
-    console.print(
-        "[anton.muted]        You can now ask me questions about your data.[/]"
-    )
-    console.print()
-
-    # Inject a brief assistant message so the LLM is aware of the new
-    # connection — but only when NOT in a tool call (in that case the
-    # tool wrapper constructs its own return message; appending here
-    # would break tool_use/tool_result pairing).
-    if not from_tool_call:
-        session._history.append(
-            {
-                "role": "assistant",
-                "content": (
-                    f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
-                    f"to the Local Vault. I can now query this data source when needed."
-                ),
-            }
-        )
-    return session
-
-
-async def handle_test_datasource(
-    console: Console,
-    scratchpads: ScratchpadManager,
-    slug: str,
-) -> None:
-    """Test an existing Local Vault connection by running its test_snippet."""
-    if not slug:
-        console.print(
-            "[anton.warning]Usage: /test <engine-name>[/]"
-        )
-        console.print()
-        return
-
-    vault = DataVault()
-    registry = DatasourceRegistry()
-    parsed = parse_connection_slug(slug, [e.engine for e in registry.all_engines()], vault=vault)
-    if parsed is None:
-        console.print(
-            f"[anton.warning]Invalid name '{slug}'. Use engine-name format.[/]"
-        )
-        console.print()
-        return
-    engine, name = parsed
-    fields = vault.load(engine, name)
-    if fields is None:
-        console.print(
-            f"[anton.warning]No connection '{slug}' found in Local Vault.[/]"
-        )
-        console.print()
-        return
-
-    engine_def = registry.get(engine)
-    if engine_def is None:
-        console.print(
-            f"[anton.warning]Unknown engine '{engine}'. Cannot test.[/]"
-        )
-        console.print()
-        return
-
-    if not engine_def.test_snippet:
-        console.print(
-            f"[anton.warning]No test snippet defined for '{engine}'. Cannot test.[/]"
-        )
-        console.print()
-        return
-
-    console.print()
-    console.print(
-        f"[anton.cyan](anton)[/] Testing connection [bold]{slug}[/bold]…"
-    )
-
-    vault.clear_ds_env()
-    vault.inject_env(engine, name, flat=True)
-    register_secret_vars(engine_def)  # flat names for scrubbing during test
-
-    cell = None
-    try:
-        pad = await scratchpads.get_or_create("__datasource_test__")
-        await pad.reset()
-        if engine_def.pip:
-            await pad.install_packages([engine_def.pip])
-        cell = await pad.execute(engine_def.test_snippet)
-    finally:
-        restore_namespaced_env(vault)
-
-    if cell is None or cell.error or (
-        cell.stdout.strip() != "ok" and cell.stderr.strip()
-    ):
-        error_text = ""
-        if cell is not None:
-            error_text = cell.error or cell.stderr.strip() or cell.stdout.strip()
-        first_line = (
-            next((ln for ln in error_text.splitlines() if ln.strip()), error_text)
-            if error_text
-            else "unknown error"
-        )
-        console.print()
-        console.print(
-            f"[anton.warning](anton)[/] ✗ Connection test failed for"
-            f" [bold]{slug}[/bold]."
-        )
-        console.print()
-        console.print(f"        Error: {first_line}")
-    else:
-        console.print(
-            f"[anton.success]        ✓ Connection test passed for"
-            f" [bold]{slug}[/bold]![/]"
-        )
-    console.print()

From 9b4231f2b1371a2a570eaa97038b63654aec5e37 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:53:51 +0200
Subject: [PATCH 107/134] Split ds commands

---
 anton/commands/datasource/__init__.py |  23 +
 anton/commands/datasource/connect.py  | 790 ++++++++++++++++++++++++++
 2 files changed, 813 insertions(+)
 create mode 100644 anton/commands/datasource/__init__.py
 create mode 100644 anton/commands/datasource/connect.py

diff --git a/anton/commands/datasource/__init__.py b/anton/commands/datasource/__init__.py
new file mode 100644
index 00000000..929c840d
--- /dev/null
+++ b/anton/commands/datasource/__init__.py
@@ -0,0 +1,23 @@
+"""Datasource slash-command handlers."""
+
+from anton.commands.datasource.helpers import show_credential_help
+from anton.commands.datasource.connect import _PROMPT_RECONNECT_CANCEL
+from anton.commands.datasource.manage import handle_list_data_sources, handle_remove_data_source
+from anton.commands.datasource.custom import (
+    handle_add_custom_datasource,
+    _CustomDatasourceField,
+    _CustomDatasourceSpec,
+)
+from anton.commands.datasource.verify import run_connection_test, handle_test_datasource
+from anton.commands.datasource.connect import handle_connect_datasource
+
+__all__ = [
+    "_PROMPT_RECONNECT_CANCEL",
+    "show_credential_help",
+    "handle_list_data_sources",
+    "handle_remove_data_source",
+    "handle_add_custom_datasource",
+    "run_connection_test",
+    "handle_test_datasource",
+    "handle_connect_datasource",
+]
diff --git a/anton/commands/datasource/connect.py b/anton/commands/datasource/connect.py
new file mode 100644
index 00000000..351dff87
--- /dev/null
+++ b/anton/commands/datasource/connect.py
@@ -0,0 +1,790 @@
+"""Main datasource connection flow."""
+
+from __future__ import annotations
+
+import json
+import uuid
+from typing import TYPE_CHECKING
+
+from anton.connect_collector import ConnectionCollector, extract_variables
+from anton.core.datasources.data_vault import DataVault, LocalDataVault
+from anton.core.datasources.datasource_registry import DatasourceRegistry
+from anton.utils.datasources import parse_connection_slug, register_secret_vars, restore_namespaced_env
+from anton.utils.prompt import prompt_or_cancel
+from anton.commands.datasource.helpers import show_credential_help
+from anton.commands.datasource.custom import handle_add_custom_datasource
+from anton.commands.datasource.verify import run_connection_test
+
+_PROMPT_RECONNECT_CANCEL = "(reconnect/cancel)"
+
+
+def _build_redirect_message(
+    collector: ConnectionCollector,
+    user_message: str,
+    target_engine: str | None = None,
+) -> str:
+    """Build a structured REDIRECT message for the main agent."""
+    collector.redirect_message = user_message.strip()
+    payload = collector.to_redirect_result()
+    parts = [
+        f"REDIRECT during {payload['engine_display']} connection setup.",
+        f"Collected so far: {json.dumps(payload['collected_variables'])}.",
+    ]
+    if payload["missing_required"]:
+        parts.append(
+            f"Still missing: {', '.join(payload['missing_required'])}."
+        )
+    if target_engine:
+        parts.append(f"User wants to switch to: {target_engine}.")
+    parts.append(f'User said: "{collector.redirect_message}".')
+    parts.append(
+        "Decide what to do next — you may call connect_new_datasource "
+        "again with the correct engine and pass known_variables to "
+        "pre-fill what's already collected."
+    )
+    return " ".join(parts)
+
+if TYPE_CHECKING:
+    from rich.console import Console
+    from anton.chat import ChatSession
+    from anton.core.backends.manager import ScratchpadManager
+
+
+async def _reconnect_to_saved(
+    console: "Console",
+    session: "ChatSession",
+    vault: "DataVault",
+    registry: "DatasourceRegistry",
+    slug: str,
+    conn: dict,
+    *,
+    from_tool_call: bool = False,
+) -> "ChatSession":
+    """Inject env for a saved connection and mark it as the active datasource."""
+    restore_namespaced_env(vault)
+    session._active_datasource = slug
+    recon_engine_def = registry.get(conn["engine"])
+    if recon_engine_def:
+        register_secret_vars(recon_engine_def, engine=conn["engine"], name=conn["name"])
+        engine_label = recon_engine_def.display_name
+    else:
+        engine_label = conn["engine"]
+    console.print()
+    console.print(
+        f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
+    )
+    console.print()
+    if not from_tool_call:
+        session._history.append(
+            {
+                "role": "assistant",
+                "content": (
+                    f'I\'ve reconnected to the {engine_label} connection "{slug}" '
+                    f"in the Local Vault. I can now query this data source when needed."
+                ),
+            }
+        )
+    return session
+
+
+async def handle_connect_datasource(
+    console: "Console",
+    scratchpads: "ScratchpadManager",
+    session: "ChatSession",
+    datasource_name: str | None = None,
+    prefill: str | None = None,
+    known_variables: dict[str, str] | None = None,
+    from_tool_call: bool = False,
+    vault: "DataVault | None" = None,
+) -> "ChatSession":
+    """
+    Connect a data source by entering credentials, either for a new name or re-entering for an existing one.
+
+    `known_variables` may pre-fill credential fields (e.g. when called as a
+    tool by the LLM, which may have already extracted host/port/etc. from
+    the conversation).
+
+    `from_tool_call=True` when invoked via the LLM `connect_new_datasource`
+    tool. In that case we must NOT append assistant messages to
+    `session._history` — we are sitting between a `tool_use` block and its
+    `tool_result` block, and appending messages there violates the
+    Anthropic API invariant. The tool wrapper builds its own return
+    message from the vault diff instead.
+    """
+
+    vault = vault or LocalDataVault()
+    registry = DatasourceRegistry()
+
+    if datasource_name is not None:
+        parsed = parse_connection_slug(
+            datasource_name, [e.engine for e in registry.all_engines()], vault=vault
+        )
+        if parsed is None:
+            console.print(
+                f"[anton.warning]Invalid slug '{datasource_name}'. "
+                "Expected format: engine-name.[/]"
+            )
+            console.print()
+            return session
+        edit_engine, edit_name = parsed
+        existing = vault.load(edit_engine, edit_name)
+        if existing is None:
+            console.print(
+                f"[anton.warning]No connection '{datasource_name}' found in Local Vault.[/]"
+            )
+            console.print()
+            return session
+        engine_def = registry.get(edit_engine)
+        if engine_def is None:
+            console.print(
+                f"[anton.warning]Unknown engine '{edit_engine}'. "
+                "Cannot update credentials.[/]"
+            )
+            console.print()
+            return session
+
+        console.print()
+        console.print(
+            f"[anton.cyan](anton)[/] Editing [bold]\"{datasource_name}\"[/bold]"
+            f" ({engine_def.display_name})."
+        )
+        console.print("[anton.muted]        Press Enter to keep the current value.[/]")
+        console.print()
+
+        active_fields = engine_def.fields
+        if engine_def.auth_method == "choice" and engine_def.auth_methods:
+            for am in engine_def.auth_methods:
+                am_field_names = {af.name for af in am.fields}
+                if any(k in am_field_names for k in existing):
+                    active_fields = am.fields
+                    break
+            if not active_fields:
+                active_fields = engine_def.auth_methods[0].fields
+
+        credentials: dict[str, str] = dict(existing)
+        for f in active_fields:
+            current = existing.get(f.name, "")
+            field_label = f"(anton) {f.name}"
+            if not f.required:
+                field_label += " (optional)"
+
+            if f.secret:
+                masked = "••••••••" if current else ""
+                label = f"{field_label} [{masked}]" if masked else field_label
+                value = await prompt_or_cancel(label, password=True)
+                if value is None:
+                    return session
+                if value:
+                    credentials[f.name] = value
+            elif current:
+                value = await prompt_or_cancel(
+                    f"{field_label}",
+                    default=current,
+                )
+                if value is None:
+                    return session
+                credentials[f.name] = value if value else current
+            elif f.default:
+                value = await prompt_or_cancel(
+                    f"{field_label}",
+                    default=f.default,
+                )
+                if value is None:
+                    return session
+                if value:
+                    credentials[f.name] = value
+            else:
+                value = await prompt_or_cancel(field_label)
+                if value is None:
+                    return session
+                if value:
+                    credentials[f.name] = value
+
+        if engine_def.test_snippet:
+            if not await run_connection_test(
+                console, scratchpads, vault, engine_def, credentials, active_fields
+            ):
+                return session
+
+        vault.save(edit_engine, edit_name, credentials)
+        restore_namespaced_env(vault)
+        register_secret_vars(engine_def, engine=edit_engine, name=edit_name)
+        console.print()
+        console.print(
+            f'        Credentials updated for [bold]"{datasource_name}"[/bold].'
+        )
+        console.print()
+        console.print(
+            "[anton.muted]        You can now ask me questions about your data.[/]"
+        )
+        console.print()
+        if not from_tool_call:
+            session._history.append(
+                {
+                    "role": "assistant",
+                    "content": (
+                        f"I've updated the credentials for the {engine_def.display_name} connection "
+                        f'"{datasource_name}" in the Local Vault.'
+                    ),
+                }
+            )
+        return session
+
+    console.print()
+    all_engines = registry.all_engines()
+    popular_engines = [e for e in all_engines if e.popular and not e.custom]
+    other_engines = [e for e in all_engines if not e.popular and not e.custom]
+    custom_engines = [e for e in all_engines if e.custom]
+    display_engines = popular_engines + other_engines + custom_engines
+
+    saved_connections = vault.list_connections()
+    seen_engines: set[str] = set()
+    recent_engine_entries: list[tuple[str, str]] = []
+    for c in saved_connections:
+        if c["engine"] not in seen_engines:
+            seen_engines.add(c["engine"])
+            engine_obj = registry.get(c["engine"])
+            label = engine_obj.display_name if engine_obj else c["engine"]
+            recent_engine_entries.append((c["engine"], label))
+
+    def print_sections() -> None:
+        console.print(
+            "[anton.cyan](anton)[/] Select a data source to create a new connection:\n"
+        )
+        console.print("       [bold]  Primary")
+        console.print(
+            "         [bold]  0.[/bold] Custom datasource"
+            " (connect anything via API, SQL, or MCP)\n"
+        )
+        if popular_engines:
+            console.print("       [bold]  Most popular")
+            for i, e in enumerate(popular_engines, 1):
+                console.print(f"          [bold]{i:>2}.[/bold] {e.display_name}")
+            console.print()
+        if recent_engine_entries:
+            start = len(popular_engines) + 1
+            console.print("       [bold]  Recently used data sources")
+            for i, (_, label) in enumerate(recent_engine_entries, start):
+                console.print(f"          [bold]{i:>2}.[/bold] {label}")
+            console.print()
+
+    def print_all() -> None:
+        console.print(
+            "[anton.cyan](anton)[/] All data sources (★ = popular):\n"
+        )
+        console.print("       [bold]  Primary")
+        console.print(
+            "         [bold]  0.[/bold] Custom datasource"
+            " (connect anything via API, SQL, or MCP)\n"
+        )
+        for i, e in enumerate(display_engines, 1):
+            star = " ★" if e.popular else ""
+            console.print(f"          [bold]{i:>2}.[/bold] {e.display_name}{star}")
+        console.print()
+
+    async def get_create_new_answer() -> str | None:
+        print_sections()
+        console.print(
+            "       [anton.muted]Don't see yours? Type a datasource name (e.g., GitHub, Gmail, Jira, ...)\n"
+            "       It can be virtually any datasource — we'll figure out the details together.[/]"
+        )
+        console.print()
+        ans = await prompt_or_cancel(
+            "(anton) Enter a number or type a datasource name",
+        )
+        if ans is None:
+            return None
+        if ans.strip().lower() == "all":
+            console.print()
+            print_all()
+            ans = await prompt_or_cancel(
+                "(anton) Enter a number or type a name",
+            )
+        return ans
+
+    if prefill:
+        answer = prefill
+    elif saved_connections:
+        console.print()
+        console.print("[anton.cyan](anton)[/] What would you like to do?\n")
+        console.print("          [bold]  1.[/bold] Use an existing connection")
+        console.print("          [bold]  2.[/bold] Create a new connection")
+        console.print()
+        top_choice = await prompt_or_cancel(
+            "(anton) Enter a number", choices=["1", "2"]
+        )
+        if top_choice is None:
+            return session
+
+        if top_choice == "1":
+            console.print()
+            console.print("[anton.cyan](anton)[/] Your saved connections:\n")
+            for i, c in enumerate(saved_connections, 1):
+                conn_slug = f"{c['engine']}-{c['name']}"
+                engine_obj = registry.get(c["engine"])
+                engine_label = engine_obj.display_name if engine_obj else c["engine"]
+                console.print(
+                    f"          [bold]{i:>2}.[/bold] {conn_slug}"
+                    f" [dim]— {engine_label}[/]"
+                )
+            console.print()
+            pick = await prompt_or_cancel(
+                "(anton) Enter a number",
+                choices=[str(i) for i in range(1, len(saved_connections) + 1)],
+            )
+            if pick is None:
+                return session
+            picked_conn = saved_connections[int(pick) - 1]
+            picked_slug = f"{picked_conn['engine']}-{picked_conn['name']}"
+            return await _reconnect_to_saved(
+                console, session, vault, registry, picked_slug, picked_conn,
+                from_tool_call=from_tool_call,
+            )
+
+        answer = await get_create_new_answer()
+        if answer is None:
+            return session
+    else:
+        answer = await get_create_new_answer()
+        if answer is None:
+            return session
+
+    stripped_answer = answer.strip()
+    known_slugs = {
+        f"{c['engine']}-{c['name']}": c for c in vault.list_connections()
+    }
+    if stripped_answer in known_slugs:
+        conn = known_slugs[stripped_answer]
+        return await _reconnect_to_saved(
+            console, session, vault, registry, stripped_answer, conn,
+            from_tool_call=from_tool_call,
+        )
+
+    engine_def = None
+    custom_source = False
+    llm_recognised = False
+    saved_start = len(popular_engines) + 1
+    max_num = len(popular_engines) + len(recent_engine_entries)
+
+    if stripped_answer.isdigit() or (stripped_answer.lstrip("-").isdigit()):
+        pick_num = int(stripped_answer)
+        if pick_num == 0:
+            custom_source = True
+        elif 1 <= pick_num <= len(popular_engines):
+            engine_def = popular_engines[pick_num - 1]
+        elif recent_engine_entries and saved_start <= pick_num <= max_num:
+            picked_engine_slug, _ = recent_engine_entries[pick_num - saved_start]
+            engine_def = registry.get(picked_engine_slug)
+            if engine_def is None:
+                custom_source = True
+        else:
+            console.print(
+                f"[anton.warning](anton)[/] '{stripped_answer}' is out of range. "
+                f"Please enter 0\u2013{max_num}.[/]"
+            )
+            console.print()
+            return session
+
+    if engine_def is None and not custom_source:
+        engine_def = registry.find_by_name(stripped_answer)
+        if engine_def is None:
+            needle = stripped_answer.lower()
+            candidates = [
+                e
+                for e in all_engines
+                if needle in e.display_name.lower() or needle in e.engine.lower()
+            ]
+            if len(candidates) == 1:
+                engine_def = candidates[0]
+            elif len(candidates) > 1:
+                console.print()
+                console.print(
+                    f"[anton.warning](anton)[/] '{stripped_answer}' matches multiple engines — "
+                    "which one did you mean?"
+                )
+                console.print()
+                for i, e in enumerate(candidates, 1):
+                    console.print(f"        {i}. {e.display_name}")
+                console.print()
+                pick = await prompt_or_cancel("(anton) Enter a number")
+                if pick is None:
+                    return session
+                pick = (pick or "").strip()
+                try:
+                    engine_def = candidates[int(pick) - 1]
+                except (ValueError, IndexError):
+                    console.print("[anton.warning]Invalid choice. Aborting.[/]")
+                    console.print()
+                    return session
+
+        if engine_def is None:
+            engine_names = [e.display_name for e in all_engines]
+            try:
+                console.print()
+                console.print("[anton.muted]        Looking up datasource…[/]")
+                llm_resp = await session._llm.plan(
+                    system="You are a datasource identification assistant.",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": (
+                                f"The user typed: {stripped_answer!r}\n"
+                                f"Known datasources: {engine_names!r}\n\n"
+                                "If the user input clearly matches one of the known datasources, "
+                                "reply with EXACTLY: MATCH:<display_name>\n"
+                                "If it does NOT match any known datasource but you recognise it "
+                                "as a real service/tool, reply with EXACTLY: CUSTOM\n"
+                                "If you don't recognise it at all, reply with EXACTLY: UNKNOWN\n"
+                                "Reply with only one of those three forms, nothing else."
+                            ),
+                        }
+                    ],
+                    max_tokens=64,
+                )
+                llm_text = (llm_resp.content or "").strip()
+            except Exception:
+                llm_text = "UNKNOWN"
+
+            llm_recognised = llm_text == "CUSTOM" or llm_text.startswith("MATCH:")
+
+            if llm_text.startswith("MATCH:"):
+                matched_name = llm_text[len("MATCH:"):].strip()
+                matched_engine = next(
+                    (e for e in all_engines if e.display_name == matched_name), None
+                )
+                if matched_engine is not None:
+                    if matched_name.lower() != stripped_answer.lower():
+                        confirm = await prompt_or_cancel(
+                            f'(anton) Did you mean "{matched_name}"?',
+                            choices=["y", "n"], default="y",
+                        )
+                        if confirm is not None and confirm.strip().lower() == "y":
+                            engine_def = matched_engine
+                    else:
+                        engine_def = matched_engine
+
+            if engine_def is None:
+                custom_source = True
+
+    if custom_source:
+        result = await handle_add_custom_datasource(
+            console, stripped_answer if not stripped_answer.isdigit() else "", registry, session,
+            known_service=llm_recognised,
+        )
+        if result is None:
+            return session
+        engine_def, credentials = result
+        if engine_def.test_snippet:
+            if not await run_connection_test(
+                console, scratchpads, vault, engine_def, credentials, engine_def.fields
+            ):
+                return session
+        conn_name = uuid.uuid4().hex[:8]
+        vault.save(engine_def.engine, conn_name, credentials)
+        slug = f"{engine_def.engine}-{conn_name}"
+        restore_namespaced_env(vault)
+        session._active_datasource = slug
+        register_secret_vars(engine_def, engine=engine_def.engine, name=conn_name)
+        console.print(
+            f'        Credentials saved to Local Vault as [bold]"{slug}"[/bold].'
+        )
+        console.print()
+        console.print(
+            "[anton.muted]        You can now ask me questions about your data.[/]"
+        )
+        console.print()
+        if not from_tool_call:
+            session._history.append(
+                {
+                    "role": "assistant",
+                    "content": (
+                        f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
+                        f"to the Local Vault. I can now query this data source when needed."
+                    ),
+                }
+            )
+        return session
+
+    assert engine_def is not None
+    active_fields = engine_def.fields
+    chosen_method = None
+    if engine_def.auth_method == "choice" and engine_def.auth_methods:
+        console.print()
+        console.print(
+            f"[anton.cyan](anton)[/] How would you like to authenticate with "
+            f"[bold]{engine_def.display_name}[/]?"
+        )
+        console.print()
+        for i, am in enumerate(engine_def.auth_methods, 1):
+            console.print(f"        {i}. {am.display}")
+        console.print()
+        choice_str = await prompt_or_cancel("(anton) Enter a number")
+        if choice_str is None:
+            return session
+        choice_str = (choice_str or "").strip()
+        try:
+            choice_idx = int(choice_str) - 1
+            chosen_method = engine_def.auth_methods[choice_idx]
+        except (ValueError, IndexError):
+            console.print("[anton.warning]Invalid choice. Aborting.[/]")
+            console.print()
+            return session
+        active_fields = chosen_method.fields
+
+    collector = ConnectionCollector(
+        engine_def=engine_def,
+        auth_method=chosen_method,
+    )
+    if known_variables:
+        collector.fill_many(known_variables)
+
+    known_engine_slugs = [e.engine for e in registry.all_engines()]
+    partial = False
+    required_fields = [f for f in active_fields if f.required]
+    optional_fields = [f for f in active_fields if not f.required]
+
+    if collector.is_complete:
+        filled_names = [
+            f.name for f in active_fields if collector.collected.get(f.name)
+        ]
+        console.print()
+        console.print(
+            f"[anton.cyan](anton)[/] Got everything for [bold]"
+            f"{engine_def.display_name}[/] from context: "
+            f"{', '.join(filled_names)}."
+        )
+        console.print()
+    else:
+        console.print()
+        console.print(
+            f"[anton.cyan](anton)[/] To connect [bold]"
+            f"{engine_def.display_name}[/], I'll need the following:"
+        )
+        console.print()
+
+        if required_fields:
+            console.print("        [bold]Required[/]      " + "─" * 39)
+            for f in required_fields:
+                marker = (
+                    "[green]✓[/] " if collector.collected.get(f.name) else "• "
+                )
+                console.print(
+                    f"        {marker}[bold]{f.name:<12}[/] "
+                    f"[anton.muted]— {f.description}[/]"
+                )
+
+        if optional_fields:
+            console.print()
+            console.print("        [bold]Optional[/]      " + "─" * 39)
+            for f in optional_fields:
+                marker = (
+                    "[green]✓[/] " if collector.collected.get(f.name) else "• "
+                )
+                console.print(
+                    f"        {marker}[bold]{f.name:<12}[/] "
+                    f"[anton.muted]— {f.description}[/]"
+                )
+
+        console.print()
+
+        if not collector.collected:
+            help_answer = await prompt_or_cancel(
+                "(anton) Do you need instructions on how to obtain these credentials?",
+                choices_display="y/n", default="n",
+            )
+            if help_answer is None:
+                return session
+            normalized = help_answer.strip().lower()
+            if normalized == "y":
+                await show_credential_help(
+                    console, session, engine_def.display_name, None, active_fields,
+                )
+            elif normalized and normalized != "n":
+                extracted = await extract_variables(
+                    help_answer,
+                    expected_fields=collector.active_fields,
+                    current_engine=engine_def.engine,
+                    current_engine_display=engine_def.display_name,
+                    known_engine_slugs=known_engine_slugs,
+                    session=session,
+                )
+                if extracted.is_redirect:
+                    redirect_text = _build_redirect_message(
+                        collector, help_answer, extracted.redirect_engine
+                    )
+                    session._pending_connect_redirect = redirect_text
+                    if not from_tool_call:
+                        session._history.append(
+                            {"role": "assistant", "content": redirect_text}
+                        )
+                    return session
+                if extracted.variables:
+                    filled = collector.fill_many(extracted.variables)
+                    if filled:
+                        console.print(
+                            f"[anton.muted]        Got: {', '.join(filled)}[/]"
+                        )
+                        console.print()
+
+    while not collector.is_complete:
+        collector.format_status(console)
+        console.print()
+
+        next_field = collector.next_field
+        only_one_required = (
+            next_field is not None
+            and next_field.required
+            and len(collector.missing_required) == 1
+        )
+
+        if only_one_required and next_field is not None:
+            label = f"(anton) {next_field.name}"
+            if next_field.secret:
+                value = await prompt_or_cancel(label, password=True)
+            elif next_field.default:
+                value = await prompt_or_cancel(label, default=next_field.default)
+            else:
+                value = await prompt_or_cancel(label)
+            if value is None:
+                return session
+            if not value:
+                partial = True
+                break
+            collector.fill(next_field.name, value)
+            continue
+
+        missing_names = ", ".join(f.name for f in collector.missing_required)
+        prompt_label = (
+            f"(anton) Provide values for {missing_names} "
+            f"(one at a time, or 'key=value key2=value2', or 'skip')"
+        )
+        value = await prompt_or_cancel(prompt_label)
+        if value is None:
+            return session
+        if value.strip().lower() == "skip":
+            partial = True
+            break
+        if not value.strip():
+            continue
+
+        extracted = await extract_variables(
+            value,
+            expected_fields=collector.active_fields,
+            current_engine=engine_def.engine,
+            current_engine_display=engine_def.display_name,
+            known_engine_slugs=known_engine_slugs,
+            session=session,
+        )
+
+        if extracted.is_redirect:
+            redirect_text = _build_redirect_message(
+                collector, value, extracted.redirect_engine
+            )
+            session._pending_connect_redirect = redirect_text
+            if not from_tool_call:
+                session._history.append(
+                    {"role": "assistant", "content": redirect_text}
+                )
+            return session
+
+        if extracted.variables:
+            filled = collector.fill_many(extracted.variables)
+            if filled:
+                console.print(
+                    f"[anton.muted]        Got: {', '.join(filled)}[/]"
+                )
+                console.print()
+                continue
+
+        if next_field is not None:
+            collector.fill(next_field.name, value.strip())
+        else:
+            console.print(
+                "[anton.warning]        Couldn't parse that. "
+                "Try 'key=value' or one value at a time.[/]"
+            )
+            console.print()
+
+    credentials: dict[str, str] = dict(collector.collected)
+
+    if partial:
+        auto_name = uuid.uuid4().hex[:8]
+        vault.save(engine_def.engine, auto_name, credentials)
+        slug = f"{engine_def.engine}-{auto_name}"
+        console.print()
+        console.print(
+            f"[anton.muted]Partial connection saved to Local Vault as "
+            f'[bold]"{slug}"[/bold]. '
+            f"Run [bold]/edit {slug}[/bold] to complete it when you're ready.[/]"
+        )
+        console.print()
+        return session
+
+    if engine_def.test_snippet:
+        if not await run_connection_test(
+            console, scratchpads, vault, engine_def, credentials, active_fields
+        ):
+            session._pending_connect_status = "test_failed"
+            return session
+
+    conn_name = registry.derive_name(engine_def, credentials)
+    if not conn_name:
+        conn_name = uuid.uuid4().hex[:8]
+
+    slug = f"{engine_def.engine}-{conn_name}"
+
+    if vault.load(engine_def.engine, conn_name) is not None:
+        console.print()
+        console.print(
+            f'[anton.warning](anton)[/] A connection [bold]"{slug}"[/bold] already exists.'
+        )
+        console.print()
+        choice = await prompt_or_cancel(
+            f"(anton) {_PROMPT_RECONNECT_CANCEL}",
+        )
+        if choice is None or choice.strip().lower() != "reconnect":
+            console.print("[anton.muted]Cancelled.[/]")
+            console.print()
+            return session
+        restore_namespaced_env(vault)
+        register_secret_vars(engine_def, engine=engine_def.engine, name=conn_name)
+        console.print()
+        console.print(
+            f'[anton.success]        ✓ Reconnected to [bold]"{slug}"[/bold].[/]'
+        )
+        console.print()
+        if not from_tool_call:
+            session._history.append(
+                {
+                    "role": "assistant",
+                    "content": (
+                        f'I\'ve reconnected to the {engine_def.display_name} connection "{slug}" '
+                        f"in the Local Vault. I can now query this data source when needed."
+                    ),
+                }
+            )
+        return session
+
+    vault.save(engine_def.engine, conn_name, credentials)
+    restore_namespaced_env(vault)
+    session._active_datasource = slug
+    register_secret_vars(engine_def, engine=engine_def.engine, name=conn_name)
+    console.print(f'        Credentials saved to Local Vault as [bold]"{slug}"[/bold].')
+
+    console.print()
+    console.print(
+        "[anton.muted]        You can now ask me questions about your data.[/]"
+    )
+    console.print()
+
+    if not from_tool_call:
+        session._history.append(
+            {
+                "role": "assistant",
+                "content": (
+                    f'I\'ve saved a {engine_def.display_name} connection named "{slug}" '
+                    f"to the Local Vault. I can now query this data source when needed."
+                ),
+            }
+        )
+    return session

From 0c39b9cfbc041f0e248cc10b2bee8251452454b3 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:53:54 +0200
Subject: [PATCH 108/134] Split ds commands

---
 anton/commands/datasource/custom.py | 310 ++++++++++++++++++++++++++++
 1 file changed, 310 insertions(+)
 create mode 100644 anton/commands/datasource/custom.py

diff --git a/anton/commands/datasource/custom.py b/anton/commands/datasource/custom.py
new file mode 100644
index 00000000..bc2ec1b0
--- /dev/null
+++ b/anton/commands/datasource/custom.py
@@ -0,0 +1,310 @@
+"""Custom datasource creation (LLM-assisted)."""
+
+from __future__ import annotations
+
+import re
+import shutil
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, Field
+
+from anton.core.datasources.datasource_registry import DatasourceEngine, DatasourceField
+from anton.utils.datasources import remove_engine_block
+from anton.utils.prompt import prompt_or_cancel
+from anton.commands.datasource.helpers import show_credential_help
+
+if TYPE_CHECKING:
+    from rich.console import Console
+    from anton.chat import ChatSession
+    from anton.core.datasources.datasource_registry import DatasourceRegistry
+
+
+class _CustomDatasourceField(BaseModel):
+    """One credential field in a custom-datasource spec."""
+
+    name: str = Field(
+        ...,
+        description=(
+            "snake_case field name (e.g. 'host', 'api_key'). Must be a "
+            "valid Python identifier; this becomes both the on-disk key "
+            "and the env var suffix (DS_<NAME>)."
+        ),
+    )
+    value: str = Field(
+        default="",
+        description=(
+            "Inline value if the user already provided one in their "
+            "description, otherwise empty string."
+        ),
+    )
+    secret: bool = Field(
+        default=False,
+        description=(
+            "True if the field is sensitive (passwords, API keys, "
+            "tokens) — affects how it's stored and prompted for."
+        ),
+    )
+    required: bool = Field(
+        default=True,
+        description="True if the connection cannot be tested without this field.",
+    )
+    description: str = Field(
+        default="",
+        description=(
+            "One-line description shown to the user when prompting "
+            "for this field."
+        ),
+    )
+
+
+class _CustomDatasourceSpec(BaseModel):
+    """Structured output of the LLM call in handle_add_custom_datasource."""
+
+    display_name: str = Field(
+        ...,
+        description="Human-readable name for the service (e.g. 'GitHub API').",
+    )
+    pip: str = Field(
+        default="",
+        description=(
+            "pip-installable package name (or space-separated names) "
+            "needed to interact with this service. Empty string if no "
+            "extra package is required (e.g. plain HTTPS via stdlib)."
+        ),
+    )
+    test_snippet: str = Field(
+        default="",
+        description=(
+            "Python code that tests the connection using os.environ "
+            "vars DS_FIELDNAME (uppercase field name with DS_ prefix) "
+            "and prints 'ok' on success. Empty string if untestable."
+        ),
+    )
+    fields: list[_CustomDatasourceField] = Field(
+        default_factory=list,
+        description=(
+            "Credential fields the user will need to provide. List in "
+            "the order they should be prompted."
+        ),
+    )
+
+
+async def handle_add_custom_datasource(
+    console: "Console",
+    name: str,
+    registry: "DatasourceRegistry",
+    session: "ChatSession",
+    *,
+    known_service: bool = False,
+):
+    """Ask for the tool name, use the LLM to identify required fields, then collect credentials."""
+
+    console.print()
+    if name:
+        tool_name = name
+    else:
+        tool_name = await prompt_or_cancel(
+            "(anton) What is the name of the tool or service?",
+        )
+        if not tool_name or not tool_name.strip():
+            return None
+        tool_name = tool_name.strip()
+
+    if known_service:
+        user_answer = ""
+        console.print("[anton.muted]        Working out the connection details…[/]")
+    else:
+        user_answer = await prompt_or_cancel(
+            f"(anton) How do you authenticate with {tool_name}? "
+            "Describe what credentials you have (don't paste actual values)",
+        )
+        if not user_answer or not user_answer.strip():
+            return None
+        console.print()
+        console.print("[anton.muted]    Got it — working out the connection details…[/]")
+
+    llm_prompt = f"The user wants to connect to {repr(tool_name)}."
+    if user_answer:
+        llm_prompt += f" They said: {user_answer}"
+    else:
+        llm_prompt += " Determine the standard authentication fields for this service."
+    llm_prompt += (
+        "\n\nReturn the connection spec following the schema you've been given. "
+        "For test_snippet, write Python that uses os.environ['DS_<FIELDNAME>'] "
+        "vars (uppercase, DS_ prefix) and prints 'ok' on success."
+    )
+
+    try:
+        spec: _CustomDatasourceSpec = await session._llm.generate_object(
+            _CustomDatasourceSpec,
+            system="You are a data source connection expert.",
+            messages=[{"role": "user", "content": llm_prompt}],
+            max_tokens=1024,
+        )
+    except Exception:
+        console.print(
+            "[anton.warning]        Couldn't identify connection details. Try again.[/]"
+        )
+        console.print()
+        return None
+
+    test_snippet = spec.test_snippet.strip()
+    fields: list[DatasourceField] = []
+    for f in spec.fields:
+        if not f.name:
+            continue
+        fields.append(
+            DatasourceField(
+                name=f.name,
+                required=f.required,
+                secret=f.secret,
+                description=f.description,
+            )
+        )
+
+    if not fields:
+        console.print("[anton.warning]    Couldn't identify any connection fields.[/]")
+        console.print()
+        return None
+
+    display_name = spec.display_name or name
+    pip_pkg = spec.pip
+
+    # Show summary
+    console.print()
+    console.print("      [bold]── What I'll save ──────────────────────────[/]")
+    credentials: dict[str, str] = {}
+    for f, raw in zip(fields, spec.fields):
+        inline_value = (raw.value or "").strip()
+        if f.secret and inline_value:
+            console.print(
+                f"        • [bold]{f.name:<14}[/] (secret — provided, stored securely)"
+            )
+            credentials[f.name] = inline_value
+        elif f.secret:
+            console.print(
+                f"        • [bold]{f.name:<14}[/] (secret — I'll ask for this)"
+            )
+        else:
+            val_display = inline_value or "[anton.muted]<to be collected>[/]"
+            console.print(f"        • [bold]{f.name:<14}[/] {val_display}")
+            if inline_value:
+                credentials[f.name] = inline_value
+    console.print()
+
+    help_answer = await prompt_or_cancel(
+        "(anton) Do you need instructions on how to obtain these credentials?",
+        choices=["y", "n"], default="n",
+    )
+    if help_answer is None:
+        return None
+    if help_answer.strip().lower() == "y":
+        await show_credential_help(
+            console, session, display_name, None, fields,
+        )
+
+    # Prompt for any secret fields not provided inline
+    for f, raw in zip(fields, spec.fields):
+        if not f.secret:
+            continue
+        if (raw.value or "").strip():
+            continue
+        value = await prompt_or_cancel(f"(anton) {f.name}", password=True)
+        if value is None:
+            return None
+        if value:
+            credentials[f.name] = value
+
+    # Prompt for any required non-secret fields not provided inline
+    for f, raw in zip(fields, spec.fields):
+        if f.secret:
+            continue
+        if not f.required:
+            continue
+        if f.name in credentials:
+            continue
+        value = await prompt_or_cancel(f"(anton) {f.name}")
+        if value is None:
+            return None
+        if value:
+            credentials[f.name] = value
+
+    # Offer to collect optional non-secret fields
+    for f, raw in zip(fields, spec.fields):
+        if f.secret or f.required or f.name in credentials:
+            continue
+        value = await prompt_or_cancel(f"(anton) {f.name} (optional — press Enter to skip)")
+        if value is None:
+            return None
+        if value:
+            credentials[f.name] = value
+
+    if not credentials:
+        console.print("[anton.warning]        No credentials collected. Aborting.[/]")
+        console.print()
+        return None
+
+    # Build engine slug and write definition to ~/.anton/datasources.md
+    slug = re.sub(r"[^\w]", "_", display_name.lower()).strip("_")
+    field_lines = "\n".join(
+        f"  - {{ name: {f.name}, required: {str(f.required).lower()}, "
+        f'secret: {str(f.secret).lower()}, description: "{f.description}" }}'
+        for f in fields
+    )
+    test_snippet_yaml = ""
+    if test_snippet:
+        indented = "\n".join(f"  {line}" for line in test_snippet.splitlines())
+        test_snippet_yaml = f"test_snippet: |\n{indented}\n"
+
+    yaml_block = (
+        f"\n---\n\n## {display_name}\n"
+        "```yaml\n"
+        f"engine: {slug}\n"
+        f"display_name: {display_name}\n"
+        + (f"pip: {pip_pkg}\n" if pip_pkg else "")
+        + f"fields:\n{field_lines}\n"
+        + test_snippet_yaml
+        + "```\n"
+    )
+    user_ds_path = Path("~/.anton/datasources.md").expanduser()
+    tmp_path = user_ds_path.with_suffix(".tmp")
+
+    existing = (
+        user_ds_path.read_text(encoding="utf-8") if user_ds_path.is_file() else ""
+    )
+    existing = remove_engine_block(existing, slug)
+
+    tmp_path.write_text(existing + yaml_block, encoding="utf-8")
+
+    parsed = registry.validate_file(tmp_path)
+    if slug in parsed:
+        shutil.move(str(tmp_path), str(user_ds_path))
+    else:
+        tmp_path.unlink(missing_ok=True)
+        console.print(
+            "[anton.warning]Could not validate engine definition — "
+            "credentials saved but engine not written to datasources.md.[/]"
+        )
+
+    registry.reload()
+    engine_def = registry.get(slug)
+    if engine_def is None:
+        engine_def = DatasourceEngine(
+            engine=slug,
+            display_name=display_name,
+            pip=pip_pkg,
+            fields=fields,
+            test_snippet=test_snippet,
+        )
+
+    missing_required = [f.name for f in fields if f.required and f.name not in credentials]
+    if missing_required:
+        console.print(
+            "[anton.warning]    Cannot save — missing required fields: "
+            f"{', '.join(missing_required)}. Aborting.[/]"
+        )
+        console.print()
+        return None
+
+    return engine_def, credentials

From b8c17d1bb9d7592137ef12f306e91b42f9b5f505 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:53:57 +0200
Subject: [PATCH 109/134] Split ds commands

---
 anton/commands/datasource/helpers.py | 69 ++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 anton/commands/datasource/helpers.py

diff --git a/anton/commands/datasource/helpers.py b/anton/commands/datasource/helpers.py
new file mode 100644
index 00000000..c44f5e7d
--- /dev/null
+++ b/anton/commands/datasource/helpers.py
@@ -0,0 +1,69 @@
+"""Shared helpers for datasource commands."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from rich.markdown import Markdown
+from rich.padding import Padding
+
+if TYPE_CHECKING:
+    from rich.console import Console
+    from anton.chat import ChatSession
+
+
+async def show_credential_help(
+    console: "Console",
+    session: "ChatSession",
+    service_name: str,
+    current_field,
+    all_fields: list,
+) -> None:
+    """Use the LLM to explain how to obtain credentials."""
+    field_descriptions = ", ".join(
+        f"{f.name} ({f.description})" for f in all_fields
+    )
+    storage_note = (
+        "The credentials will be stored securely in Anton's Local Vault — "
+        "do NOT suggest storage tips, password managers, or safe-keeping advice."
+    )
+    if current_field is not None:
+        prompt = (
+            f"I'm connecting to {service_name} and need to provide: {field_descriptions}\n\n"
+            f"I need help with the '{current_field.name}' field"
+            f" ({current_field.description}).\n\n"
+            "Give me a brief step-by-step guide on where and how to get this credential. "
+            f"Be concise — numbered steps, no fluff. {storage_note}"
+        )
+        heading = f"[anton.cyan](anton)[/] How to get [bold]{current_field.name}[/]:"
+    else:
+        prompt = (
+            f"I'm connecting to {service_name} and need these credentials: {field_descriptions}\n\n"
+            "Give me a brief step-by-step guide on where and how to obtain each of these. "
+            f"Be concise — numbered steps, no fluff. {storage_note}"
+        )
+        heading = f"[anton.cyan](anton)[/] How to get credentials for [bold]{service_name}[/]:"
+
+    console.print()
+    console.print("[anton.muted]        Looking up instructions…[/]")
+
+    try:
+        resp = await session._llm.plan(
+            system="You are a helpful assistant that guides users through obtaining credentials for services.",
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt,
+                }
+            ],
+            max_tokens=512,
+        )
+        help_text = (resp.content or "").strip()
+    except Exception:
+        help_text = "Sorry, couldn't fetch help right now. Try checking the service's documentation."
+
+    console.print()
+    console.print(heading)
+    console.print()
+    console.print(Padding(Markdown(help_text), (0, 0, 0, 8)))
+    console.print()

From c5f65e8d84a45a27649765ff80fd6916d06d2f50 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:54:02 +0200
Subject: [PATCH 110/134] Split ds commands

---
 anton/commands/datasource/manage.py | 120 ++++++++++++++++++
 anton/commands/datasource/verify.py | 187 ++++++++++++++++++++++++++++
 2 files changed, 307 insertions(+)
 create mode 100644 anton/commands/datasource/manage.py
 create mode 100644 anton/commands/datasource/verify.py

diff --git a/anton/commands/datasource/manage.py b/anton/commands/datasource/manage.py
new file mode 100644
index 00000000..ed8e8c0a
--- /dev/null
+++ b/anton/commands/datasource/manage.py
@@ -0,0 +1,120 @@
+"""List and remove datasource commands."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from anton.core.datasources.data_vault import DataVault, LocalDataVault
+from anton.core.datasources.datasource_registry import DatasourceRegistry
+from anton.utils.datasources import parse_connection_slug, remove_engine_block, restore_namespaced_env
+from anton.utils.prompt import prompt_or_cancel
+
+if TYPE_CHECKING:
+    from rich.console import Console
+
+
+def handle_list_data_sources(console: "Console", vault: DataVault | None = None) -> None:
+    """Print all saved Local Vault connections in a table with status."""
+    from rich.table import Table
+
+    vault = vault or LocalDataVault()
+    registry = DatasourceRegistry()
+    conns = vault.list_connections()
+    console.print()
+    if not conns:
+        console.print("[anton.muted]No data sources connected yet.[/]")
+        console.print("[anton.muted]Use /connect to add one.[/]")
+        console.print()
+        return
+
+    table = Table(title="Local Vault — Saved Connections", show_lines=False)
+    table.add_column("Name", style="bold")
+    table.add_column("Source")
+    table.add_column("Status")
+
+    for c in conns:
+        slug = f"{c['engine']}-{c['name']}"
+        engine_def = registry.get(c["engine"])
+        source = engine_def.display_name if engine_def else c["engine"]
+        fields = vault.load(c["engine"], c["name"]) or {}
+
+        if not fields:
+            status = "[yellow]incomplete[/]"
+        elif engine_def and engine_def.auth_method != "choice":
+            required = [f.name for f in engine_def.fields if f.required]
+            missing = [name for name in required if name not in fields]
+            status = "[yellow]incomplete[/]" if missing else "[green]saved[/]"
+        else:
+            status = "[green]saved[/]"
+
+        table.add_row(slug, source, status)
+
+    console.print(table)
+    console.print()
+
+
+async def handle_remove_data_source(console: "Console", slug: str, vault: DataVault | None = None) -> None:
+    """Delete a connection from the Local Vault by slug (engine-name)."""
+    vault = vault or LocalDataVault()
+    registry = DatasourceRegistry()
+
+    if not slug:
+        connections = vault.list_connections()
+        if not connections:
+            console.print("[anton.muted]No saved connections to remove.[/]")
+            console.print()
+            return
+        console.print()
+        console.print("[anton.cyan](anton)[/] Which connection do you want to remove?\n")
+        for i, c in enumerate(connections, 1):
+            conn_slug = f"{c['engine']}-{c['name']}"
+            engine_def = registry.get(c["engine"])
+            label = engine_def.display_name if engine_def else c["engine"]
+            console.print(f"          [bold]{i:>2}.[/bold] {conn_slug} [dim]({label})[/]")
+        console.print()
+        choices = [str(i) for i in range(1, len(connections) + 1)]
+        pick = await prompt_or_cancel("(anton) Enter a number", choices=choices)
+        if pick is None:
+            console.print("[anton.muted]Cancelled.[/]")
+            console.print()
+            return
+        picked = connections[int(pick) - 1]
+        slug = f"{picked['engine']}-{picked['name']}"
+
+    parsed = parse_connection_slug(slug, [e.engine for e in registry.all_engines()], vault=vault)
+    if parsed is None:
+        console.print(
+            f"[anton.warning]Invalid name '{slug}'. Use engine-name format.[/]"
+        )
+        console.print()
+        return
+    engine, name = parsed
+    if vault.load(engine, name) is None:
+        console.print(f"[anton.warning]No connection '{slug}' found.[/]")
+        console.print()
+        return
+
+    confirm = await prompt_or_cancel(
+        f"(anton) Remove '{slug}' from Local Vault?",
+        choices=["y", "n"], default="n",
+    )
+    if confirm is not None and confirm.strip().lower() == "y":
+        vault.delete(engine, name)
+        restore_namespaced_env(vault)
+        engine_def = registry.get(engine)
+        if engine_def is not None and engine_def.custom:
+            remaining = [
+                c for c in vault.list_connections() if c["engine"] == engine
+            ]
+            if not remaining:
+                user_path = DatasourceRegistry._USER_PATH
+                if user_path.is_file():
+                    updated = remove_engine_block(
+                        user_path.read_text(encoding="utf-8"), engine
+                    )
+                    user_path.write_text(updated, encoding="utf-8")
+                    registry.reload()
+        console.print(f"[anton.success]Removed {slug}.[/]")
+    else:
+        console.print("[anton.muted]Cancelled.[/]")
+    console.print()
diff --git a/anton/commands/datasource/verify.py b/anton/commands/datasource/verify.py
new file mode 100644
index 00000000..3980e3f3
--- /dev/null
+++ b/anton/commands/datasource/verify.py
@@ -0,0 +1,187 @@
+"""Connection testing commands."""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import TYPE_CHECKING
+
+from anton.core.datasources.data_vault import DataVault, LocalDataVault
+from anton.core.datasources.datasource_registry import DatasourceEngine, DatasourceField, DatasourceRegistry
+from anton.utils.datasources import parse_connection_slug, register_secret_vars, restore_namespaced_env
+from anton.utils.prompt import prompt_or_cancel
+
+if TYPE_CHECKING:
+    from rich.console import Console
+    from anton.core.backends.manager import ScratchpadManager
+
+
+async def run_connection_test(
+    console: "Console",
+    scratchpads: "ScratchpadManager",
+    vault: "DataVault",
+    engine_def: "DatasourceEngine",
+    credentials: dict[str, str],
+    retry_fields: "list[DatasourceField]",
+) -> bool:
+    """Inject flat DS_* vars, run engine_def.test_snippet, restore env.
+
+    Returns True on success, False if the user declines retry after failure.
+    Mutates credentials in-place when the user re-enters secrets on retry.
+    """
+    while True:
+        console.print()
+        console.print("[anton.cyan](anton)[/] Got it. Testing connection…")
+
+        vault.clear_ds_env()
+        for key, value in credentials.items():
+            os.environ[f"DS_{key.upper()}"] = value
+        register_secret_vars(engine_def)  # flat mode, for scrubbing during test
+
+        try:
+            pad = await scratchpads.get_or_create("__datasource_test__")
+            await pad.reset()
+            if engine_def.pip:
+                if isinstance(engine_def.pip, list):
+                    pip_pkgs = engine_def.pip
+                else:
+                    pip_pkgs = engine_def.pip.split()
+                install_result = await pad.install_packages(pip_pkgs)
+                if "failed" in (install_result or "").lower():
+                    console.print()
+                    console.print(f"[anton.warning](anton)[/] Package install issue: {install_result[:200]}")
+
+            cell = None
+            for attempt in range(3):
+                cell = await pad.execute(engine_def.test_snippet)
+                if cell.error and "ModuleNotFoundError" in cell.error:
+                    match = re.search(r"No module named '([^']+)'", cell.error)
+                    if match:
+                        missing = match.group(1).split(".")[0]
+                        await pad.install_packages([missing])
+                        continue
+                break
+        finally:
+            restore_namespaced_env(vault)
+
+        if cell.error or (cell.stdout.strip() != "ok" and cell.stderr.strip()):
+            error_text = cell.error or cell.stderr.strip() or cell.stdout.strip()
+            last_line = next(
+                (ln for ln in reversed(error_text.splitlines()) if ln.strip()), error_text
+            )
+            console.print()
+            console.print("[anton.warning](anton)[/] ✗ Connection failed.")
+            console.print()
+            console.print(f"        Error: {last_line}")
+            console.print()
+            retry = await prompt_or_cancel(
+                "(anton) Would you like to re-enter your credentials?",
+                choices=["y", "n"], default="n",
+            )
+            if retry is None or retry.strip().lower() != "y":
+                return False
+            console.print()
+            for f in retry_fields:
+                if not f.secret:
+                    continue
+                value = await prompt_or_cancel(f"(anton) {f.name}", password=True)
+                if value is None:
+                    return False
+                if value:
+                    credentials[f.name] = value
+            continue
+
+        console.print("[anton.success]        ✓ Connected successfully![/]")
+        return True
+
+
+async def handle_test_datasource(
+    console: "Console",
+    scratchpads: "ScratchpadManager",
+    slug: str,
+    vault: DataVault | None = None,
+) -> None:
+    """Test an existing Local Vault connection by running its test_snippet."""
+    if not slug:
+        console.print(
+            "[anton.warning]Usage: /test <engine-name>[/]"
+        )
+        console.print()
+        return
+
+    vault = vault or LocalDataVault()
+    registry = DatasourceRegistry()
+    parsed = parse_connection_slug(slug, [e.engine for e in registry.all_engines()], vault=vault)
+    if parsed is None:
+        console.print(
+            f"[anton.warning]Invalid name '{slug}'. Use engine-name format.[/]"
+        )
+        console.print()
+        return
+    engine, name = parsed
+    fields = vault.load(engine, name)
+    if fields is None:
+        console.print(
+            f"[anton.warning]No connection '{slug}' found in Local Vault.[/]"
+        )
+        console.print()
+        return
+
+    engine_def = registry.get(engine)
+    if engine_def is None:
+        console.print(
+            f"[anton.warning]Unknown engine '{engine}'. Cannot test.[/]"
+        )
+        console.print()
+        return
+
+    if not engine_def.test_snippet:
+        console.print(
+            f"[anton.warning]No test snippet defined for '{engine}'. Cannot test.[/]"
+        )
+        console.print()
+        return
+
+    console.print()
+    console.print(
+        f"[anton.cyan](anton)[/] Testing connection [bold]{slug}[/bold]…"
+    )
+
+    vault.clear_ds_env()
+    vault.inject_env(engine, name, flat=True)
+    register_secret_vars(engine_def)  # flat names for scrubbing during test
+
+    cell = None
+    try:
+        pad = await scratchpads.get_or_create("__datasource_test__")
+        await pad.reset()
+        if engine_def.pip:
+            await pad.install_packages([engine_def.pip])
+        cell = await pad.execute(engine_def.test_snippet)
+    finally:
+        restore_namespaced_env(vault)
+
+    if cell is None or cell.error or (
+        cell.stdout.strip() != "ok" and cell.stderr.strip()
+    ):
+        error_text = ""
+        if cell is not None:
+            error_text = cell.error or cell.stderr.strip() or cell.stdout.strip()
+        first_line = (
+            next((ln for ln in error_text.splitlines() if ln.strip()), error_text)
+            if error_text
+            else "unknown error"
+        )
+        console.print()
+        console.print(
+            f"[anton.warning](anton)[/] ✗ Connection test failed for"
+            f" [bold]{slug}[/bold]."
+        )
+        console.print()
+        console.print(f"        Error: {first_line}")
+    else:
+        console.print(
+            f"[anton.success]        ✓ Connection test passed for"
+            f" [bold]{slug}[/bold]![/]"
+        )
+    console.print()

From 324b0e60cfc7bf777fb1e7beb23de42ab7109551 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:54:13 +0200
Subject: [PATCH 111/134] Vault interface

---
 anton/core/datasources/data_vault.py | 46 ++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/anton/core/datasources/data_vault.py b/anton/core/datasources/data_vault.py
index e5e75229..dd1eb73a 100644
--- a/anton/core/datasources/data_vault.py
+++ b/anton/core/datasources/data_vault.py
@@ -5,6 +5,7 @@
 import re
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import Protocol, runtime_checkable
 
 
 def _sanitize(value: str) -> str:
@@ -24,8 +25,47 @@ def _slug_env_prefix(engine: str, name: str) -> str:
     return "DS_" + re.sub(r"[^\w]", "_", raw).upper()
 
 
-class DataVault:
-    """Manages data source connection credentials in ~/.anton/data_vault/."""
+@runtime_checkable
+class DataVault(Protocol):
+    """Interface for credential storage backends.
+
+    The local implementation (LocalDataVault) stores JSON files in
+    ~/.anton/data_vault/. Cloud implementations can satisfy this protocol
+    with any backend (database, secrets manager, etc.) scoped to a user
+    or tenant.
+    """
+
+    def save(self, engine: str, name: str, credentials: dict[str, str]) -> object:
+        """Persist credentials for engine/name. Returns an implementation-defined path/key."""
+        ...
+
+    def load(self, engine: str, name: str) -> dict[str, str] | None:
+        """Return the fields dict for a connection, or None if not found."""
+        ...
+
+    def delete(self, engine: str, name: str) -> bool:
+        """Remove a connection. Returns True if it existed."""
+        ...
+
+    def list_connections(self) -> list[dict[str, str]]:
+        """Return [{engine, name, created_at}] for all stored connections."""
+        ...
+
+    def inject_env(self, engine: str, name: str, *, flat: bool = False) -> list[str] | None:
+        """Load credentials and set DS_* environment variables."""
+        ...
+
+    def clear_ds_env(self) -> None:
+        """Remove all DS_* variables from os.environ."""
+        ...
+
+    def next_connection_number(self, engine: str) -> int:
+        """Return the next auto-increment number for an engine (1-based)."""
+        ...
+
+
+class LocalDataVault:
+    """File-based credential store in ~/.anton/data_vault/."""
 
     def __init__(self, vault_dir: Path | None = None) -> None:
         self._dir = vault_dir or Path("~/.anton/data_vault").expanduser()
@@ -140,7 +180,7 @@ def next_connection_number(self, engine: str) -> int:
         ]
         max_n = 0
         for fname in existing:
-            suffix = fname[len(prefix) :]
+            suffix = fname[len(prefix):]
             if suffix.isdigit():
                 max_n = max(max_n, int(suffix))
         return max_n + 1

From 1c9160e2cf1afdc08654372760fb1bd162d88ba3 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:54:24 +0200
Subject: [PATCH 112/134] Use local vault

---
 anton/chat.py              | 12 +++++++-----
 anton/tools.py             |  6 +++---
 anton/utils/datasources.py |  4 ++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 84a95e02..791d0377 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -70,7 +70,7 @@
     list_datasources,
     test_llm,
 )
-from anton.core.datasources.data_vault import DataVault
+from anton.core.datasources.data_vault import LocalDataVault
 from anton.utils.datasources import (
     register_secret_vars,
 )
@@ -935,7 +935,7 @@ async def _chat_loop(
 
     # Inject all Local Vault connections as namespaced DS_* env vars so every
     # scratchpad subprocess inherits them. Must happen before any ChatSession is created.
-    dv = DataVault()
+    dv = LocalDataVault()
     dreg = DatasourceRegistry()
     for conn in dv.list_connections():
         dv.inject_env(conn["engine"], conn["name"])  # flat=False by default
@@ -1217,14 +1217,15 @@ def _bottom_toolbar():
                         session._scratchpads,
                         session,
                         prefill=arg or None,
+                        vault=session._data_vault,
                     )
                     continue
                 elif cmd == "/list":
-                    handle_list_data_sources(console)
+                    handle_list_data_sources(console, vault=session._data_vault)
                     continue
                 elif cmd == "/remove":
                     arg = parts[1].strip() if len(parts) > 1 else ""
-                    await handle_remove_data_source(console, arg)
+                    await handle_remove_data_source(console, arg, vault=session._data_vault)
                     continue
                 elif cmd == "/edit":
                     arg = parts[1].strip() if len(parts) > 1 else ""
@@ -1239,12 +1240,13 @@ def _bottom_toolbar():
                             session._scratchpads,
                             session,
                             datasource_name=arg,
+                            vault=session._data_vault,
                         )
                     continue
                 elif cmd == "/test":
                     arg = parts[1].strip() if len(parts) > 1 else ""
                     await handle_test_datasource(
-                        console, session._scratchpads, arg
+                        console, session._scratchpads, arg, vault=session._data_vault
                     )
                     continue
                 elif cmd == "/skill":
diff --git a/anton/tools.py b/anton/tools.py
index f3c97692..c86aa79a 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -31,10 +31,9 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     )
 
     from anton.commands.datasource import handle_connect_datasource
-    from anton.core.datasources.data_vault import DataVault
 
-    # Check which connections exist before
-    vault = DataVault()
+    from anton.core.datasources.data_vault import LocalDataVault
+    vault = session._data_vault or LocalDataVault()
     before = {f"{c['engine']}-{c['name']}" for c in vault.list_connections()}
 
     # Clear any stale status from a previous run
@@ -48,6 +47,7 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
         prefill=engine,
         known_variables=known_variables or None,
         from_tool_call=True,
+        vault=vault,
     )
 
     # Check if a new connection was actually added
diff --git a/anton/utils/datasources.py b/anton/utils/datasources.py
index 70a63141..c027c99e 100644
--- a/anton/utils/datasources.py
+++ b/anton/utils/datasources.py
@@ -5,7 +5,7 @@
 import yaml
 from typing import TYPE_CHECKING
 
-from anton.core.datasources.data_vault import DataVault, _slug_env_prefix
+from anton.core.datasources.data_vault import DataVault, LocalDataVault, _slug_env_prefix
 from anton.core.datasources.datasource_registry import DatasourceRegistry, _YAML_BLOCK_RE
 
 if TYPE_CHECKING:
@@ -114,7 +114,7 @@ def build_datasource_context(vault: DataVault, active_only: str | None = None) -
     If active_only is set, only the matching slug is included.
     """
     try:
-        vault = vault or DataVault()
+        vault = vault or LocalDataVault()
         conns = vault.list_connections()
     except Exception:
         return ""

From a9b6689499226dfa2622dc45b0e6c40015ebdf8e Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:54:31 +0200
Subject: [PATCH 113/134] Fix tests

---
 tests/conftest.py          | 21 +++++++++++++++++++--
 tests/test_chat.py         | 13 +++++++------
 tests/test_chat_context.py | 30 ++++++++++++++++--------------
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 56058f5b..9fa2287e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,27 @@
 from __future__ import annotations
 
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
-from anton.core.llm.provider import LLMResponse, ToolCall, Usage
+from anton.core.llm.provider import LLMResponse, ProviderConnectionInfo, ToolCall, Usage
+
+
+def make_mock_llm() -> AsyncMock:
+    """Return an AsyncMock LLM client with coding_provider configured for sync use.
+
+    ``AsyncMock`` makes all child attributes ``AsyncMock`` too, which means
+    ``coding_provider.export_connection_info()`` would return a coroutine —
+    but ``ChatSession.__init__`` calls it synchronously.  This helper fixes
+    that by explicitly wiring ``coding_provider`` with a plain ``MagicMock``.
+    """
+    mock = AsyncMock()
+    mock.coding_provider = MagicMock()
+    mock.coding_provider.export_connection_info = MagicMock(
+        return_value=ProviderConnectionInfo(provider="anthropic", api_key="test")
+    )
+    mock.coding_model = "claude-sonnet-4-6"
+    return mock
 
 
 @pytest.fixture()
diff --git a/tests/test_chat.py b/tests/test_chat.py
index 82a11276..de13a815 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -6,6 +6,7 @@
 
 from anton.chat import ChatSession
 from anton.core.session import ChatSessionConfig
+from tests.conftest import make_mock_llm
 from anton.core.llm.provider import (
     ContextOverflowError,
     LLMResponse,
@@ -29,7 +30,7 @@ def _text_response(text: str) -> LLMResponse:
 class TestChatSession:
     async def test_conversational_turn(self):
         """Text-only response for casual conversation."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hey! How can I help?"))
 
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
@@ -40,7 +41,7 @@ async def test_conversational_turn(self):
 
     async def test_history_grows_across_turns(self):
         """Multiple turns accumulate in history."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _text_response("Hi there!"),
@@ -78,7 +79,7 @@ async def _gen(**kwargs):
 class TestChatSessionStreaming:
     async def test_turn_stream_yields_text_deltas(self):
         """Streaming turn yields text deltas and updates history."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
 
         async def _stream(**kwargs):
             yield StreamTextDelta(text="Hello ")
@@ -124,7 +125,7 @@ async def _plan_stream(**kwargs):
                     )
                 )
 
-        session = ChatSession(ChatSessionConfig(llm_client=AsyncMock()))
+        session = ChatSession(ChatSessionConfig(llm_client=make_mock_llm()))
         session._llm.plan_stream = _plan_stream
         session._llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — done"))
         session._summarize_history = AsyncMock()
@@ -145,7 +146,7 @@ async def _plan_stream(**kwargs):
                 )
             )
 
-        session = ChatSession(ChatSessionConfig(llm_client=AsyncMock()))
+        session = ChatSession(ChatSessionConfig(llm_client=make_mock_llm()))
         session._llm.plan_stream = _plan_stream
         session._llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — done"))
         session._summarize_history = AsyncMock()
@@ -166,7 +167,7 @@ async def _plan_stream(**kwargs):
                 )
             )
 
-        session = ChatSession(ChatSessionConfig(llm_client=AsyncMock()))
+        session = ChatSession(ChatSessionConfig(llm_client=make_mock_llm()))
         session._llm.plan_stream = _plan_stream
         session._llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — done"))
         session._summarize_history = AsyncMock()
diff --git a/tests/test_chat_context.py b/tests/test_chat_context.py
index 84909d25..8939c30c 100644
--- a/tests/test_chat_context.py
+++ b/tests/test_chat_context.py
@@ -5,11 +5,13 @@
 import urllib.error
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock
+from tests.conftest import make_mock_llm
 
 import pytest
 
 from anton.chat import ChatSession, _handle_connect
 from anton.core.session import ChatSessionConfig
+from anton.core.llm.prompt_builder import SystemPromptContext
 from anton.minds_client import describe_minds_connection_error
 from anton.config.settings import AntonSettings
 from anton.core.tools.tool_defs import MEMORIZE_TOOL
@@ -88,7 +90,7 @@ def test_tool_definition_structure(self):
 
     async def test_memorize_creates_rule(self, cortex, memory_dirs):
         """When LLM calls memorize, a rule is created in memory."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _memorize_response(
@@ -114,7 +116,7 @@ async def test_memorize_creates_rule(self, cortex, memory_dirs):
 
     async def test_memorize_creates_lesson(self, cortex, memory_dirs):
         """When LLM calls memorize with kind=lesson, a lesson is created."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _memorize_response(
@@ -143,7 +145,7 @@ async def test_memory_injected_into_system_prompt(self, cortex, memory_dirs):
         hc = Hippocampus(project_dir)
         hc.encode_rule("Use httpx instead of requests", kind="always", confidence="high", source="user")
 
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
@@ -156,7 +158,7 @@ async def test_memory_injected_into_system_prompt(self, cortex, memory_dirs):
 
     async def test_no_cortex_excludes_memorize_tool(self):
         """Without cortex or self_awareness, memorize tool is not offered."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm, self_awareness=None, cortex=None))
@@ -170,7 +172,7 @@ async def test_no_cortex_excludes_memorize_tool(self):
 
     async def test_cortex_includes_memorize_tool(self, cortex):
         """With cortex, memorize tool is included."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm, cortex=cortex))
@@ -184,7 +186,7 @@ async def test_cortex_includes_memorize_tool(self, cortex):
 
     async def test_tool_result_in_history(self, cortex, memory_dirs):
         """memorize tool result appears in conversation history."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _memorize_response(
@@ -212,7 +214,7 @@ async def test_anton_md_injected_into_system_prompt(self, ws, cortex):
         """anton.md content is injected into the system prompt."""
         ws.anton_md_path.write_text("This project uses Django and PostgreSQL")
 
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
         session = ChatSession(ChatSessionConfig(
@@ -231,7 +233,7 @@ async def test_empty_anton_md_no_section(self, ws, cortex):
         """Empty anton.md doesn't add a section to the prompt."""
         ws.anton_md_path.write_text("")
 
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
         session = ChatSession(ChatSessionConfig(
@@ -249,12 +251,12 @@ async def test_empty_anton_md_no_section(self, ws, cortex):
 class TestRuntimeContext:
     async def test_runtime_context_injected_into_system_prompt(self):
         """Runtime context (provider/model) appears in the system prompt."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
         session = ChatSession(ChatSessionConfig(
             llm_client=mock_llm,
-            runtime_context="- Provider: anthropic\n- Planning model: claude-sonnet-4-6\n- Coding model: claude-opus-4-6",
+            system_prompt_context=SystemPromptContext(runtime_context="- Provider: anthropic\n- Planning model: claude-sonnet-4-6\n- Coding model: claude-opus-4-6"),
         ))
         await session.turn("hi")
 
@@ -266,12 +268,12 @@ async def test_runtime_context_injected_into_system_prompt(self):
 
     async def test_system_prompt_warns_not_to_ask_about_llm(self):
         """System prompt includes instruction to never ask which LLM to use."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
         session = ChatSession(ChatSessionConfig(
             llm_client=mock_llm,
-            runtime_context="- Provider: anthropic",
+            system_prompt_context=SystemPromptContext(runtime_context="- Provider: anthropic"),
         ))
         await session.turn("hi")
 
@@ -281,10 +283,10 @@ async def test_system_prompt_warns_not_to_ask_about_llm(self):
 
     async def test_conversation_discipline_in_prompt(self):
         """System prompt includes conversation discipline rules."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hello!"))
 
-        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, runtime_context=""))
+        session = ChatSession(ChatSessionConfig(llm_client=mock_llm, system_prompt_context=SystemPromptContext()))
         await session.turn("hi")
 
         call_kwargs = mock_llm.plan.call_args

From db6cf26850b231e578e76ed7bd301868978a5cd7 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:54:36 +0200
Subject: [PATCH 114/134] Fix tests

---
 tests/test_chat_scratchpad.py |  19 +-
 tests/test_datasource.py      | 458 ++++++++++++++++------------------
 2 files changed, 232 insertions(+), 245 deletions(-)

diff --git a/tests/test_chat_scratchpad.py b/tests/test_chat_scratchpad.py
index aec784f7..22b43ebb 100644
--- a/tests/test_chat_scratchpad.py
+++ b/tests/test_chat_scratchpad.py
@@ -2,6 +2,7 @@
 
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
+from tests.conftest import make_mock_llm
 
 import pytest
 
@@ -69,7 +70,7 @@ def test_packages_property_is_array_of_strings(self):
 
     async def test_scratchpad_tool_in_tools(self, workspace):
         """scratchpad should always be in _build_tools() output."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("Hi!"))
 
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm, workspace=workspace))
@@ -87,7 +88,7 @@ async def test_scratchpad_tool_in_tools(self, workspace):
 class TestScratchpadExecViaChat:
     async def test_scratchpad_exec_via_chat(self, workspace):
         """exec action flows through and returns output."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _scratchpad_response("Let me compute.", "exec", "main", "print(7 * 6)"),
@@ -113,7 +114,7 @@ async def test_scratchpad_exec_via_chat(self, workspace):
 class TestScratchpadViewViaChat:
     async def test_scratchpad_view_via_chat(self, workspace):
         """view action returns cell history."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _scratchpad_response("Running code.", "exec", "analysis", "x = 10\nprint(x)"),
@@ -142,7 +143,7 @@ async def test_scratchpad_view_via_chat(self, workspace):
 class TestScratchpadRemoveViaChat:
     async def test_scratchpad_remove_via_chat(self, workspace):
         """remove action cleans up the scratchpad."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _scratchpad_response("Creating.", "exec", "tmp", "print('hi')"),
@@ -168,7 +169,7 @@ async def test_scratchpad_remove_via_chat(self, workspace):
 class TestScratchpadDumpViaChat:
     async def test_scratchpad_dump_via_chat(self, workspace):
         """dump action flows through chat, returns markdown with code fences."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 # First: exec some code
@@ -216,7 +217,7 @@ class TestScratchpadDumpStreaming:
     async def test_scratchpad_dump_streams_tool_result(self, workspace):
         """dump action yields a StreamToolResult for display, but sends a short
         summary back to the LLM to avoid it parroting the full notebook."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — task done"))
 
         call_count = 0
@@ -271,7 +272,7 @@ async def test_scratchpad_in_streaming_path(self, workspace):
         tool_response = _scratchpad_response("Computing.", "exec", "s", "print(99)")
         final_response = _text_response("Got 99.")
 
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(return_value=_text_response("STATUS: COMPLETE — task done"))
 
         call_count = 0
@@ -307,7 +308,7 @@ def fake_plan_stream(**kwargs):
 class TestScratchpadInstallViaChat:
     async def test_install_action_dispatch(self, workspace):
         """install action flows through chat and returns pip output."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _scratchpad_response(
@@ -333,7 +334,7 @@ async def test_install_action_dispatch(self, workspace):
 
     async def test_install_empty_packages_via_chat(self, workspace):
         """install with no packages returns a message without crashing."""
-        mock_llm = AsyncMock()
+        mock_llm = make_mock_llm()
         mock_llm.plan = AsyncMock(
             side_effect=[
                 _scratchpad_response("Installing.", "install", "main", packages=[]),
diff --git a/tests/test_datasource.py b/tests/test_datasource.py
index 9d2da8b1..e480625c 100644
--- a/tests/test_datasource.py
+++ b/tests/test_datasource.py
@@ -32,7 +32,7 @@
     parse_connection_slug,
 )
 from anton.cli import app as cli_app
-from anton.core.datasources.data_vault import DataVault, _slug_env_prefix
+from anton.core.datasources.data_vault import DataVault, LocalDataVault, _slug_env_prefix
 from anton.core.datasources.datasource_registry import (
     DatasourceEngine,
     DatasourceRegistry,
@@ -47,7 +47,7 @@ def vault_dir(tmp_path):
 
 @pytest.fixture()
 def vault(vault_dir):
-    return DataVault(vault_dir=vault_dir)
+    return LocalDataVault(vault_dir=vault_dir)
 
 
 @pytest.fixture()
@@ -167,11 +167,17 @@ async def _default_generate_object(schema_class, **kwargs):
                 "override session._llm.generate_object in this test"
             )
 
+        from anton.core.llm.provider import ProviderConnectionInfo
         mock_llm = AsyncMock()
         plan_response = MagicMock()
         plan_response.content = "UNKNOWN"
         mock_llm.plan = AsyncMock(return_value=plan_response)
         mock_llm.generate_object = AsyncMock(side_effect=_default_generate_object)
+        mock_llm.coding_provider = MagicMock()
+        mock_llm.coding_provider.export_connection_info = MagicMock(
+            return_value=ProviderConnectionInfo(provider="anthropic", api_key="test")
+        )
+        mock_llm.coding_model = "claude-sonnet-4-6"
         session = ChatSession(ChatSessionConfig(llm_client=mock_llm))
         session._scratchpads = AsyncMock()
         return session
@@ -596,30 +602,31 @@ async def test_unknown_engine_returns_early(
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=DataVault(vault_dir=vault_dir)),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", return_value="MySQL"),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=LocalDataVault(vault_dir=vault_dir)),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.prompt_or_cancel", return_value="MySQL"),
+            patch("anton.commands.datasource.custom.prompt_or_cancel", return_value="MySQL"),
         ):
             result = await handle_connect_datasource(
                 console, session._scratchpads, session
             )
 
         assert result is session
-        assert DataVault(vault_dir=vault_dir).list_connections() == []
+        assert LocalDataVault(vault_dir=vault_dir).list_connections() == []
 
     @pytest.mark.asyncio
     async def test_partial_save_on_skip(self, registry, vault_dir, make_session):
         """Answering 'skip' at the bulk prompt saves partial credentials and returns without testing."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         responses = iter(["PostgreSQL", "n", "skip"])
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -641,7 +648,7 @@ async def test_successful_connection_saves_and_injects_history(
         """Happy path: test passes, credentials saved, history entry added."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -659,10 +666,10 @@ async def test_successful_connection_saves_and_injects_history(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -693,20 +700,25 @@ async def test_test_failed_decline_sets_status(
         """
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad(make_cell(stdout="", error="connection refused"))
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
 
         # Engine pick + decline retry after the test fails
         responses = iter(["PostgreSQL", "n"])
+        _responses_next = lambda *a, **kw: next(responses)
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
-                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+                "anton.commands.datasource.connect.prompt_or_cancel",
+                new=AsyncMock(side_effect=_responses_next),
+            ),
+            patch(
+                "anton.commands.datasource.verify.prompt_or_cancel",
+                new=AsyncMock(side_effect=_responses_next),
             ),
         ):
             await handle_connect_datasource(
@@ -737,7 +749,7 @@ async def test_fully_prefilled_known_variables_skips_help_prompt(
         """
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -748,10 +760,10 @@ async def test_fully_prefilled_known_variables_skips_help_prompt(
         responses = iter(["PostgreSQL"])
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -788,7 +800,7 @@ async def test_credentials_pasted_at_help_prompt(
         """
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         # Mock the LLM to return a structured extraction for the paste.
         # connect_collector.extract_variables now uses generate_object
@@ -824,10 +836,10 @@ async def test_credentials_pasted_at_help_prompt(
         responses = iter(["PostgreSQL", pasted])
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -875,7 +887,7 @@ async def test_from_tool_call_does_not_append_to_history(
         history_len_before = len(session._history)
 
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
 
@@ -892,10 +904,10 @@ async def test_from_tool_call_does_not_append_to_history(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -923,7 +935,7 @@ async def test_failed_test_offers_retry(
         """Connection test failure prompts for retry; success on second attempt saves."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad(side_effect=[
             make_cell(stdout="", stderr="password authentication failed"),
@@ -946,10 +958,14 @@ async def test_failed_test_offers_retry(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch(
+                "anton.commands.datasource.connect.prompt_or_cancel",
+                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+            ),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.verify.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -968,7 +984,7 @@ async def test_failed_test_no_retry_returns_without_saving(
         """Declining retry on failed test leaves vault empty."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad(make_cell(stdout="", error="connection refused"))
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -987,10 +1003,14 @@ async def test_failed_test_no_retry_returns_without_saving(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch(
+                "anton.commands.datasource.connect.prompt_or_cancel",
+                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
+            ),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.verify.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -1008,7 +1028,7 @@ async def test_ds_env_injected_after_successful_connect(
         """After a successful connect, namespaced DS_* vars are injected."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -1026,10 +1046,10 @@ async def test_ds_env_injected_after_successful_connect(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -1045,7 +1065,7 @@ async def test_auth_method_choice_selects_fields(
         """Selecting an auth method filters to that method's fields only."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -1053,10 +1073,10 @@ async def test_auth_method_choice_selects_fields(
         responses = iter(["HubSpot", "1", "n", "pat-na1-abc123"])
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -1078,7 +1098,7 @@ async def test_bulk_key_value_extraction(
         """A single bulk response with key=value pairs fills multiple fields at once."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         # Mock the LLM extraction to return a typed Pydantic result
         # (connect_collector now uses generate_object with a schema).
@@ -1109,10 +1129,10 @@ async def test_bulk_key_value_extraction(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -1181,7 +1201,7 @@ async def test_register_and_scrub_on_connect(
         self, registry, vault_dir, monkeypatch, make_pad
     ):
         """After _handle_connect_datasource, the new secret var is immediately scrubbed."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         session = MagicMock()
         session._history = []
         session._cortex = None
@@ -1203,10 +1223,10 @@ async def test_register_and_scrub_on_connect(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -1260,15 +1280,15 @@ def test_active_datasource_defaults_to_none(self, make_session):
     @pytest.mark.asyncio
     async def test_reconnect_sets_active_datasource(self, vault_dir, make_session):
         """Reconnecting to a slug via prefill sets session._active_datasource."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("hubspot", "2", {"access_token": "pat-xxx"})
 
         session = make_session()
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry"),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry"),
         ):
             result = await handle_connect_datasource(
                 console, session._scratchpads, session, prefill="hubspot-2"
@@ -1281,7 +1301,7 @@ async def test_reconnect_all_namespaced_vars_available(
         self, vault_dir, make_session
     ):
         """After reconnect, ALL saved connections remain available as namespaced vars."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "oracle",
             "1",
@@ -1298,8 +1318,8 @@ async def test_reconnect_all_namespaced_vars_available(
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry"),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry"),
         ):
             result = await handle_connect_datasource(
                 console, session._scratchpads, session, prefill="hubspot-2"
@@ -1313,69 +1333,63 @@ async def test_reconnect_all_namespaced_vars_available(
 
     def test_build_datasource_context_no_filter(self, vault_dir):
         """Without active_only, all vault entries appear in the context."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("oracle", "1", {"host": "oracle.host"})
         vault.save("hubspot", "2", {"access_token": "pat-xxx"})
 
-        with patch("anton.utils.datasources.DataVault", return_value=vault):
-            ctx = build_datasource_context()
+        ctx = build_datasource_context(vault)
 
         assert "oracle-1" in ctx
         assert "hubspot-2" in ctx
 
     def test_build_datasource_context_active_only_filters(self, vault_dir):
         """With active_only set, only the matching slug appears."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("oracle", "1", {"host": "oracle.host"})
         vault.save("hubspot", "2", {"access_token": "pat-xxx"})
 
-        with patch("anton.utils.datasources.DataVault", return_value=vault):
-            ctx = build_datasource_context(active_only="hubspot-2")
+        ctx = build_datasource_context(vault, active_only="hubspot-2")
 
         assert "hubspot-2" in ctx
         assert "oracle-1" not in ctx
 
     def test_build_datasource_context_active_only_empty_when_no_match(self, vault_dir):
         """If active_only doesn't match any slug, the section has no entries."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("oracle", "1", {"host": "oracle.host"})
 
-        with patch("anton.utils.datasources.DataVault", return_value=vault):
-            ctx = build_datasource_context(active_only="hubspot-99")
+        ctx = build_datasource_context(vault, active_only="hubspot-99")
 
         assert "oracle-1" not in ctx
 
     def test_build_datasource_context_shows_namespaced_vars(self, vault_dir):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgres", "prod_db", {"host": "pg.example.com", "password": "s3cr3t"}
         )
 
-        with patch("anton.utils.datasources.DataVault", return_value=vault):
-            ctx = build_datasource_context()
+        ctx = build_datasource_context(vault)
 
         assert "DS_POSTGRES_PROD_DB__HOST" in ctx
         assert "DS_POSTGRES_PROD_DB__PASSWORD" in ctx
         assert "DS_HOST" not in ctx
 
     def test_build_datasource_context_shows_slug_and_engine_label(self, vault_dir):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("postgres", "prod_db", {"host": "pg.example.com"})
 
-        with patch("anton.utils.datasources.DataVault", return_value=vault):
-            ctx = build_datasource_context()
+        ctx = build_datasource_context(vault)
 
         assert "postgres-prod_db" in ctx
         assert "(postgres)" in ctx
 
     def test_multi_source_context_shows_both_connections(self, vault_dir):
         """Both connections are visible in the context with their namespaced vars."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("postgres", "prod_db", {"host": "pg.example.com"})
         vault.save("hubspot", "main", {"access_token": "pat-abc"})
 
-        with patch("anton.utils.datasources.DataVault", return_value=vault):
-            ctx = build_datasource_context()
+        ctx = build_datasource_context(vault)
 
         assert "postgres-prod_db" in ctx
         assert "DS_POSTGRES_PROD_DB__HOST" in ctx
@@ -1402,15 +1416,14 @@ def test_command_registered(self, cmd_name):
 class TestHandleListDataSources:
     def test_empty_vault_shows_message(self, vault_dir):
         console = MagicMock()
-        with patch("anton.commands.datasource.DataVault", return_value=DataVault(vault_dir=vault_dir)):
-            handle_list_data_sources(console)
+        handle_list_data_sources(console, vault=LocalDataVault(vault_dir=vault_dir))
         printed = " ".join(str(c) for c in console.print.call_args_list)
         assert "No data sources" in printed or "connect" in printed
 
     def test_complete_connection_shows_saved_with_engine_name(
         self, vault_dir, registry
     ):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -1426,11 +1439,8 @@ def test_complete_connection_shows_saved_with_engine_name(
         buf = io.StringIO()
         rich_console = Console(file=buf, highlight=False, markup=False)
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
-            handle_list_data_sources(rich_console)
+        with patch("anton.commands.datasource.manage.DatasourceRegistry", return_value=registry):
+            handle_list_data_sources(rich_console, vault=vault)
 
         output = buf.getvalue()
         assert "postgresql-prod_db" in output
@@ -1438,18 +1448,15 @@ def test_complete_connection_shows_saved_with_engine_name(
         assert "PostgreSQL" in output  # engine display_name shown
 
     def test_incomplete_connection_shows_incomplete(self, vault_dir, registry):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         # Missing required fields: database, user, password
         vault.save("postgresql", "partial", {"host": "db.example.com"})
 
         buf = io.StringIO()
         rich_console = Console(file=buf, highlight=False, markup=False)
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
-            handle_list_data_sources(rich_console)
+        with patch("anton.commands.datasource.manage.DatasourceRegistry", return_value=registry):
+            handle_list_data_sources(rich_console, vault=vault)
 
         output = buf.getvalue()
         assert "incomplete" in output.lower()
@@ -1458,7 +1465,7 @@ def test_incomplete_connection_shows_incomplete(self, vault_dir, registry):
 class TestHandleTestDatasource:
     @pytest.mark.asyncio
     async def test_success_path(self, vault_dir, registry, make_cell, make_pad):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -1475,18 +1482,15 @@ async def test_success_path(self, vault_dir, registry, make_cell, make_pad):
         scratchpads = AsyncMock()
         scratchpads.get_or_create = AsyncMock(return_value=pad)
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
-            await handle_test_datasource(console, scratchpads, "postgresql-prod_db")
+        with patch("anton.commands.datasource.verify.DatasourceRegistry", return_value=registry):
+            await handle_test_datasource(console, scratchpads, "postgresql-prod_db", vault=vault)
 
         printed = " ".join(str(c) for c in console.print.call_args_list)
         assert "✓" in printed or "passed" in printed.lower()
 
     @pytest.mark.asyncio
     async def test_failure_path(self, vault_dir, registry, make_cell, make_pad):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -1503,26 +1507,20 @@ async def test_failure_path(self, vault_dir, registry, make_cell, make_pad):
         scratchpads = AsyncMock()
         scratchpads.get_or_create = AsyncMock(return_value=pad)
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
-            await handle_test_datasource(console, scratchpads, "postgresql-prod_db")
+        with patch("anton.commands.datasource.verify.DatasourceRegistry", return_value=registry):
+            await handle_test_datasource(console, scratchpads, "postgresql-prod_db", vault=vault)
 
         printed = " ".join(str(c) for c in console.print.call_args_list)
         assert "✗" in printed or "failed" in printed.lower()
 
     @pytest.mark.asyncio
     async def test_unknown_connection(self, vault_dir, registry):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         console = MagicMock()
         scratchpads = AsyncMock()
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
-            await handle_test_datasource(console, scratchpads, "postgresql-ghost")
+        with patch("anton.commands.datasource.verify.DatasourceRegistry", return_value=registry):
+            await handle_test_datasource(console, scratchpads, "postgresql-ghost", vault=vault)
 
         printed = " ".join(str(c) for c in console.print.call_args_list)
         assert "not found" in printed.lower() or "No connection" in printed
@@ -1532,11 +1530,7 @@ async def test_empty_slug_shows_usage(self, vault_dir, registry):
         console = MagicMock()
         scratchpads = AsyncMock()
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=DataVault(vault_dir=vault_dir)),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
-            await handle_test_datasource(console, scratchpads, "")
+        await handle_test_datasource(console, scratchpads, "", vault=LocalDataVault(vault_dir=vault_dir))
 
         printed = " ".join(str(c) for c in console.print.call_args_list)
         assert "Usage" in printed or "test" in printed
@@ -1548,7 +1542,7 @@ async def test_existing_values_loaded(
         self, registry, vault_dir, make_session, make_cell, make_pad
     ):
         """Edit shows existing non-secret values as defaults."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -1578,10 +1572,10 @@ async def test_existing_values_loaded(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(prompt_values)),
             ),
         ):
@@ -1602,7 +1596,7 @@ async def test_enter_preserves_secret_value(
         self, registry, vault_dir, make_session, make_cell, make_pad
     ):
         """Pressing Enter on a secret field keeps the existing value."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         original_pass = "original_secret_pass"
         vault.save(
             "postgresql",
@@ -1633,10 +1627,10 @@ async def test_enter_preserves_secret_value(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(prompt_values)),
             ),
         ):
@@ -1656,13 +1650,13 @@ async def test_unknown_slug_returns_session(
         self, registry, vault_dir, make_session
     ):
         """Editing a non-existent slug returns the session unchanged."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         session = make_session()
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
         ):
             result = await handle_connect_datasource(
                 console,
@@ -1683,11 +1677,10 @@ async def test_confirmation_yes_deletes(self, vault, registry):
         console = Console(quiet=True)
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", new=AsyncMock(return_value="y")),
+            patch("anton.commands.datasource.manage.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.manage.prompt_or_cancel", new=AsyncMock(return_value="y")),
         ):
-            await handle_remove_data_source(console, "postgresql-prod_db")
+            await handle_remove_data_source(console, "postgresql-prod_db", vault=vault)
 
         assert vault.load("postgresql", "prod_db") is None
 
@@ -1697,34 +1690,32 @@ async def test_confirmation_no_preserves(self, vault, registry):
         console = Console(quiet=True)
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", new=AsyncMock(return_value="n")),
+            patch("anton.commands.datasource.manage.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.manage.prompt_or_cancel", new=AsyncMock(return_value="n")),
         ):
-            await handle_remove_data_source(console, "postgresql-prod_db")
+            await handle_remove_data_source(console, "postgresql-prod_db", vault=vault)
 
         assert vault.load("postgresql", "prod_db") is not None
 
     @pytest.mark.asyncio
     async def test_unknown_name_shows_message(self, vault_dir, registry):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.manage.DatasourceRegistry", return_value=registry),
         ):
-            await handle_remove_data_source(console, "postgresql-ghost")
+            await handle_remove_data_source(console, "postgresql-ghost", vault=vault)
 
         printed = " ".join(str(c) for c in console.print.call_args_list)
         assert "not found" in printed.lower() or "No connection" in printed
 
     @pytest.mark.asyncio
     async def test_invalid_format_shows_warning(self, vault_dir):
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         console = MagicMock()
 
-        with patch("anton.commands.datasource.DataVault", return_value=vault):
+        with patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault):
             await handle_remove_data_source(console, "nohyphen")
 
         printed = " ".join(str(c) for c in console.print.call_args_list)
@@ -1738,7 +1729,7 @@ async def test_connect_clears_previous_ds_vars(
     ):
         """After a successful new connect, stale DS_* vars are cleared."""
         monkeypatch.setenv("DS_ACCESS_TOKEN", "old-token")
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         session = make_session()
         console = MagicMock()
 
@@ -1758,10 +1749,10 @@ async def test_connect_clears_previous_ds_vars(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
         ):
@@ -1775,7 +1766,7 @@ async def test_two_same_type_connections_no_collision(
         self, registry, vault_dir, make_session
     ):
         """Both same-type connections remain available as distinct namespaced vars."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "db1",
@@ -1803,8 +1794,8 @@ async def test_two_same_type_connections_no_collision(
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
         ):
             await handle_connect_datasource(
                 console,
@@ -1826,8 +1817,8 @@ async def test_test_data_source_no_arg_shows_usage(self, vault_dir, registry):
         scratchpads = AsyncMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=DataVault(vault_dir=vault_dir)),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=LocalDataVault(vault_dir=vault_dir)),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
         ):
             await handle_test_datasource(console, scratchpads, "")
 
@@ -1843,9 +1834,10 @@ async def test_edit_data_source_no_arg_safe(
         console = MagicMock()
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=DataVault(vault_dir=vault_dir)),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", return_value="UnknownEngine"),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=LocalDataVault(vault_dir=vault_dir)),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.prompt_or_cancel", return_value="UnknownEngine"),
+            patch("anton.commands.datasource.custom.prompt_or_cancel", return_value="some description"),
         ):
             updated = await handle_connect_datasource(
                 console,
@@ -1891,7 +1883,7 @@ def test_slug_with_empty_name_part(self):
 
     def test_fallback_to_vault_for_custom_engine(self, tmp_path):
         """Custom engine not in registry is resolved via vault fallback."""
-        vault = DataVault(vault_dir=tmp_path / "vault")
+        vault = LocalDataVault(vault_dir=tmp_path / "vault")
         vault.save("my_custom_db", "prod", {"host": "localhost"})
         result = parse_connection_slug(
             "my_custom_db-prod",
@@ -1902,7 +1894,7 @@ def test_fallback_to_vault_for_custom_engine(self, tmp_path):
 
     def test_registry_match_takes_priority_over_vault(self, tmp_path):
         """Registry prefix match wins even when vault also has the slug."""
-        vault = DataVault(vault_dir=tmp_path / "vault")
+        vault = LocalDataVault(vault_dir=tmp_path / "vault")
         vault.save("postgresql", "prod", {"host": "localhost"})
         result = parse_connection_slug(
             "postgresql-prod",
@@ -1913,7 +1905,7 @@ def test_registry_match_takes_priority_over_vault(self, tmp_path):
 
     def test_no_match_returns_none_with_vault(self, tmp_path):
         """Truly unknown slug returns None even with vault supplied."""
-        vault = DataVault(vault_dir=tmp_path / "vault")
+        vault = LocalDataVault(vault_dir=tmp_path / "vault")
         result = parse_connection_slug(
             "ghost-engine-1",
             known_engines=["postgresql"],
@@ -1945,7 +1937,7 @@ class TestTemporaryFlatExecution:
 
     def test_restore_namespaced_env_clears_flat_and_reinjects(self, vault_dir):
         """_restore_namespaced_env replaces flat vars with namespaced vars."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("postgres", "analytics", {"host": "analytics.example.com"})
 
         vault.inject_env("postgres", "analytics", flat=True)
@@ -1960,7 +1952,7 @@ def test_restore_namespaced_env_clears_flat_and_reinjects(self, vault_dir):
 
     def test_restore_namespaced_env_reinjects_all_connections(self, vault_dir):
         """_restore_namespaced_env restores ALL saved connections, not just one."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("postgres", "prod_db", {"host": "prod.example.com"})
         vault.save("hubspot", "main", {"access_token": "pat-abc"})
 
@@ -1978,7 +1970,7 @@ async def test_test_datasource_injects_flat_then_restores_namespaced(
         self, vault_dir, registry, make_pad
     ):
         """handle_test_datasource uses flat vars during snippet, then restores namespaced."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -2009,12 +2001,9 @@ async def capture_execute(snippet):
         scratchpads = AsyncMock()
         scratchpads.get_or_create = AsyncMock(return_value=pad)
 
-        with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-        ):
+        with patch("anton.commands.datasource.verify.DatasourceRegistry", return_value=registry):
             await handle_test_datasource(
-                MagicMock(), scratchpads, "postgresql-prod_db"
+                MagicMock(), scratchpads, "postgresql-prod_db", vault=vault
             )
 
         # During execution: flat var was set, namespaced was absent
@@ -2032,7 +2021,7 @@ class TestStaleDsRegistrationState:
 
     def test_remove_clears_stale_secret_vars(self, vault_dir, registry):
         """After removing a connection, its secret var names leave _DS_SECRET_VARS."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -2065,7 +2054,7 @@ def test_remove_clears_stale_secret_vars(self, vault_dir, registry):
 
     def test_edit_connection_refreshes_secret_vars(self, vault_dir, registry):
         """Overwriting a connection via vault.save rebuilds registration without duplication."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -2111,7 +2100,7 @@ def test_edit_connection_refreshes_secret_vars(self, vault_dir, registry):
 
     def test_reconnect_no_duplicate_secret_vars(self, vault_dir, registry):
         """Calling _restore_namespaced_env multiple times does not grow _DS_SECRET_VARS."""
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save(
             "postgresql",
             "prod_db",
@@ -2202,10 +2191,10 @@ async def test_missing_required_non_secret_field_prompts_user(
 
         with (
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.custom.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             result = await handle_add_custom_datasource(
@@ -2242,10 +2231,10 @@ async def test_missing_required_secret_field_prompts_user(
 
         with (
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.custom.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             result = await handle_add_custom_datasource(
@@ -2287,10 +2276,10 @@ async def test_incomplete_custom_datasource_not_saved(self, tmp_path, make_sessi
 
         with (
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.custom.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
             ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             result = await handle_add_custom_datasource(
@@ -2350,7 +2339,7 @@ async def test_custom_with_test_snippet_success(
         """Custom datasource with test_snippet: test passes → connection saved."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -2373,17 +2362,16 @@ async def test_custom_with_test_snippet_success(
             ["0", "My API Service", "I have an API key", "n", "my_secret_key"]
         )
 
+        poc = AsyncMock(side_effect=lambda *a, **kw: next(responses))
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
             patch(
-                "anton.commands.datasource.DatasourceRegistry",
+                "anton.commands.datasource.connect.DatasourceRegistry",
                 return_value=self._make_registry(tmp_path),
             ),
-            patch(
-                "anton.commands.datasource.prompt_or_cancel",
-                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
-            ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             result = await handle_connect_datasource(
@@ -2406,7 +2394,7 @@ async def test_custom_with_test_snippet_fail_no_retry(
         """Custom datasource: test fails and user declines retry → not saved."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad(make_cell(stdout="", stderr="connection refused"))
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -2429,17 +2417,17 @@ async def test_custom_with_test_snippet_fail_no_retry(
             ["0", "My API Service", "I have an API key", "n", "bad_key", "n"]
         )
 
+        poc = AsyncMock(side_effect=lambda *a, **kw: next(responses))
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
             patch(
-                "anton.commands.datasource.DatasourceRegistry",
+                "anton.commands.datasource.connect.DatasourceRegistry",
                 return_value=self._make_registry(tmp_path),
             ),
-            patch(
-                "anton.commands.datasource.prompt_or_cancel",
-                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
-            ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.verify.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             result = await handle_connect_datasource(
@@ -2456,7 +2444,7 @@ async def test_custom_with_test_snippet_fail_retry_success(
         """Custom datasource: test fails, user retries with corrected creds → saved."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad(side_effect=[
             make_cell(stdout="", stderr="invalid key"),
@@ -2490,17 +2478,17 @@ async def test_custom_with_test_snippet_fail_retry_success(
             ]
         )
 
+        poc = AsyncMock(side_effect=lambda *a, **kw: next(responses))
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
             patch(
-                "anton.commands.datasource.DatasourceRegistry",
+                "anton.commands.datasource.connect.DatasourceRegistry",
                 return_value=self._make_registry(tmp_path),
             ),
-            patch(
-                "anton.commands.datasource.prompt_or_cancel",
-                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
-            ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.verify.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             result = await handle_connect_datasource(
@@ -2521,7 +2509,7 @@ async def test_custom_without_test_snippet_saves(
         """Custom datasource without test_snippet: saves directly, no scratchpad call."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad()
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
@@ -2542,17 +2530,16 @@ async def test_custom_without_test_snippet_saves(
 
         responses = iter(["0", "My API Service", "I have an API key", "n", "my_key"])
 
+        poc = AsyncMock(side_effect=lambda *a, **kw: next(responses))
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
             patch(
-                "anton.commands.datasource.DatasourceRegistry",
+                "anton.commands.datasource.connect.DatasourceRegistry",
                 return_value=self._make_registry(tmp_path),
             ),
-            patch(
-                "anton.commands.datasource.prompt_or_cancel",
-                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
-            ),
-            patch("anton.commands.datasource.Path") as mock_path_cls,
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.custom.Path") as mock_path_cls,
         ):
             self._mock_ds_path(mock_path_cls, tmp_path)
             await handle_connect_datasource(console, session._scratchpads, session)
@@ -2582,7 +2569,7 @@ async def test_edit_failed_test_does_not_corrupt_vault(
         """edit with bad creds + test fails + user declines retry → original creds intact."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("postgresql", "prod_db", self.OLD_CREDS)
 
         pad = make_pad(make_cell(stdout="", stderr="connection refused"))
@@ -2593,13 +2580,12 @@ async def test_edit_failed_test_does_not_corrupt_vault(
             ["", "", "", "", "bad-pass", "", "n"]
         )  # field values, then retry?
 
+        poc = AsyncMock(side_effect=lambda *a, **kw: next(responses))
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch(
-                "anton.commands.datasource.prompt_or_cancel",
-                new=AsyncMock(side_effect=lambda *a, **kw: next(responses)),
-            ),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=poc),
+            patch("anton.commands.datasource.verify.prompt_or_cancel", new=poc),
         ):
             result = await handle_connect_datasource(
                 console,
@@ -2620,7 +2606,7 @@ async def test_edit_successful_test_persists_new_credentials(
         """edit with valid creds + test passes → new creds saved to vault."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
         vault.save("postgresql", "prod_db", self.OLD_CREDS)
 
         pad = make_pad()
@@ -2638,10 +2624,10 @@ async def test_edit_successful_test_persists_new_credentials(
         )
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(prompt_responses)),
             ),
         ):
@@ -2663,7 +2649,7 @@ async def test_connection_test_error_summary_uses_meaningful_line(
     ):
         """Error display shows last non-empty line (exception msg), not traceback header."""
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         traceback_text = (
             "Traceback (most recent call last):\n"
@@ -2690,9 +2676,9 @@ async def test_connection_test_error_summary_uses_meaningful_line(
         }
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", new=AsyncMock(return_value="n")),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.verify.prompt_or_cancel", new=AsyncMock(return_value="n")),
         ):
             result = await run_connection_test(
                 console,
@@ -2718,12 +2704,12 @@ async def test_esc_on_engine_selection_returns_session_unchanged(
         """Pressing Esc on the engine-selection prompt returns the session with no vault writes."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", new=AsyncMock(return_value=None)),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=AsyncMock(return_value=None)),
         ):
             result = await handle_connect_datasource(
                 console, session._scratchpads, session
@@ -2739,15 +2725,15 @@ async def test_esc_on_retry_does_not_save(
         """Pressing Esc at the retry prompt makes _run_connection_test return False."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         pad = make_pad(make_cell(stdout="", error="bad creds"))
         session._scratchpads.get_or_create = AsyncMock(return_value=pad)
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", new=AsyncMock(return_value=None)),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.verify.prompt_or_cancel", new=AsyncMock(return_value=None)),
         ):
             engine_def = registry.get("postgresql")
             credentials = {
@@ -2776,15 +2762,15 @@ async def test_esc_on_do_you_have_these_returns_session(
         """Pressing Esc after engine selection (on 'do you have these?') returns session."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         poc_calls = iter(["PostgreSQL", None])  # engine selected, then Esc
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
             patch(
-                "anton.commands.datasource.prompt_or_cancel",
+                "anton.commands.datasource.connect.prompt_or_cancel",
                 new=AsyncMock(side_effect=lambda *a, **kw: next(poc_calls)),
             ),
         ):
@@ -2802,7 +2788,7 @@ async def test_fuzzy_match_prompt_has_context_text(
         """The fuzzy-match confirmation prompt includes context text and uses (y/n)."""
         session = make_session()
         console = MagicMock()
-        vault = DataVault(vault_dir=vault_dir)
+        vault = LocalDataVault(vault_dir=vault_dir)
 
         captured_labels: list[str] = []
 
@@ -2811,9 +2797,9 @@ def _capture(label, **kw):
             return None  # Esc on every prompt to bail out
 
         with (
-            patch("anton.commands.datasource.DataVault", return_value=vault),
-            patch("anton.commands.datasource.DatasourceRegistry", return_value=registry),
-            patch("anton.commands.datasource.prompt_or_cancel", new=AsyncMock(side_effect=_capture)),
+            patch("anton.commands.datasource.connect.LocalDataVault", return_value=vault),
+            patch("anton.commands.datasource.connect.DatasourceRegistry", return_value=registry),
+            patch("anton.commands.datasource.connect.prompt_or_cancel", new=AsyncMock(side_effect=_capture)),
         ):
             # "PostgreeSQL" triggers fuzzy match against "PostgreSQL"
             await handle_connect_datasource(

From 571a8c19ad0cbe56514f3a44018416c9e557f256 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Tue, 14 Apr 2026 17:54:41 +0200
Subject: [PATCH 115/134] Fix tests

---
 tests/test_prompt_builder_skills.py | 4 ++--
 tests/test_session_skills_init.py   | 6 +++---
 tests/test_skills_e2e.py            | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/test_prompt_builder_skills.py b/tests/test_prompt_builder_skills.py
index 87631eb6..092b78da 100644
--- a/tests/test_prompt_builder_skills.py
+++ b/tests/test_prompt_builder_skills.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.skills import Skill, SkillStore
 
 
@@ -40,7 +40,7 @@ def populated_store(tmp_path: Path) -> SkillStore:
 def _build_prompt(builder: ChatSystemPromptBuilder, **overrides) -> str:
     defaults = dict(
         current_datetime="2026-04-10T12:00:00+00:00",
-        runtime_context="test runtime",
+        system_prompt_context=SystemPromptContext(runtime_context="test runtime"),
         proactive_dashboards=False,
         output_dir="/tmp/anton_out",
     )
diff --git a/tests/test_session_skills_init.py b/tests/test_session_skills_init.py
index f60a579a..d1bcdd08 100644
--- a/tests/test_session_skills_init.py
+++ b/tests/test_session_skills_init.py
@@ -16,7 +16,7 @@
 
 import pytest
 
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.skills import Skill, SkillStore
 from anton.core.tools.recall_skill import RECALL_SKILL_TOOL, handle_recall_skill
 from anton.core.tools.registry import ToolRegistry
@@ -67,7 +67,7 @@ def test_section_appears_when_store_passed(
         builder = ChatSystemPromptBuilder()
         prompt = builder.build(
             current_datetime="2026-04-10",
-            runtime_context="test",
+            system_prompt_context=SystemPromptContext(runtime_context="test"),
             proactive_dashboards=False,
             output_dir="/tmp/x",
             skill_store=store_with_one_skill,
@@ -79,7 +79,7 @@ def test_section_omitted_when_no_store(self):
         builder = ChatSystemPromptBuilder()
         prompt = builder.build(
             current_datetime="2026-04-10",
-            runtime_context="test",
+            system_prompt_context=SystemPromptContext(runtime_context="test"),
             proactive_dashboards=False,
             output_dir="/tmp/x",
             skill_store=None,
diff --git a/tests/test_skills_e2e.py b/tests/test_skills_e2e.py
index 78f40615..3ec5fe8f 100644
--- a/tests/test_skills_e2e.py
+++ b/tests/test_skills_e2e.py
@@ -24,7 +24,7 @@
 from rich.console import Console
 
 from anton.commands.skills import _SkillDraft, handle_skill_save
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.skills import SkillStore
 from anton.core.tools.recall_skill import RECALL_SKILL_TOOL
 from anton.core.tools.registry import ToolRegistry
@@ -123,7 +123,7 @@ async def test_full_skills_loop(console, store_root):
     builder = ChatSystemPromptBuilder()
     prompt = builder.build(
         current_datetime="2026-04-10T13:00:00+00:00",
-        runtime_context="test",
+        system_prompt_context=SystemPromptContext(runtime_context="test"),
         proactive_dashboards=False,
         output_dir="/tmp/x",
         skill_store=fresh_store,

From b7478adaa7ac8b25e603d727e2bb0c37c5aa3cab Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 12:36:33 -0700
Subject: [PATCH 116/134] updated the boot script to load and dump session
 state

---
 anton/core/backends/scratchpad_boot.py | 44 +++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/anton/core/backends/scratchpad_boot.py b/anton/core/backends/scratchpad_boot.py
index 255d82ad..46a19aa3 100644
--- a/anton/core/backends/scratchpad_boot.py
+++ b/anton/core/backends/scratchpad_boot.py
@@ -4,14 +4,52 @@
 import sys
 import traceback
 
+import dill
+
 from anton.core.backends.wire import (
     CELL_DELIM,
     RESULT_START,
     RESULT_END,
 )
 
+
+# --- Python session persistence and namespace injection ---
+PERSIST_SESSION = os.environ.get("ANTON_SCRATCHPAD_PERSIST_SESSION", "false").lower() in {"1", "true", "yes", "on"}
+SESSION_PATH = os.environ.get("ANTON_SCRATCHPAD_SESSION_PATH", "/anton_scratchpad_session.pkl")
+
+
+def _load_namespace() -> tuple[dict, str | None]:
+    if not PERSIST_SESSION:
+        return {"__builtins__": __builtins__}, None
+    try:
+        with open(SESSION_PATH, "rb") as f:
+            ns = dill.load(f)
+        if not isinstance(ns, dict):
+            raise TypeError("Session file did not contain a namespace dict")
+        ns.setdefault("__builtins__", __builtins__)
+        return ns, None
+    except FileNotFoundError:
+        return {"__builtins__": __builtins__}, None
+    except Exception:
+        return (
+            {"__builtins__": __builtins__},
+            "Failed to load scratchpad session; starting fresh.\n" + traceback.format_exc(),
+        )
+
+
+def _dump_namespace(ns: dict) -> str | None:
+    if not PERSIST_SESSION:
+        return None
+    try:
+        with open(SESSION_PATH, "wb") as f:
+            dill.dump(ns, f)
+        return None
+    except Exception:
+        return "Failed to dump scratchpad session.\n" + traceback.format_exc()
+
+
 # Persistent namespace across cells
-namespace = {"__builtins__": __builtins__}
+namespace, _ = _load_namespace()
 namespace["_anton_explainability_queries"] = []
 
 # --- Inject get_llm() for LLM access from scratchpad code ---
@@ -690,6 +728,10 @@ def emit(self, record):
             stdout_val[:_MAX_OUTPUT]
             + f"\n\n... (truncated, {len(stdout_val)} chars total)"
         )
+
+    # Persist session after each cell.
+    _dump_namespace(namespace)
+
     result = {
         "stdout": stdout_val,
         "stderr": err_buf.getvalue(),

From 20b4157881f54e1d51d6c28458e7465a38f2796b Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 12:36:48 -0700
Subject: [PATCH 117/134] added dill as a project dependency

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index d6f4d1e6..ab29c2ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,7 @@ dependencies = [
     "prompt-toolkit>=3.0",
     "packaging>=21.0",
     "pyyaml>=6.0",
+    "dill==0.3.8",
 ]
 
 [project.optional-dependencies]

From 7b93239f8289b474937e2034b164d5975c210f5c Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 16:13:56 -0700
Subject: [PATCH 118/134] removed output_dir and introduced
 resource_storage_instructions

---
 anton/core/llm/prompt_builder.py | 18 ++++++++----------
 anton/core/llm/prompts.py        |  4 ++--
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index 4fda0843..e5453733 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from pathlib import Path
 from typing import TYPE_CHECKING
 
 from .prompts import (
@@ -20,15 +19,18 @@
 class SystemPromptContext:
     """Bundled prompt-injection points for the system prompt.
 
-    Three levels with increasing importance (later = stronger influence):
+    Four levels with increasing importance (later = stronger influence):
       1. ``prefix``  — prepended before the base prompt
       2. ``runtime_context`` — interpolated into the RUNTIME IDENTITY section
-      3. ``suffix``  — appended after all other sections
+      3. ``resource_storage_context`` — free-text instructions on where to
+         store generated resources (visualizations, HTML files, data exports)
+      4. ``suffix``  — appended after all other sections
     """
 
     runtime_context: str = ""
     prefix: str = ""
     suffix: str = ""
+    resource_storage_context: str = ""
 
 
 class ChatSystemPromptBuilder:
@@ -108,16 +110,15 @@ def _build_visualizations_section(
         self,
         *,
         proactive_dashboards: bool,
-        output_path: str,
+        resource_storage_context: str,
     ) -> str:
         visualizations_output_format_prompt = (
             VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT
             if proactive_dashboards
             else VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT
         )
-        # The output-format prompt can reference `{output_path}`.
         output_format = visualizations_output_format_prompt.format(
-            output_path=output_path
+            resource_storage_context=resource_storage_context,
         )
         return BASE_VISUALIZATIONS_PROMPT.format(output_format=output_format)
 
@@ -127,7 +128,6 @@ def build(
         current_datetime: str,
         system_prompt_context: SystemPromptContext,
         proactive_dashboards: bool,
-        output_dir: str,
         tool_defs: list["ToolDef"] | None = None,
         memory_context: str = "",
         project_context: str = "",
@@ -135,11 +135,9 @@ def build(
         datasource_context: str = "",
         skill_store: "SkillStore | None" = None,
     ) -> str:
-        output_path = f"{Path(str(output_dir)).as_posix().rstrip('/')}/"
-
         visualizations_section = self._build_visualizations_section(
             proactive_dashboards=proactive_dashboards,
-            output_path=output_path,
+            resource_storage_context=system_prompt_context.resource_storage_context,
         )
 
         prompt = ""
diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
index 7911aaa7..df2c62d0 100644
--- a/anton/core/llm/prompts.py
+++ b/anton/core/llm/prompts.py
@@ -304,7 +304,7 @@
 Output format:
 - Unless the user explicitly asks for a different format, always output visualizations \
 as polished, single-file HTML pages — never raw PNGs or bare image files.
-Save output to `{output_path}` (create it if needed).
+{resource_storage_context}
 
 Visual design:
 - Make it look good by default. Use a dark theme (#0d1117 background, #e6edf3 text), \
@@ -362,7 +362,7 @@
 - For large datasets, summarize the top N and offer to show more.
 - When the user EXPLICITLY asks for a chart, dashboard, plot, or HTML visualization, \
 THEN build it as a self-contained HTML file with inlined CSS, JS, and data. \
-Save to `{output_path}`.
+{resource_storage_context}
 Use Apache ECharts (CDN), dark theme (#0d1117), and follow standard dashboard best practices. \
 If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \
 Never split CSS or chart logic into separate files — only large data payloads.\

From a7292988ef6770e1d504ef63762483fb363572eb Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 16:14:11 -0700
Subject: [PATCH 119/134] removed output_dir from session

---
 anton/core/session.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/anton/core/session.py b/anton/core/session.py
index af0f156d..74afa0a5 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -77,7 +77,6 @@ class ChatSessionConfig:
     history_store: HistoryStore | None = None
     session_id: str | None = None
     proactive_dashboards: bool = False
-    output_dir: str = ""
     tools: list[ToolDef] = field(default_factory=list)
 
 
@@ -99,7 +98,6 @@ def __init__(self, config: ChatSessionConfig) -> None:
         self._system_prompt_context = config.system_prompt_context
         self._proactive_dashboards = config.proactive_dashboards
         self._extra_tools = config.tools
-        self._output_dir = config.output_dir
         self._workspace = config.workspace
         self._data_vault = config.data_vault
         self._console = config.console
@@ -304,7 +302,6 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
 
         prompt_builder = ChatSystemPromptBuilder()
         prompt = prompt_builder.build(
-            output_dir=self._output_dir,
             current_datetime=_current_datetime,
             system_prompt_context=self._system_prompt_context,
             proactive_dashboards=self._proactive_dashboards,

From adcc49b25b3a22ecda07dc733b17585fc2c381e7 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 16:14:29 -0700
Subject: [PATCH 120/134] added storage instructions via callers

---
 anton/chat.py         | 6 +++++-
 anton/chat_session.py | 7 +++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index b8736f0e..898a877f 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -986,12 +986,16 @@ async def _chat_loop(
     # Build runtime context so the LLM knows what it's running on
     runtime_context = build_runtime_context(settings)
 
+    output_path = f"{settings.output_dir.rstrip('/')}/"
     session = ChatSession(ChatSessionConfig(
         llm_client=state["llm_client"],
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
-        system_prompt_context=SystemPromptContext(runtime_context=runtime_context),
+        system_prompt_context=SystemPromptContext(
+            runtime_context=runtime_context,
+            resource_storage_context=f"Save output to `{output_path}` (create it if needed).",
+        ),
         workspace=workspace,
         console=console,
         history_store=history_store,
diff --git a/anton/chat_session.py b/anton/chat_session.py
index 4ee6e387..11964a24 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -86,16 +86,19 @@ def rebuild_session(
         if settings.coding_provider == "anthropic"
         else settings.openai_api_key
     ) or ""
+    output_path = f"{settings.output_dir.rstrip('/')}/"
     return ChatSession(ChatSessionConfig(
         llm_client=state["llm_client"],
         self_awareness=self_awareness,
         cortex=cortex,
         episodic=episodic,
-        system_prompt_context=SystemPromptContext(runtime_context=runtime_context),
+        system_prompt_context=SystemPromptContext(
+            runtime_context=runtime_context,
+            resource_storage_context=f"Save output to `{output_path}` (create it if needed).",
+        ),
         workspace=workspace,
         console=console,
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
-        output_dir=settings.output_dir,
     ))

From 593d060f4eae8488c3181bf180322a259913d4df Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 16:14:36 -0700
Subject: [PATCH 121/134] fixed tests

---
 tests/test_prompt_builder_skills.py | 5 ++---
 tests/test_session_skills_init.py   | 8 +++-----
 tests/test_skills_e2e.py            | 5 ++---
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/tests/test_prompt_builder_skills.py b/tests/test_prompt_builder_skills.py
index 87631eb6..6796e16e 100644
--- a/tests/test_prompt_builder_skills.py
+++ b/tests/test_prompt_builder_skills.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.skills import Skill, SkillStore
 
 
@@ -40,9 +40,8 @@ def populated_store(tmp_path: Path) -> SkillStore:
 def _build_prompt(builder: ChatSystemPromptBuilder, **overrides) -> str:
     defaults = dict(
         current_datetime="2026-04-10T12:00:00+00:00",
-        runtime_context="test runtime",
+        system_prompt_context=SystemPromptContext(runtime_context="test runtime"),
         proactive_dashboards=False,
-        output_dir="/tmp/anton_out",
     )
     defaults.update(overrides)
     return builder.build(**defaults)
diff --git a/tests/test_session_skills_init.py b/tests/test_session_skills_init.py
index f60a579a..76c584af 100644
--- a/tests/test_session_skills_init.py
+++ b/tests/test_session_skills_init.py
@@ -16,7 +16,7 @@
 
 import pytest
 
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.skills import Skill, SkillStore
 from anton.core.tools.recall_skill import RECALL_SKILL_TOOL, handle_recall_skill
 from anton.core.tools.registry import ToolRegistry
@@ -67,9 +67,8 @@ def test_section_appears_when_store_passed(
         builder = ChatSystemPromptBuilder()
         prompt = builder.build(
             current_datetime="2026-04-10",
-            runtime_context="test",
+            system_prompt_context=SystemPromptContext(runtime_context="test"),
             proactive_dashboards=False,
-            output_dir="/tmp/x",
             skill_store=store_with_one_skill,
         )
         assert "## Procedural memory" in prompt
@@ -79,9 +78,8 @@ def test_section_omitted_when_no_store(self):
         builder = ChatSystemPromptBuilder()
         prompt = builder.build(
             current_datetime="2026-04-10",
-            runtime_context="test",
+            system_prompt_context=SystemPromptContext(runtime_context="test"),
             proactive_dashboards=False,
-            output_dir="/tmp/x",
             skill_store=None,
         )
         assert "Procedural memory" not in prompt
diff --git a/tests/test_skills_e2e.py b/tests/test_skills_e2e.py
index 78f40615..3c673ac4 100644
--- a/tests/test_skills_e2e.py
+++ b/tests/test_skills_e2e.py
@@ -24,7 +24,7 @@
 from rich.console import Console
 
 from anton.commands.skills import _SkillDraft, handle_skill_save
-from anton.core.llm.prompt_builder import ChatSystemPromptBuilder
+from anton.core.llm.prompt_builder import ChatSystemPromptBuilder, SystemPromptContext
 from anton.core.memory.skills import SkillStore
 from anton.core.tools.recall_skill import RECALL_SKILL_TOOL
 from anton.core.tools.registry import ToolRegistry
@@ -123,9 +123,8 @@ async def test_full_skills_loop(console, store_root):
     builder = ChatSystemPromptBuilder()
     prompt = builder.build(
         current_datetime="2026-04-10T13:00:00+00:00",
-        runtime_context="test",
+        system_prompt_context=SystemPromptContext(runtime_context="test"),
         proactive_dashboards=False,
-        output_dir="/tmp/x",
         skill_store=fresh_store,
     )
     assert "## Procedural memory" in prompt

From 305155186ac7679e076f18092a12d97a7b47505c Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 16:15:53 -0700
Subject: [PATCH 122/134] removed unusued code

---
 anton/chat_session.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/anton/chat_session.py b/anton/chat_session.py
index 11964a24..50107655 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -81,11 +81,6 @@ def rebuild_session(
     refresh_knowledge(settings, cortex)
 
     runtime_context = build_runtime_context(settings)
-    api_key = (
-        settings.anthropic_api_key
-        if settings.coding_provider == "anthropic"
-        else settings.openai_api_key
-    ) or ""
     output_path = f"{settings.output_dir.rstrip('/')}/"
     return ChatSession(ChatSessionConfig(
         llm_client=state["llm_client"],

From a10d89908533479b34f22d661dee59fc97a9800f Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <minurapunchihewa17@gmail.com>
Date: Tue, 14 Apr 2026 16:28:28 -0700
Subject: [PATCH 123/134] renamed param to output_context

---
 anton/chat.py                    |  2 +-
 anton/chat_session.py            |  2 +-
 anton/core/llm/prompt_builder.py | 10 +++++-----
 anton/core/llm/prompts.py        |  4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index 898a877f..8814db62 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -994,7 +994,7 @@ async def _chat_loop(
         episodic=episodic,
         system_prompt_context=SystemPromptContext(
             runtime_context=runtime_context,
-            resource_storage_context=f"Save output to `{output_path}` (create it if needed).",
+            output_context=f"Save output to `{output_path}` (create it if needed).",
         ),
         workspace=workspace,
         console=console,
diff --git a/anton/chat_session.py b/anton/chat_session.py
index 50107655..846ec0da 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -89,7 +89,7 @@ def rebuild_session(
         episodic=episodic,
         system_prompt_context=SystemPromptContext(
             runtime_context=runtime_context,
-            resource_storage_context=f"Save output to `{output_path}` (create it if needed).",
+            output_context=f"Save output to `{output_path}` (create it if needed).",
         ),
         workspace=workspace,
         console=console,
diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index e5453733..d7340fe6 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -22,7 +22,7 @@ class SystemPromptContext:
     Four levels with increasing importance (later = stronger influence):
       1. ``prefix``  — prepended before the base prompt
       2. ``runtime_context`` — interpolated into the RUNTIME IDENTITY section
-      3. ``resource_storage_context`` — free-text instructions on where to
+      3. ``output_context`` — free-text instructions on where to
          store generated resources (visualizations, HTML files, data exports)
       4. ``suffix``  — appended after all other sections
     """
@@ -30,7 +30,7 @@ class SystemPromptContext:
     runtime_context: str = ""
     prefix: str = ""
     suffix: str = ""
-    resource_storage_context: str = ""
+    output_context: str = ""
 
 
 class ChatSystemPromptBuilder:
@@ -110,7 +110,7 @@ def _build_visualizations_section(
         self,
         *,
         proactive_dashboards: bool,
-        resource_storage_context: str,
+        output_context: str,
     ) -> str:
         visualizations_output_format_prompt = (
             VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT
@@ -118,7 +118,7 @@ def _build_visualizations_section(
             else VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT
         )
         output_format = visualizations_output_format_prompt.format(
-            resource_storage_context=resource_storage_context,
+            output_context=output_context,
         )
         return BASE_VISUALIZATIONS_PROMPT.format(output_format=output_format)
 
@@ -137,7 +137,7 @@ def build(
     ) -> str:
         visualizations_section = self._build_visualizations_section(
             proactive_dashboards=proactive_dashboards,
-            resource_storage_context=system_prompt_context.resource_storage_context,
+            output_context=system_prompt_context.output_context,
         )
 
         prompt = ""
diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
index df2c62d0..3596eb85 100644
--- a/anton/core/llm/prompts.py
+++ b/anton/core/llm/prompts.py
@@ -304,7 +304,7 @@
 Output format:
 - Unless the user explicitly asks for a different format, always output visualizations \
 as polished, single-file HTML pages — never raw PNGs or bare image files.
-{resource_storage_context}
+{output_context}
 
 Visual design:
 - Make it look good by default. Use a dark theme (#0d1117 background, #e6edf3 text), \
@@ -362,7 +362,7 @@
 - For large datasets, summarize the top N and offer to show more.
 - When the user EXPLICITLY asks for a chart, dashboard, plot, or HTML visualization, \
 THEN build it as a self-contained HTML file with inlined CSS, JS, and data. \
-{resource_storage_context}
+{output_context}
 Use Apache ECharts (CDN), dark theme (#0d1117), and follow standard dashboard best practices. \
 If the dataset is very large (>100KB), write it to a separate .js file in the same directory. \
 Never split CSS or chart logic into separate files — only large data payloads.\

From 71a1a5474e56a01ab9da81ceb9727fdbb127c3ca Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 01:01:53 -0500
Subject: [PATCH 124/134] send fingerprint on analytics

---
 anton/analytics.py | 73 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/anton/analytics.py b/anton/analytics.py
index 182eb59e..f87fe556 100644
--- a/anton/analytics.py
+++ b/anton/analytics.py
@@ -1,21 +1,45 @@
 """Fire-and-forget anonymous analytics events.
 
 Every call spawns a daemon thread that issues a single GET request to the
-configured analytics URL.  The request carries only the action name and a
-timestamp — no PII, no payload beyond what the query string contains.
+configured analytics URL.  The request carries only the action name, a
+timestamp, and an anonymous machine fingerprint — no PII, no payload
+beyond what the query string contains.
 
 Guarantees:
   • Never blocks the caller.
   • Never raises — all exceptions are silently swallowed.
   • Daemon threads die automatically when the process exits.
+
+Machine fingerprint
+===================
+
+Each event includes an ``aid`` (Anton Installation ID) — a deterministic
+SHA-256 hash of the machine's MAC address (``uuid.getnode()``).  This is:
+
+  • **Anonymous**: the hash is one-way; the raw MAC never leaves the
+    device.  No hostname, no platform, no PII.
+  • **Stateless**: no file on normal machines — computed from the MAC.
+  • **Stable**: changes only if the primary network adapter changes.
+
+Fallback for Docker / containers: if Python can't find a real MAC
+(detected via the multicast bit), a random UUID is persisted to
+``~/.anton/.installation_id`` so it stays stable across restarts.
+File I/O only happens in this edge case — normal desktops never
+touch disk.
+
+The ``aid`` is truncated to 16 hex characters (~64 bits of entropy) —
+enough to be collision-free across millions of installations, short
+enough to be a readable query parameter.
 """
 
 from __future__ import annotations
 
+import hashlib
 import threading
 import time
 import urllib.parse
 import urllib.request
+import uuid
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
@@ -23,8 +47,50 @@
 
 _TIMEOUT = 3  # seconds
 
+# Cached after first computation — the fingerprint never changes within
+# a process, so computing it once is sufficient.
+_cached_aid: str | None = None
+
+
+def get_installation_id() -> str:
+    """Return a deterministic, anonymous machine fingerprint.
+
+    The fingerprint is a truncated SHA-256 of the MAC address on normal
+    machines. If no real MAC is available (Docker containers with stripped
+    networking), a random UUID is persisted to ``~/.anton/.installation_id``
+    as a one-time fallback. Computed once per process and cached.
+
+    Returns:
+        A 16-character hex string (64 bits of entropy).
+    """
+    global _cached_aid
+    if _cached_aid is not None:
+        return _cached_aid
+
+    try:
+        node = uuid.getnode()
+        is_random_fallback = bool(node & (1 << 40))  # multicast bit = Python faked it
+
+        if is_random_fallback:
+            # No real MAC (e.g. Docker with stripped networking).
+            # Persist a UUID to disk so it's stable across restarts.
+            from pathlib import Path
+
+            path = Path("~/.anton/.installation_id").expanduser()
+            if path.is_file():
+                _cached_aid = path.read_text(encoding="utf-8").strip()[:16]
+            else:
+                _cached_aid = uuid.uuid4().hex[:16]
+                path.parent.mkdir(parents=True, exist_ok=True)
+                path.write_text(_cached_aid + "\n", encoding="utf-8")
+        else:
+            _cached_aid = hashlib.sha256(str(node).encode()).hexdigest()[:16]
+    except Exception:
+        _cached_aid = "unknown"
+    return _cached_aid
+
 
-def send_event(settings: AntonSettings, action: str, **extra: str) -> None:
+def send_event(settings: "AntonSettings", action: str, **extra: str) -> None:
     """Send an analytics event in a background thread.
 
     Args:
@@ -41,6 +107,7 @@ def send_event(settings: AntonSettings, action: str, **extra: str) -> None:
 
         params: dict[str, str] = {
             "action": action,
+            "aid": get_installation_id(),
             "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
             "_": str(int(time.time() * 1000)),
         }

From a7371883a91384f305cb5bca7fbf03213d885a89 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 01:42:07 -0500
Subject: [PATCH 125/134] revert to 4nton.ai for now

---
 anton/config/settings.py | 4 ++--
 anton/publisher.py       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/anton/config/settings.py b/anton/config/settings.py
index 5f49e1ee..57eb2b68 100644
--- a/anton/config/settings.py
+++ b/anton/config/settings.py
@@ -70,8 +70,8 @@ class AntonSettings(CoreSettings):
     minds_datasource_engine: str | None = None
     minds_ssl_verify: bool = True
 
-    # Publish service (anton-services API Gateway)
-    publish_url: str = "https://anton.mindsdb.com"
+    # Publish service
+    publish_url: str = "https://4nton.ai"
 
     @field_validator("minds_ssl_verify", mode="before")
     @classmethod
diff --git a/anton/publisher.py b/anton/publisher.py
index bcd0b2aa..b6bf9738 100644
--- a/anton/publisher.py
+++ b/anton/publisher.py
@@ -24,7 +24,7 @@
 _TEXT_EXTENSIONS = {".html", ".htm", ".js", ".css"}
 
 
-DEFAULT_PUBLISH_URL = "https://anton.mindsdb.com"
+DEFAULT_PUBLISH_URL = "https://4nton.ai"
 
 # Patterns that capture relative paths from HTML attributes and CSS url()
 _REF_PATTERNS = [

From f1887f017e42c4faa98a0849b6a7bec5e4468e82 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 02:42:03 -0500
Subject: [PATCH 126/134] Clarify description of Anton AI agent

Updated the description of Anton for clarity.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 10d507ce..f6e86404 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
     ▐   ▐
 ```
 # Meet Anton - an autonomous agent that gets real work done
-Anton is your personal AI agent that works so you don't have to. Tell it what you need in plain language and it takes it from there - sending emails, calling APIs, connecting to data sources, building dashboards, and delivering results. No setup, no plugins, no fuss.
+Anton is a personal AI agent that helps you get actual-work done. Tell it what you need in plain language and it takes it from there - sending emails, calling APIs, connecting to data sources, building dashboards, and delivering results. No setup, no plugins, no fuss.
 
 It doesn't just answer questions. It *does things*: cleans your inbox, builds integrations, analyzes your data, automates workflows - whatever the task requires.
 

From 75a0b5aebefd29155313649de97ee8292daf2a85 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 02:43:05 -0500
Subject: [PATCH 127/134] Update download link for Anton Desktop App on macOS

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f6e86404..d9a5b6e2 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,11 @@ It doesn't just answer questions. It *does things*: cleans your inbox, builds in
 ## Quick start
 **macOS - Desktop App:**
 
-<a href="https://mindsdb-anton.s3.us-east-2.amazonaws.com/anton-latest-universal-signed.pkg">
+<a href="https://mindsdb-anton.s3.us-east-2.amazonaws.com/mac/anton-latest.pkg">
 <img width="64" alt="DesktopApp" src="https://github.com/user-attachments/assets/ed7c1e3a-3700-45cc-a9a8-efb57b43dcfd" />
 </a>
 
- Click [here to download](https://mindsdb-anton.s3.us-east-2.amazonaws.com/anton-latest-universal-signed.pkg) the Anton Desktop App for MacOS.
+ Click [here to download](https://mindsdb-anton.s3.us-east-2.amazonaws.com/mac/anton-latest.pkg) the Anton Desktop App for MacOS.
 
 
 **macOS / Linux - CLI:**

From d45087e1b450edac5c3b407e9812171ece9872d0 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 02:46:29 -0500
Subject: [PATCH 128/134] publish update workflow with versions

---
 anton/chat.py      | 57 +++++++++++++++++++++++++++++++++++++++++++---
 anton/publisher.py | 12 ++++++++--
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index b8736f0e..663066ba 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -335,6 +335,7 @@ async def _handle_publish(
     file_arg: str = "",
 ) -> None:
     """Handle /publish command — publish an HTML report to the web."""
+    import json
     import webbrowser
     from pathlib import Path
 
@@ -436,15 +437,44 @@ async def _handle_publish(
         console.print()
         return
 
-    # 3. Publish
+    # 3. Check if this file was previously published
+    published_json = output_dir / ".published.json"
+    published_map = {}
+    try:
+        if published_json.is_file():
+            published_map = json.loads(published_json.read_text())
+    except Exception:
+        pass
+
+    report_id = None
+    file_key = target.name
+    prev = published_map.get(file_key)
+
+    if prev and prev.get("report_id"):
+        console.print(f"  [anton.muted]Previously published: {prev.get('url', '')}[/]")
+        update_choice = await prompt_or_cancel(
+            "  Update existing report, or publish as new?",
+            choices=["update", "new", "u", "n"],
+            choices_display="update/new",
+            default="update",
+        )
+        if update_choice is None:
+            console.print()
+            return
+        if update_choice in ("update", "u"):
+            report_id = prev["report_id"]
+
+    # 4. Publish
     from rich.live import Live
     from rich.spinner import Spinner
 
-    with Live(Spinner("dots", text="  Publishing...", style="anton.cyan"), console=console, transient=True):
+    action_text = "  Updating..." if report_id else "  Publishing..."
+    with Live(Spinner("dots", text=action_text, style="anton.cyan"), console=console, transient=True):
         try:
             result = publish(
                 target,
                 api_key=settings.minds_api_key,
+                report_id=report_id,
                 publish_url=settings.publish_url,
                 ssl_verify=settings.minds_ssl_verify,
             )
@@ -454,10 +484,31 @@ async def _handle_publish(
             return
 
     view_url = result.get("view_url", "")
-    console.print(f"  [anton.success]Published![/]")
+    returned_report_id = result.get("report_id", "")
+    version = result.get("version", 1)
+    unchanged = result.get("unchanged", False)
+
+    if unchanged:
+        console.print(f"  [anton.muted]Already up to date (v{version})[/]")
+    elif report_id:
+        console.print(f"  [anton.success]Updated! (v{version})[/]")
+    else:
+        console.print(f"  [anton.success]Published![/]")
     console.print(f"  [link={view_url}]{view_url}[/link]")
     console.print()
 
+    # 5. Save mapping
+    if returned_report_id:
+        published_map[file_key] = {
+            "report_id": returned_report_id,
+            "url": view_url,
+            "last_md5": result.get("md5", ""),
+        }
+        try:
+            published_json.write_text(json.dumps(published_map, indent=2))
+        except Exception:
+            pass
+
     if view_url:
         webbrowser.open(view_url)
 
diff --git a/anton/publisher.py b/anton/publisher.py
index b6bf9738..a978a637 100644
--- a/anton/publisher.py
+++ b/anton/publisher.py
@@ -99,18 +99,26 @@ def publish(
     file_path: Path,
     *,
     api_key: str,
+    report_id: str | None = None,
     publish_url: str = DEFAULT_PUBLISH_URL,
     ssl_verify: bool = True,
 ) -> dict:
     """Zip and upload an HTML file/directory. Returns the upload response dict.
 
-    Response keys: user_prefix, md5, view_url, files
+    Args:
+        report_id: If provided, updates an existing report (new version).
+                   If None, creates a new report.
+
+    Response keys: user_prefix, report_id, md5, view_url, version, files
     """
     if not file_path.exists():
         raise FileNotFoundError(f"Path not found: {file_path}")
 
     zipped = _zip_html(file_path)
-    payload = json.dumps({"file_payload": base64.b64encode(zipped).decode()}).encode()
+    payload_dict: dict = {"file_payload": base64.b64encode(zipped).decode()}
+    if report_id:
+        payload_dict["report_id"] = report_id
+    payload = json.dumps(payload_dict).encode()
 
     url = f"{publish_url.rstrip('/')}/upload"
     raw = minds_request(url, api_key, method="POST", payload=payload, verify=ssl_verify)

From 4ccbd4e178335d8a792b46e685e4de63bbe647c4 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 03:24:28 -0500
Subject: [PATCH 129/134] better progress messaging

---
 anton/chat_ui.py | 73 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/anton/chat_ui.py b/anton/chat_ui.py
index 11aad507..ee891a01 100644
--- a/anton/chat_ui.py
+++ b/anton/chat_ui.py
@@ -29,6 +29,9 @@ class _ToolActivity:
     printed: bool = False  # whether the activity line has been printed
     done: bool = False  # whether execution is complete
     start_time: float = 0.0  # monotonic timestamp when execution began
+    work_elapsed: float = 0.0  # actual execution seconds (filled on done)
+    reasoning_elapsed: float = 0.0  # LLM thinking seconds after this step
+    done_line_printed: bool = False  # whether the combined ✔ line was printed
 
 
 # Witty one-liners for non-scratchpad tool display. One is picked at
@@ -377,17 +380,16 @@ def update_progress(
             return
 
         if phase == "scratchpad_done":
-            # Mark the scratchpad line as complete with actual elapsed time
+            # Stash work elapsed — the ✔ line is deferred until
+            # reasoning_done arrives so we can print one combined line.
             for act in reversed(self._activities):
                 if act.name == "scratchpad" and act.printed and not act.done:
-                    elapsed = eta if eta else 0  # eta_seconds carries elapsed time
                     act.done = True
-                    self._stop_spinner()
-                    self._print_done_line(act, elapsed)
+                    act.work_elapsed = eta if eta else 0
                     self._line1_fun = random.choice(THINKING_MESSAGES)  # noqa: S311
                     self._line2_status = random.choice(WORKING_FOOTER_MESSAGES)  # noqa: S311
                     self._line3_peek = ""
-                    self._start_spinner()
+                    self._update_spinner()
                     break
             return
 
@@ -414,14 +416,12 @@ def update_progress(
             return
 
         if phase == "tool_done":
-            # Non-scratchpad tool finished — print ✔ + actual elapsed
+            # Stash work elapsed — combined line printed on reasoning_done.
             elapsed = eta if eta else 0
             for act in reversed(self._activities):
                 if act.name == message and act.printed and not act.done:
                     act.done = True
-                    self._stop_spinner()
-                    self._print_done_line(act, elapsed)
-                    self._start_spinner()
+                    act.work_elapsed = elapsed
                     break
             return
 
@@ -436,9 +436,16 @@ def update_progress(
             return
 
         if phase == "reasoning_done":
-            elapsed = eta if eta else 0
+            reasoning_elapsed = eta if eta else 0
+            # Find the last done-but-not-yet-printed activity and print
+            # the combined ✔ line: worked + reasoned on one line.
             self._stop_spinner()
-            self._print_reasoning_line(elapsed)
+            for act in reversed(self._activities):
+                if act.done and not act.done_line_printed:
+                    act.reasoning_elapsed = reasoning_elapsed
+                    act.done_line_printed = True
+                    self._print_done_line(act, act.work_elapsed, reasoning_elapsed)
+                    break
             self._start_spinner()
             return
 
@@ -452,6 +459,14 @@ def finish(self) -> None:
         """Stop spinner and print the final answer."""
         self._stop_spinner()
 
+        # Flush any activity whose ✔ line was deferred but never got a
+        # reasoning_done (happens for the last tool in a turn — the LLM
+        # goes straight to text, so reasoning_done never fires).
+        for act in self._activities:
+            if act.done and not act.done_line_printed:
+                act.done_line_printed = True
+                self._print_done_line(act, act.work_elapsed)
+
         # Print initial text as muted "inner speech" (if not already printed)
         if self._initial_text and not self._initial_printed:
             if self._activities:
@@ -522,22 +537,36 @@ def _print_activity_line(self, act: _ToolActivity) -> None:
         line.append(label, style="bold")
         self._console.print(line)
 
-    def _print_done_line(self, act: _ToolActivity, elapsed: float) -> None:
-        """Print a completion marker for a finished activity."""
+    def _print_done_line(
+        self,
+        act: _ToolActivity,
+        work_elapsed: float,
+        reasoning_elapsed: float = 0.0,
+    ) -> None:
+        """Print a single combined completion line for a finished activity.
+
+        Format: ``  ✔ (Worked: 1.9s, Reasoned: 7.1s)``
+        If reasoning_elapsed is 0 (e.g. last tool in the turn with no
+        follow-up reasoning), only the work time is shown.
+        """
         line = Text()
         line.append("  \u2714 ", style="green")
-        elapsed_str = f"{elapsed:.1f}s" if elapsed >= 1 else f"{int(elapsed * 1000)}ms"
-        line.append(elapsed_str, style="anton.muted")
-        self._console.print(line)
+        work_str = self._fmt_elapsed(work_elapsed)
+
+        if reasoning_elapsed > 0:
+            reason_str = self._fmt_elapsed(reasoning_elapsed)
+            line.append(f"(Worked: {work_str}, Reasoned: {reason_str})", style="anton.muted")
+        else:
+            line.append(work_str, style="anton.muted")
 
-    def _print_reasoning_line(self, elapsed: float) -> None:
-        """Print the LLM's reasoning time between tool rounds."""
-        line = Text()
-        elapsed_str = f"{elapsed:.1f}s" if elapsed >= 1 else f"{int(elapsed * 1000)}ms"
-        line.append("  Reasoning: ", style="anton.muted")
-        line.append(elapsed_str, style="anton.muted")
         self._console.print(line)
 
+    @staticmethod
+    def _fmt_elapsed(seconds: float) -> str:
+        if seconds >= 1:
+            return f"{seconds:.1f}s"
+        return f"{int(seconds * 1000)}ms"
+
 
 class EscapeWatcher:
     """Detect Escape keypress during streaming via cbreak terminal mode."""

From 71f39cc6f5c6f29912d42b8bee9b0d2ee492cbcd Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 03:33:44 -0500
Subject: [PATCH 130/134] keep track of what has been published, so it can do
 updates instead of a fresh new update everytime

---
 anton/tools.py | 69 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/anton/tools.py b/anton/tools.py
index f3c97692..197af251 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -207,31 +207,88 @@ async def handle_publish_or_preview(session: ChatSession, tc_input: dict) -> str
             "API key setup flow."
         )
 
+    import json as _json
+
     from rich.live import Live
     from rich.spinner import Spinner
 
-    with Live(Spinner("dots", text="  Publishing...", style="anton.cyan"), console=console, transient=True):
+    # Check if this file was previously published — reuse report_id to
+    # update instead of creating a new report every time.
+    output_dir = file_path.parent
+    published_json = output_dir / ".published.json"
+    published_map: dict = {}
+    try:
+        if published_json.is_file():
+            published_map = _json.loads(published_json.read_text())
+    except Exception:
+        pass
+
+    file_key = file_path.name
+    prev = published_map.get(file_key)
+    report_id = prev.get("report_id") if isinstance(prev, dict) else None
+
+    action_text = "  Updating..." if report_id else "  Publishing..."
+    with Live(Spinner("dots", text=action_text, style="anton.cyan"), console=console, transient=True):
         try:
             result = publish(
                 file_path,
                 api_key=settings.minds_api_key,
+                report_id=report_id,
                 publish_url=settings.publish_url,
                 ssl_verify=settings.minds_ssl_verify,
             )
         except Exception as e:
-            console.print(f"  [anton.error]Publish failed: {e}[/]")
-            console.print()
-            return f"PUBLISH FAILED: {e}"
+            if report_id:
+                # The report may have been deleted server-side — retry
+                # without report_id to create a fresh one.
+                try:
+                    result = publish(
+                        file_path,
+                        api_key=settings.minds_api_key,
+                        publish_url=settings.publish_url,
+                        ssl_verify=settings.minds_ssl_verify,
+                    )
+                except Exception as e2:
+                    console.print(f"  [anton.error]Publish failed: {e2}[/]")
+                    console.print()
+                    return f"PUBLISH FAILED: {e2}"
+            else:
+                console.print(f"  [anton.error]Publish failed: {e}[/]")
+                console.print()
+                return f"PUBLISH FAILED: {e}"
 
     view_url = result.get("view_url", "")
-    console.print(f"  [anton.success]Published![/]")
+    returned_report_id = result.get("report_id", "")
+    version = result.get("version", 1)
+    unchanged = result.get("unchanged", False)
+
+    if unchanged:
+        console.print(f"  [anton.muted]Already up to date (v{version})[/]")
+    elif report_id:
+        console.print(f"  [anton.success]Updated! (v{version})[/]")
+    else:
+        console.print(f"  [anton.success]Published![/]")
     console.print(f"  [link={view_url}]{view_url}[/link]")
     console.print()
 
+    # Persist the mapping so future publishes of the same file update
+    # instead of creating a new report.
+    if returned_report_id:
+        published_map[file_key] = {
+            "report_id": returned_report_id,
+            "url": view_url,
+            "last_md5": result.get("md5", ""),
+        }
+        try:
+            published_json.write_text(_json.dumps(published_map, indent=2))
+        except Exception:
+            pass
+
     if view_url:
         webbrowser.open(view_url)
 
-    return f"Published successfully!\nView URL: {view_url}"
+    status = "Updated" if report_id else "Published"
+    return f"{status} successfully!\nView URL: {view_url}"
 
 
 PUBLISH_TOOL = ToolDef(

From 2b456a0734689469aee12bb8bee19874250626e8 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 03:48:52 -0500
Subject: [PATCH 131/134] drop legaccy reports

---
 anton/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anton/chat.py b/anton/chat.py
index 663066ba..03e9360f 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -610,7 +610,7 @@ async def _handle_unpublish(
     with Live(Spinner("dots", text="  Removing...", style="anton.cyan"), console=console, transient=True):
         try:
             unpublish(
-                selected["md5"],
+                selected.get("report_id") or selected["md5"],
                 api_key=settings.minds_api_key,
                 publish_url=settings.publish_url,
                 ssl_verify=settings.minds_ssl_verify,

From 197831c17de9baed84e77db21adb98f2ffa3517d Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Wed, 15 Apr 2026 16:49:19 +0200
Subject: [PATCH 132/134] Fix scratchpad tests

---
 tests/test_scratchpad.py | 162 ++++++++++++++++++++++-----------------
 1 file changed, 92 insertions(+), 70 deletions(-)

diff --git a/tests/test_scratchpad.py b/tests/test_scratchpad.py
index 0b443bf5..833be485 100644
--- a/tests/test_scratchpad.py
+++ b/tests/test_scratchpad.py
@@ -6,17 +6,39 @@
 import pytest
 
 from anton.core.backends.base import Cell
-from anton.core.backends.local import LocalScratchpadRuntime, _compute_timeouts
+from anton.core.backends.local import LocalScratchpadRuntime
+from anton.core.backends.utils import compute_timeouts as _compute_timeouts
 from anton.core.backends.manager import ScratchpadManager
+from anton.core.backends.local import local_scratchpad_runtime_factory
 
 # Alias for brevity in tests
 Scratchpad = LocalScratchpadRuntime
 
+_SCRATCHPAD_DEFAULTS = dict(
+    coding_provider="anthropic",
+    coding_model="claude-sonnet-4-6",
+    coding_api_key="test",
+    coding_base_url="",
+)
+
+_MANAGER_DEFAULTS = dict(
+    runtime_factory=local_scratchpad_runtime_factory,
+    **_SCRATCHPAD_DEFAULTS,
+)
+
+
+def make_scratchpad(name: str, **kwargs) -> LocalScratchpadRuntime:
+    return Scratchpad(name=name, **{**_SCRATCHPAD_DEFAULTS, **kwargs})
+
+
+def make_manager(**kwargs) -> ScratchpadManager:
+    return ScratchpadManager(**{**_MANAGER_DEFAULTS, **kwargs})
+
 
 class TestScratchpadBasicExecution:
     async def test_basic_execution(self):
         """print(42) should return '42' in stdout."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             cell = await pad.execute("print(42)")
@@ -27,7 +49,7 @@ async def test_basic_execution(self):
 
     async def test_state_persists(self):
         """Variable from cell 1 should be available in cell 2."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             await pad.execute("x = 123")
@@ -39,7 +61,7 @@ async def test_state_persists(self):
 
     async def test_error_captured_process_survives(self):
         """Exception doesn't kill process; next cell works."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             cell1 = await pad.execute("raise ValueError('boom')")
@@ -56,7 +78,7 @@ async def test_error_captured_process_survives(self):
 
     async def test_imports_persist(self):
         """import json in cell 1, json.dumps(...) in cell 2."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             await pad.execute("import json")
@@ -70,7 +92,7 @@ async def test_imports_persist(self):
 class TestScratchpadView:
     async def test_view_history(self):
         """view() should show all cells with outputs."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             await pad.execute("x = 10")
@@ -85,7 +107,7 @@ async def test_view_history(self):
 
     async def test_view_empty(self):
         """view() on empty pad returns a message."""
-        pad = Scratchpad(name="empty")
+        pad = make_scratchpad(name="empty")
         await pad.start()
         try:
             output = pad.view()
@@ -97,7 +119,7 @@ async def test_view_empty(self):
 class TestScratchpadReset:
     async def test_reset_clears_state(self):
         """Variables should be gone after reset."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             await pad.execute("x = 42")
@@ -116,7 +138,7 @@ async def test_timeout_kills_process(self, monkeypatch):
         """Long-running code triggers timeout."""
         monkeypatch.setenv("ANTON_CELL_TIMEOUT_DEFAULT", "1")
         monkeypatch.setenv("ANTON_CELL_INACTIVITY_TIMEOUT", "1")
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             cell = await pad.execute("import time; time.sleep(60)")
@@ -127,7 +149,7 @@ async def test_timeout_kills_process(self, monkeypatch):
 
     async def test_output_truncation(self):
         """stdout exceeding _MAX_OUTPUT is capped in the boot script."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             cell = await pad.execute("print('x' * 20000)")
@@ -139,7 +161,7 @@ async def test_output_truncation(self):
 
     async def test_dead_process_detected(self):
         """If process is dead, execute reports it."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         # Kill the process manually
         pad._proc.kill()
@@ -151,7 +173,7 @@ async def test_dead_process_detected(self):
 
     async def test_stderr_captured(self):
         """stderr output is captured separately."""
-        pad = Scratchpad(name="test")
+        pad = make_scratchpad(name="test")
         await pad.start()
         try:
             cell = await pad.execute("import sys; sys.stderr.write('warn\\n')")
@@ -163,7 +185,7 @@ async def test_stderr_captured(self):
 class TestScratchpadManager:
     async def test_get_or_create(self):
         """Auto-creates a scratchpad on first access."""
-        mgr = ScratchpadManager()
+        mgr = make_manager()
         try:
             pad = await mgr.get_or_create("alpha")
             assert pad.name == "alpha"
@@ -177,7 +199,7 @@ async def test_get_or_create(self):
 
     async def test_remove(self):
         """remove() kills and deletes the scratchpad."""
-        mgr = ScratchpadManager()
+        mgr = make_manager()
         try:
             await mgr.get_or_create("beta")
             result = await mgr.remove("beta")
@@ -188,13 +210,13 @@ async def test_remove(self):
 
     async def test_remove_nonexistent(self):
         """remove() on unknown name returns a message."""
-        mgr = ScratchpadManager()
+        mgr = make_manager()
         result = await mgr.remove("nope")
         assert "nope" in result
 
     async def test_close_all(self):
         """close_all() cleans up everything."""
-        mgr = ScratchpadManager()
+        mgr = make_manager()
         await mgr.get_or_create("a")
         await mgr.get_or_create("b")
         assert len(mgr.list_pads()) == 2
@@ -207,7 +229,7 @@ async def test_close_all_does_not_restart_processes(self):
         cancel_all_running() would leave _proc pointing to a new (orphan-prone)
         process. close_all() must leave _proc as None.
         """
-        mgr = ScratchpadManager()
+        mgr = make_manager()
         pad = await mgr.get_or_create("test")
         try:
             await pad.execute("x = 1")
@@ -220,7 +242,7 @@ async def test_close_all_does_not_restart_processes(self):
 class TestScratchpadRenderNotebook:
     async def test_render_notebook_basic(self):
         """Produces markdown with code blocks and output."""
-        pad = Scratchpad(name="main")
+        pad = make_scratchpad(name="main")
         await pad.start()
         try:
             await pad.execute("x = 1")
@@ -237,7 +259,7 @@ async def test_render_notebook_basic(self):
 
     async def test_render_notebook_empty(self):
         """Empty pad returns a message."""
-        pad = Scratchpad(name="empty")
+        pad = make_scratchpad(name="empty")
         await pad.start()
         try:
             md = pad.render_notebook()
@@ -247,7 +269,7 @@ async def test_render_notebook_empty(self):
 
     async def test_render_notebook_skips_empty_cells(self):
         """Whitespace-only cells are filtered out."""
-        pad = Scratchpad(name="gaps")
+        pad = make_scratchpad(name="gaps")
         await pad.start()
         try:
             await pad.execute("print('a')")
@@ -263,7 +285,7 @@ async def test_render_notebook_skips_empty_cells(self):
 
     async def test_render_notebook_truncates_long_output(self):
         """Long stdout shows 'more lines' indicator."""
-        pad = Scratchpad(name="long")
+        pad = make_scratchpad(name="long")
         await pad.start()
         try:
             await pad.execute("for i in range(50): print(i)")
@@ -274,7 +296,7 @@ async def test_render_notebook_truncates_long_output(self):
 
     async def test_render_notebook_error_summary(self):
         """Only last traceback line shown, not full trace."""
-        pad = Scratchpad(name="err")
+        pad = make_scratchpad(name="err")
         await pad.start()
         try:
             await pad.execute("raise ValueError('boom')")
@@ -288,7 +310,7 @@ async def test_render_notebook_error_summary(self):
 
     async def test_render_notebook_hides_stderr_without_error(self):
         """Warnings (stderr only, no error) are filtered out of output sections."""
-        pad = Scratchpad(name="warn")
+        pad = make_scratchpad(name="warn")
         await pad.start()
         try:
             await pad.execute("import sys; sys.stderr.write('some warning\\n')")
@@ -319,7 +341,7 @@ async def test_truncate_output_chars(self):
 class TestCellMetadata:
     async def test_cell_stores_description_and_estimated_time(self):
         """execute() should store description and estimated_time on the Cell."""
-        pad = Scratchpad(name="meta")
+        pad = make_scratchpad(name="meta")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -335,7 +357,7 @@ async def test_cell_stores_description_and_estimated_time(self):
 
     async def test_cell_defaults_empty_metadata(self):
         """Without arguments, description and estimated_time default to empty."""
-        pad = Scratchpad(name="defaults")
+        pad = make_scratchpad(name="defaults")
         await pad.start()
         try:
             cell = await pad.execute("print(1)")
@@ -346,7 +368,7 @@ async def test_cell_defaults_empty_metadata(self):
 
     async def test_view_shows_description_in_header(self):
         """view() should include description in the cell header."""
-        pad = Scratchpad(name="view-desc")
+        pad = make_scratchpad(name="view-desc")
         await pad.start()
         try:
             await pad.execute("print(1)", description="Count to one")
@@ -357,7 +379,7 @@ async def test_view_shows_description_in_header(self):
 
     async def test_view_without_description(self):
         """view() without description falls back to plain header."""
-        pad = Scratchpad(name="view-plain")
+        pad = make_scratchpad(name="view-plain")
         await pad.start()
         try:
             await pad.execute("print(1)")
@@ -368,7 +390,7 @@ async def test_view_without_description(self):
 
     async def test_render_notebook_shows_description(self):
         """render_notebook() should include description in markdown header."""
-        pad = Scratchpad(name="nb-desc")
+        pad = make_scratchpad(name="nb-desc")
         await pad.start()
         try:
             await pad.execute("print(1)", description="Count to one")
@@ -379,7 +401,7 @@ async def test_render_notebook_shows_description(self):
 
     async def test_render_notebook_without_description(self):
         """render_notebook() without description uses plain header."""
-        pad = Scratchpad(name="nb-plain")
+        pad = make_scratchpad(name="nb-plain")
         await pad.start()
         try:
             await pad.execute("print(1)")
@@ -394,7 +416,7 @@ class TestScratchpadEnvironment:
     async def test_env_vars_accessible(self, monkeypatch):
         """Secrets from .anton/.env (in os.environ) are accessible in scratchpad."""
         monkeypatch.setenv("MY_TEST_SECRET", "s3cret_value")
-        pad = Scratchpad(name="env-test")
+        pad = make_scratchpad(name="env-test")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -406,7 +428,7 @@ async def test_env_vars_accessible(self, monkeypatch):
 
     async def test_get_llm_available_when_model_set(self):
         """get_llm() should be injected when ANTON_SCRATCHPAD_MODEL is set."""
-        pad = Scratchpad(name="llm-test", coding_model="claude-test-model")
+        pad = make_scratchpad(name="llm-test", coding_model="claude-test-model")
         await pad.start()
         try:
             cell = await pad.execute("llm = get_llm(); print(llm.model)")
@@ -417,7 +439,7 @@ async def test_get_llm_available_when_model_set(self):
 
     async def test_get_llm_not_available_without_model(self):
         """get_llm() should not be in namespace when no model is configured."""
-        pad = Scratchpad(name="no-llm")
+        pad = make_scratchpad(name="no-llm")
         await pad.start()
         try:
             cell = await pad.execute("get_llm()")
@@ -428,7 +450,7 @@ async def test_get_llm_not_available_without_model(self):
 
     async def test_agentic_loop_available_when_model_set(self):
         """agentic_loop() should be injected alongside get_llm()."""
-        pad = Scratchpad(name="agentic-test", coding_model="claude-test-model")
+        pad = make_scratchpad(name="agentic-test", coding_model="claude-test-model")
         await pad.start()
         try:
             cell = await pad.execute("print(callable(agentic_loop))")
@@ -439,7 +461,7 @@ async def test_agentic_loop_available_when_model_set(self):
 
     async def test_agentic_loop_not_available_without_model(self):
         """agentic_loop() should not be in namespace when no model is configured."""
-        pad = Scratchpad(name="no-agentic")
+        pad = make_scratchpad(name="no-agentic")
         await pad.start()
         try:
             cell = await pad.execute("agentic_loop()")
@@ -450,7 +472,7 @@ async def test_agentic_loop_not_available_without_model(self):
 
     async def test_generate_object_available_when_model_set(self):
         """generate_object() should be available on the LLM wrapper."""
-        pad = Scratchpad(name="genobj-test", coding_model="claude-test-model")
+        pad = make_scratchpad(name="genobj-test", coding_model="claude-test-model")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -466,7 +488,7 @@ async def test_api_key_bridged(self, monkeypatch):
         monkeypatch.setenv("ANTON_ANTHROPIC_API_KEY", "sk-ant-test-123")
         # Remove ANTHROPIC_API_KEY if set, to test the bridge
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
-        pad = Scratchpad(name="key-test", coding_model="test-model")
+        pad = make_scratchpad(name="key-test", coding_model="test-model")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -480,7 +502,7 @@ async def test_api_key_bridged(self, monkeypatch):
 class TestScratchpadVenv:
     async def test_venv_created_on_start(self):
         """Venv directory should be created when the scratchpad starts."""
-        pad = Scratchpad(name="venv-test")
+        pad = make_scratchpad(name="venv-test")
         await pad.start()
         try:
             assert pad._venv_dir is not None
@@ -492,7 +514,7 @@ async def test_venv_created_on_start(self):
 
     async def test_venv_persisted_on_close(self):
         """Venv directory should be preserved when the scratchpad is closed."""
-        pad = Scratchpad(name="venv-close")
+        pad = make_scratchpad(name="venv-close")
         await pad.start()
         venv_dir = pad._venv_dir
         assert os.path.isdir(venv_dir)
@@ -508,7 +530,7 @@ async def test_venv_persisted_on_close(self):
 
     async def test_venv_persists_across_reset(self):
         """Venv should survive a reset (only the process restarts)."""
-        pad = Scratchpad(name="venv-reset")
+        pad = make_scratchpad(name="venv-reset")
         await pad.start()
         venv_dir = pad._venv_dir
         try:
@@ -520,7 +542,7 @@ async def test_venv_persists_across_reset(self):
 
     async def test_subprocess_uses_venv_python(self):
         """The subprocess should run with the venv's Python executable."""
-        pad = Scratchpad(name="venv-exec")
+        pad = make_scratchpad(name="venv-exec")
         await pad.start()
         try:
             cell = await pad.execute("import sys; print(sys.executable)")
@@ -531,7 +553,7 @@ async def test_subprocess_uses_venv_python(self):
 
     async def test_system_packages_available(self):
         """System site-packages should be accessible (e.g. pydantic from parent env)."""
-        pad = Scratchpad(name="venv-syspkg")
+        pad = make_scratchpad(name="venv-syspkg")
         await pad.start()
         try:
             cell = await pad.execute("import pydantic; print(pydantic.__name__)")
@@ -548,7 +570,7 @@ async def test_venv_recycled_on_restart(self, tmp_path):
         """Close + reopen same name → packages remembered."""
         import shutil
         venvs_base = tmp_path / "venvs"
-        pad = Scratchpad(name="recycle", _venvs_base=venvs_base)
+        pad = make_scratchpad(name="recycle", _venvs_base=venvs_base)
         await pad.start()
         await pad.install_packages(["cowsay"])
         venv_dir = pad._venv_dir
@@ -562,7 +584,7 @@ async def test_venv_recycled_on_restart(self, tmp_path):
             assert "cowsay" in f.read()
 
         # Reopen — should recycle the existing venv
-        pad2 = Scratchpad(name="recycle", _venvs_base=venvs_base)
+        pad2 = make_scratchpad(name="recycle", _venvs_base=venvs_base)
         await pad2.start()
         try:
             assert "cowsay" in pad2._installed_packages
@@ -577,7 +599,7 @@ async def test_venv_nuked_on_version_mismatch(self, tmp_path, monkeypatch):
         """Wrong .python_version → recreates venv."""
         import shutil
         venvs_base = tmp_path / "venvs"
-        pad = Scratchpad(name="ver-mismatch", _venvs_base=venvs_base)
+        pad = make_scratchpad(name="ver-mismatch", _venvs_base=venvs_base)
         await pad.start()
         venv_dir = pad._venv_dir
         await pad.close()
@@ -588,7 +610,7 @@ async def test_venv_nuked_on_version_mismatch(self, tmp_path, monkeypatch):
             f.write("2.7\n")
 
         # Reopen — should detect mismatch, nuke, and recreate
-        pad2 = Scratchpad(name="ver-mismatch", _venvs_base=venvs_base)
+        pad2 = make_scratchpad(name="ver-mismatch", _venvs_base=venvs_base)
         await pad2.start()
         try:
             assert pad2._venv_dir is not None
@@ -605,7 +627,7 @@ async def test_venv_nuked_on_corruption(self, tmp_path):
         """Delete Python binary → recreates venv."""
         import shutil
         venvs_base = tmp_path / "venvs"
-        pad = Scratchpad(name="corrupt", _venvs_base=venvs_base)
+        pad = make_scratchpad(name="corrupt", _venvs_base=venvs_base)
         await pad.start()
         venv_dir = pad._venv_dir
         python_path = pad._venv_python
@@ -615,7 +637,7 @@ async def test_venv_nuked_on_corruption(self, tmp_path):
         os.remove(python_path)
 
         # Reopen — should detect corruption, nuke, and recreate
-        pad2 = Scratchpad(name="corrupt", _venvs_base=venvs_base)
+        pad2 = make_scratchpad(name="corrupt", _venvs_base=venvs_base)
         await pad2.start()
         try:
             assert pad2._venv_dir is not None
@@ -631,7 +653,7 @@ async def test_venv_nuked_on_corruption(self, tmp_path):
     async def test_remove_deletes_persistent_venv(self, tmp_path):
         """ScratchpadManager.remove() fully deletes the persistent venv dir."""
         import shutil
-        mgr = ScratchpadManager(workspace_path=tmp_path)
+        mgr = make_manager(workspace_path=tmp_path)
         try:
             pad = await mgr.get_or_create("deleteme")
             venv_dir = pad._venv_dir
@@ -646,7 +668,7 @@ async def test_requirements_saved_on_close(self, tmp_path):
         """requirements.txt is written when pad has installed packages."""
         import shutil
         venvs_base = tmp_path / "venvs"
-        pad = Scratchpad(name="req-save", _venvs_base=venvs_base)
+        pad = make_scratchpad(name="req-save", _venvs_base=venvs_base)
         await pad.start()
         await pad.install_packages(["cowsay"])
         await pad.close()
@@ -662,7 +684,7 @@ async def test_requirements_saved_on_close(self, tmp_path):
 class TestScratchpadInstall:
     async def test_install_packages_success(self):
         """install_packages should install a package into the venv."""
-        pad = Scratchpad(name="install-test")
+        pad = make_scratchpad(name="install-test")
         await pad.start()
         try:
             result = await pad.install_packages(["cowsay"])
@@ -676,7 +698,7 @@ async def test_install_packages_success(self):
 
     async def test_install_empty_list(self):
         """install_packages with empty list returns a message."""
-        pad = Scratchpad(name="install-empty")
+        pad = make_scratchpad(name="install-empty")
         await pad.start()
         try:
             result = await pad.install_packages([])
@@ -686,7 +708,7 @@ async def test_install_empty_list(self):
 
     async def test_install_invalid_package(self):
         """install_packages with a bogus name should report failure."""
-        pad = Scratchpad(name="install-bad")
+        pad = make_scratchpad(name="install-bad")
         await pad.start()
         try:
             result = await pad.install_packages(["this-package-does-not-exist-xyz123"])
@@ -696,7 +718,7 @@ async def test_install_invalid_package(self):
 
     async def test_install_survives_reset(self):
         """Packages installed before a reset should still be available after."""
-        pad = Scratchpad(name="install-reset")
+        pad = make_scratchpad(name="install-reset")
         await pad.start()
         try:
             await pad.install_packages(["cowsay"])
@@ -711,7 +733,7 @@ async def test_install_survives_reset(self):
 class TestProgressAndTimeouts:
     async def test_progress_function_available_in_namespace(self):
         """progress() should be callable in scratchpad code."""
-        pad = Scratchpad(name="progress-ns")
+        pad = make_scratchpad(name="progress-ns")
         await pad.start()
         try:
             cell = await pad.execute("print(callable(progress))")
@@ -724,7 +746,7 @@ async def test_progress_resets_inactivity_timeout(self, monkeypatch):
         """Code that calls progress() frequently should survive even with a short inactivity timeout."""
         monkeypatch.setenv("ANTON_CELL_INACTIVITY_TIMEOUT", "2")
         monkeypatch.setenv("ANTON_CELL_TIMEOUT_DEFAULT", "10")
-        pad = Scratchpad(name="progress-keep-alive")
+        pad = make_scratchpad(name="progress-keep-alive")
         await pad.start()
         try:
             code = (
@@ -744,7 +766,7 @@ async def test_inactivity_timeout_kills_without_progress(self, monkeypatch):
         """Code that sleeps without progress() calls should be killed by inactivity timeout."""
         monkeypatch.setenv("ANTON_CELL_INACTIVITY_TIMEOUT", "2")
         monkeypatch.setenv("ANTON_CELL_TIMEOUT_DEFAULT", "60")
-        pad = Scratchpad(name="no-progress")
+        pad = make_scratchpad(name="no-progress")
         await pad.start()
         try:
             cell = await pad.execute("import time; time.sleep(30)")
@@ -755,7 +777,7 @@ async def test_inactivity_timeout_kills_without_progress(self, monkeypatch):
 
     async def test_execute_streaming_yields_progress(self):
         """execute_streaming() should yield progress strings and a final Cell."""
-        pad = Scratchpad(name="streaming")
+        pad = make_scratchpad(name="streaming")
         await pad.start()
         try:
             code = (
@@ -781,14 +803,14 @@ async def test_execute_streaming_yields_progress(self):
 
     async def test_compute_timeouts_no_estimate(self):
         """No estimate should use defaults."""
-        from anton.core.backends.local import _compute_timeouts
+        from anton.core.backends.utils import compute_timeouts as _compute_timeouts
         total, inactivity = _compute_timeouts(0)
         assert total == 120.0
         assert inactivity == 30.0
 
     async def test_compute_timeouts_with_estimate(self):
         """Estimate should scale total timeout and inactivity with no hard cap."""
-        from anton.core.backends.local import _compute_timeouts
+        from anton.core.backends.utils import compute_timeouts as _compute_timeouts
 
         # Small estimate: max(10*2, 10+30) = max(20, 40) = 40
         total, inactivity = _compute_timeouts(10)
@@ -814,7 +836,7 @@ async def test_compute_timeouts_with_estimate(self):
 class TestSampleFunction:
     async def test_sample_available_in_namespace(self):
         """sample() should be callable in scratchpad code."""
-        pad = Scratchpad(name="sample-ns")
+        pad = make_scratchpad(name="sample-ns")
         await pad.start()
         try:
             cell = await pad.execute("print(callable(sample))")
@@ -825,7 +847,7 @@ async def test_sample_available_in_namespace(self):
 
     async def test_sample_dict_preview(self):
         """sample() on a dict should show keys and truncated values."""
-        pad = Scratchpad(name="sample-dict")
+        pad = make_scratchpad(name="sample-dict")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -842,7 +864,7 @@ async def test_sample_dict_preview(self):
 
     async def test_sample_list_preview(self):
         """sample() on a list should show length and first/last items."""
-        pad = Scratchpad(name="sample-list")
+        pad = make_scratchpad(name="sample-list")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -859,7 +881,7 @@ async def test_sample_list_preview(self):
 
     async def test_sample_string_preview(self):
         """sample() on a string should show length and a preview."""
-        pad = Scratchpad(name="sample-str")
+        pad = make_scratchpad(name="sample-str")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -875,7 +897,7 @@ async def test_sample_string_preview(self):
 
     async def test_sample_full_mode(self):
         """sample(var, mode='full') should show more content."""
-        pad = Scratchpad(name="sample-full")
+        pad = make_scratchpad(name="sample-full")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -891,7 +913,7 @@ async def test_sample_full_mode(self):
 
     async def test_sample_set(self):
         """sample() on a set should show length and items."""
-        pad = Scratchpad(name="sample-set")
+        pad = make_scratchpad(name="sample-set")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -906,7 +928,7 @@ async def test_sample_set(self):
 
     async def test_sample_custom_object(self):
         """sample() on an unknown object should show type and repr."""
-        pad = Scratchpad(name="sample-obj")
+        pad = make_scratchpad(name="sample-obj")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -923,7 +945,7 @@ async def test_sample_custom_object(self):
 
     async def test_sample_bytes(self):
         """sample() on bytes should show length and preview."""
-        pad = Scratchpad(name="sample-bytes")
+        pad = make_scratchpad(name="sample-bytes")
         await pad.start()
         try:
             cell = await pad.execute("sample(b'hello world')")
@@ -935,7 +957,7 @@ async def test_sample_bytes(self):
 
     async def test_sample_named(self):
         """sample() with _name parameter should include the label."""
-        pad = Scratchpad(name="sample-named")
+        pad = make_scratchpad(name="sample-named")
         await pad.start()
         try:
             cell = await pad.execute(
@@ -950,7 +972,7 @@ async def test_sample_named(self):
 
     async def test_sample_empty_dict(self):
         """sample() on an empty dict should not crash."""
-        pad = Scratchpad(name="sample-empty")
+        pad = make_scratchpad(name="sample-empty")
         await pad.start()
         try:
             cell = await pad.execute("sample({})")
@@ -961,7 +983,7 @@ async def test_sample_empty_dict(self):
 
     async def test_sample_empty_list(self):
         """sample() on an empty list should not crash."""
-        pad = Scratchpad(name="sample-empty-list")
+        pad = make_scratchpad(name="sample-empty-list")
         await pad.start()
         try:
             cell = await pad.execute("sample([])")

From d5c33171704fa96e4d791cec60d09e914bfcfa09 Mon Sep 17 00:00:00 2001
From: ZoranPandovski <zoran.pandovski@gmail.com>
Date: Wed, 15 Apr 2026 16:55:06 +0200
Subject: [PATCH 133/134] Update tests

---
 tests/test_scratchpad.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_scratchpad.py b/tests/test_scratchpad.py
index 833be485..14fef17c 100644
--- a/tests/test_scratchpad.py
+++ b/tests/test_scratchpad.py
@@ -16,8 +16,8 @@
 
 _SCRATCHPAD_DEFAULTS = dict(
     coding_provider="anthropic",
-    coding_model="claude-sonnet-4-6",
-    coding_api_key="test",
+    coding_model="",
+    coding_api_key="",
     coding_base_url="",
 )
 

From 2663fb3bcc449bbc06613219dc460a25cd66a373 Mon Sep 17 00:00:00 2001
From: Jorge Torres <jorge.torres.maldonado@gmail.com>
Date: Wed, 15 Apr 2026 14:11:34 -0500
Subject: [PATCH 134/134] connect telemetry

---
 anton/chat.py  | 18 ++++++++++++++++--
 anton/cli.py   |  6 +++++-
 anton/tools.py | 20 ++++++++++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/anton/chat.py b/anton/chat.py
index ae6b969b..164e8ec8 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -1397,10 +1397,24 @@ def _bottom_toolbar():
 
             _query_count += 1
             _total_questions += 1
+
+            # Determine the LLM provider label for telemetry
+            _provider = settings.planning_provider or ""
+            if _provider == "openai-compatible" and settings.minds_api_key:
+                _llm_provider = "mdb_ai"
+            elif _provider == "anthropic":
+                _llm_provider = "anthropic"
+            elif "gemini" in (settings.planning_model or "").lower():
+                _llm_provider = "gemini"
+            elif _provider in ("openai", "openai-compatible"):
+                _llm_provider = "openai"
+            else:
+                _llm_provider = "other"
+
             if _query_count == 1:
-                send_event(settings, "anton_first_query")
+                send_event(settings, "anton_first_query", llm_provider=_llm_provider)
             else:
-                send_event(settings, "anton_query")
+                send_event(settings, "anton_query", llm_provider=_llm_provider)
 
             display.start()
             t0 = time.monotonic()
diff --git a/anton/cli.py b/anton/cli.py
index 0604dd4c..032918b2 100644
--- a/anton/cli.py
+++ b/anton/cli.py
@@ -302,7 +302,11 @@ def main(
 
     from anton.analytics import send_event
 
-    send_event(settings, "anton_started")
+    send_event(
+        settings,
+        "anton_started",
+        has_mdb_key="1" if settings.minds_api_key else "0",
+    )
 
     if ctx.invoked_subcommand is None:
         from anton.chat import run_chat
diff --git a/anton/tools.py b/anton/tools.py
index 3b59046f..cdccb73e 100644
--- a/anton/tools.py
+++ b/anton/tools.py
@@ -25,6 +25,19 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     if console is None:
         return "Cannot connect datasource — no console available."
 
+    # ── Telemetry: connection attempt ────────────────────────────────
+    _settings = getattr(session, "_settings", None)
+    if _settings is None:
+        try:
+            from anton.config.settings import AntonSettings
+            _settings = AntonSettings()
+        except Exception:
+            _settings = None
+
+    if _settings:
+        from anton.analytics import send_event
+        send_event(_settings, "ds_connect_attempt", engine=engine)
+
     console.print()
     console.print(
         f"[anton.prompt]anton>[/] I can help with that \u2014 let's connect [bold]{engine}[/] to Anton."
@@ -56,6 +69,9 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
 
     if new_connections:
         slug = next(iter(new_connections))
+        # ── Telemetry: connection succeeded ──────────────────────────
+        if _settings:
+            send_event(_settings, "ds_connect_success", engine=engine)
         return (
             f"Successfully connected '{slug}'. The datasource is now available. "
             f"Continue helping the user with their original request using this data source."
@@ -94,6 +110,10 @@ async def handle_connect_datasource(session: ChatSession, tc_input: dict) -> str
     console.print()
 
     if status == "test_failed":
+        # ── Telemetry: connection failed ─────────────────────────────
+        if _settings:
+            from anton.analytics import send_event
+            send_event(_settings, "ds_connect_failed", engine=engine)
         return (
             f"CONNECTION TEST FAILED: The connection test for '{engine}' did not "
             f"succeed and the user declined to re-enter credentials. Nothing was "