From 052cbdd64ebc6822806c1c0f15c489723af2132f Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 00:01:15 -0800 Subject: [PATCH 01/20] feat(chat): integrate advanced vector-backed memory system Replace the simple JSON-based memory in the chat agent with the advanced MemoryClient that provides semantic search via ChromaDB and embeddings. Changes: - Add AdvancedMemory config dataclass with memory system options - Add CLI options: --advanced-memory, --memory-path, --memory-top-k, etc. - Refactor _tools.py to support dual backends (simple JSON vs advanced) - Initialize MemoryClient in chat agent when --advanced-memory enabled - Auto-fallback to simple memory if [memory] extra not installed The advanced memory system is enabled by default and provides: - Semantic search using vector embeddings - MMR retrieval for diverse results - Recency weighting and score thresholds - Automatic fact extraction and reconciliation Closes #158 --- agent_cli/_tools.py | 410 +++++++++++++++++++------ agent_cli/agents/chat.py | 119 ++++++- agent_cli/config.py | 29 ++ agent_cli/opts.py | 34 ++ tests/agents/test_interactive.py | 3 + tests/agents/test_interactive_extra.py | 1 + 6 files changed, 504 insertions(+), 92 deletions(-) diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py index 1a7ce8957..da846d8f1 100644 --- a/agent_cli/_tools.py +++ b/agent_cli/_tools.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import json import os import subprocess @@ -12,8 +13,67 @@ if TYPE_CHECKING: from collections.abc import Callable + from agent_cli.memory.client import MemoryClient -# Memory system helpers + +# --- Advanced Memory State --- +# These module-level variables are set by init_advanced_memory() when the chat +# agent starts with --advanced-memory enabled. + +_memory_client: MemoryClient | None = None +_conversation_id: str = "default" +_event_loop: asyncio.AbstractEventLoop | None = None + + +def init_advanced_memory( + client: MemoryClient, + conversation_id: str = "default", + event_loop: asyncio.AbstractEventLoop | None = None, +) -> None: + """Initialize the advanced memory system. + + Called by the chat agent when --advanced-memory is enabled. + + Args: + client: The MemoryClient instance to use for memory operations. + conversation_id: The conversation ID for scoping memories. + event_loop: The asyncio event loop for running async operations. + + """ + global _memory_client, _conversation_id, _event_loop + _memory_client = client + _conversation_id = conversation_id + _event_loop = event_loop + + +async def cleanup_advanced_memory() -> None: + """Clean up the advanced memory system. + + Called when the chat agent exits. 
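+
+    Stops the MemoryClient (if one is active) and resets the module-level
+    state, so subsequent tool calls fall back to the simple JSON backend.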
+ """ + global _memory_client, _event_loop + if _memory_client is not None: + await _memory_client.stop() + _memory_client = None + _event_loop = None + + +def _is_advanced_memory() -> bool: + """Check if advanced memory is enabled and initialized.""" + return _memory_client is not None and _event_loop is not None + + +def _run_async(coro: Any, timeout: float = 30.0) -> Any: + """Run an async coroutine from sync context using the stored event loop.""" + if _event_loop is None: + msg = "Event loop not initialized for advanced memory" + raise RuntimeError(msg) + + future = asyncio.run_coroutine_threadsafe(coro, _event_loop) + return future.result(timeout=timeout) + + +# --- Simple Memory System Helpers --- def _get_memory_file_path() -> Path: @@ -133,6 +193,41 @@ def execute_code(code: str) -> str: return f"Error: Command not found: {code.split()[0]}" +def _add_memory_simple(content: str, category: str, tags: str) -> str: + """Add memory using the simple JSON-based system.""" + memories = _load_memories() + + memory = { + "id": len(memories) + 1, + "content": content, + "category": category, + "tags": _parse_tags(tags), + "timestamp": datetime.now(UTC).isoformat(), + } + + memories.append(memory) + _save_memories(memories) + + return f"Memory added successfully with ID {memory['id']}" + + +def _add_memory_advanced(content: str, category: str, tags: str) -> str: + """Add memory using the advanced vector-backed system.""" + if _memory_client is None: + return "Error: Advanced memory not initialized" + + # Format content with metadata for the advanced system + formatted_content = f"[{category}] {content}" + if tags: + formatted_content += f" (tags: {tags})" + + try: + _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) + return "Memory added successfully (advanced semantic memory)" + except Exception as e: + return f"Error adding memory: {e}" + + def add_memory(content: str, category: str = "general", tags: str = "") -> str: """Add important information to long-term memory for future conversations. @@ -153,24 +248,66 @@ def add_memory(content: str, category: str = "general", tags: str = "") -> str: Confirmation message with the memory ID """ + if _is_advanced_memory(): + return _memory_operation( + "adding memory", + lambda: _add_memory_advanced(content, category, tags), + ) + return _memory_operation("adding memory", lambda: _add_memory_simple(content, category, tags)) + + +def _search_memory_simple(query: str, category: str) -> str: + """Search memory using the simple JSON-based system.""" + memories = _load_memories() + + if not memories: + return "No memories found. Memory system not initialized." 
+ + # Simple text-based search + query_lower = query.lower() + relevant_memories = [] + + for memory in memories: + # Check if query matches content, tags, or category + content_match = query_lower in memory["content"].lower() + tag_match = any(query_lower in tag.lower() for tag in memory["tags"]) + category_match = not category or memory["category"].lower() == category.lower() + + if (content_match or tag_match) and category_match: + relevant_memories.append(memory) - def _add_memory_operation() -> str: - memories = _load_memories() + if not relevant_memories: + return f"No memories found matching '{query}'" - memory = { - "id": len(memories) + 1, - "content": content, - "category": category, - "tags": _parse_tags(tags), - "timestamp": datetime.now(UTC).isoformat(), - } + # Format results + results = [_format_memory_summary(memory) for memory in relevant_memories[-5:]] - memories.append(memory) - _save_memories(memories) + return "\n".join(results) - return f"Memory added successfully with ID {memory['id']}" - return _memory_operation("adding memory", _add_memory_operation) +def _search_memory_advanced(query: str, category: str) -> str: + """Search memory using the advanced vector-backed system with semantic search.""" + if _memory_client is None: + return "Error: Advanced memory not initialized" + + # Include category in search query if provided + search_query = f"{category} {query}" if category else query + + try: + result = _run_async( + _memory_client.search(search_query, conversation_id=_conversation_id), + ) + if not result.entries: + return f"No memories found matching '{query}'" + + # Format results with relevance scores + lines = [] + for entry in result.entries: + score_info = f" (relevance: {entry.score:.2f})" if entry.score else "" + lines.append(f"- {entry.content}{score_info}") + return "\n".join(lines) + except Exception as e: + return f"Error searching memory: {e}" def search_memory(query: str, category: str = "") -> str: @@ -183,6 +320,7 @@ def search_memory(query: str, category: str = "") -> str: - To check if you've discussed a topic before The search looks through memory content and tags for matches. + When using advanced memory, this performs semantic search to find conceptually related information. Args: query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences") @@ -192,35 +330,73 @@ def search_memory(query: str, category: str = "") -> str: Relevant memories found, or message if none found """ + if _is_advanced_memory(): + return _memory_operation( + "searching memory", + lambda: _search_memory_advanced(query, category), + ) + return _memory_operation("searching memory", lambda: _search_memory_simple(query, category)) - def _search_memory_operation() -> str: - memories = _load_memories() - if not memories: - return "No memories found. Memory system not initialized." +def _update_memory_simple(memory_id: int, content: str, category: str, tags: str) -> str: + """Update memory using the simple JSON-based system.""" + memories = _load_memories() - # Simple text-based search - query_lower = query.lower() - relevant_memories = [] + if not memories: + return "No memories found. Memory system not initialized." 
- for memory in memories: - # Check if query matches content, tags, or category - content_match = query_lower in memory["content"].lower() - tag_match = any(query_lower in tag.lower() for tag in memory["tags"]) - category_match = not category or memory["category"].lower() == category.lower() + # Find memory to update + memory_to_update = _find_memory_by_id(memories, memory_id) + if not memory_to_update: + return f"Memory with ID {memory_id} not found." - if (content_match or tag_match) and category_match: - relevant_memories.append(memory) + # Update fields if provided + if content: + memory_to_update["content"] = content + if category: + memory_to_update["category"] = category + if tags: + memory_to_update["tags"] = _parse_tags(tags) - if not relevant_memories: - return f"No memories found matching '{query}'" + # Add update timestamp + memory_to_update["updated_at"] = datetime.now(UTC).isoformat() - # Format results - results = [_format_memory_summary(memory) for memory in relevant_memories[-5:]] + _save_memories(memories) + return f"Memory ID {memory_id} updated successfully." - return "\n".join(results) - return _memory_operation("searching memory", _search_memory_operation) +def _update_memory_advanced(memory_id: int, content: str, category: str, tags: str) -> str: + """Update memory using the advanced system. + + Note: The advanced memory system uses the reconciliation pipeline which + automatically manages memory updates through fact extraction. Direct updates + are handled by adding new information that supersedes old information. + """ + _ = memory_id # Advanced system uses reconciliation, not ID-based updates + if _memory_client is None: + return "Error: Advanced memory not initialized" + + if not content: + return ( + "In advanced memory mode, please provide the updated content. " + "The system will automatically reconcile it with existing memories." + ) + + # Format content with metadata + formatted_content = f"[{category or 'general'}] {content}" + if tags: + formatted_content += f" (tags: {tags})" + + try: + # Add the updated information - the advanced system's reconciliation + # pipeline will handle updating/replacing related facts + _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) + return ( + "Memory updated successfully. The advanced memory system has reconciled " + "this information with existing memories." + ) + except Exception as e: + return f"Error updating memory: {e}" def update_memory(memory_id: int, content: str = "", category: str = "", tags: str = "") -> str: @@ -232,6 +408,7 @@ def update_memory(memory_id: int, content: str = "", category: str = "", tags: s - When the user says "update my memory about..." or "change the memory where..." Only provide the fields that should be updated - empty fields will keep existing values. + In advanced memory mode, the system automatically reconciles updates with existing information. 
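+
+    For example (with a hypothetical ID), update_memory(memory_id=3,
+    content="Prefers dark mode") replaces only the content of memory 3 and
+    keeps its category and tags (simple mode); in advanced mode the new
+    content is reconciled with existing memories rather than edited in place.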
Args: memory_id: The ID of the memory to update (use search_memory or list_all_memories to find IDs) @@ -243,33 +420,70 @@ def update_memory(memory_id: int, content: str = "", category: str = "", tags: s Confirmation message or error if memory ID not found """ + if _is_advanced_memory(): + return _memory_operation( + "updating memory", + lambda: _update_memory_advanced(memory_id, content, category, tags), + ) + return _memory_operation( + "updating memory", + lambda: _update_memory_simple(memory_id, content, category, tags), + ) - def _update_memory_operation() -> str: - memories = _load_memories() - if not memories: - return "No memories found. Memory system not initialized." +def _list_all_memories_simple(limit: int) -> str: + """List all memories using the simple JSON-based system.""" + memories = _load_memories() + + if not memories: + return "No memories stored yet." + + # Sort by ID (newest first) and limit results + memories_to_show = sorted(memories, key=lambda x: x["id"], reverse=True)[:limit] + + results = [f"Showing {len(memories_to_show)} of {len(memories)} total memories:\n"] + results.extend(_format_memory_detailed(memory) for memory in memories_to_show) + + if len(memories) > limit: + results.append( + f"... and {len(memories) - limit} more memories. Use a higher limit to see more.", + ) - # Find memory to update - memory_to_update = _find_memory_by_id(memories, memory_id) - if not memory_to_update: - return f"Memory with ID {memory_id} not found." + return "\n".join(results) - # Update fields if provided - if content: - memory_to_update["content"] = content - if category: - memory_to_update["category"] = category - if tags: - memory_to_update["tags"] = _parse_tags(tags) - # Add update timestamp - memory_to_update["updated_at"] = datetime.now(UTC).isoformat() +def _list_all_memories_advanced(limit: int) -> str: + """List all memories using the advanced vector-backed system.""" + if _memory_client is None: + return "Error: Advanced memory not initialized" - _save_memories(memories) - return f"Memory ID {memory_id} updated successfully." + try: + entries = _memory_client.list_all( + conversation_id=_conversation_id, + include_summary=False, + ) - return _memory_operation("updating memory", _update_memory_operation) + if not entries: + return "No memories stored yet." + + # Limit results + entries_to_show = entries[:limit] + + results = [f"Showing {len(entries_to_show)} of {len(entries)} total memories:\n"] + for entry in entries_to_show: + created_at = entry.get("created_at", "unknown") + role = entry.get("role", "memory") + content = entry.get("content", "") + results.append(f"- [{role}] {content} (created: {created_at})") + + if len(entries) > limit: + results.append( + f"\n... and {len(entries) - limit} more memories. Use a higher limit to see more.", + ) + + return "\n".join(results) + except Exception as e: + return f"Error listing memories: {e}" def list_all_memories(limit: int = 10) -> str: @@ -289,27 +503,61 @@ def list_all_memories(limit: int = 10) -> str: Formatted list of all memories with IDs, content, categories, and tags """ + if _is_advanced_memory(): + return _memory_operation("listing memories", lambda: _list_all_memories_advanced(limit)) + return _memory_operation("listing memories", lambda: _list_all_memories_simple(limit)) - def _list_all_memories_operation() -> str: - memories = _load_memories() - if not memories: - return "No memories stored yet." 
+def _list_memory_categories_simple() -> str: + """List categories using the simple JSON-based system.""" + memories = _load_memories() - # Sort by ID (newest first) and limit results - memories_to_show = sorted(memories, key=lambda x: x["id"], reverse=True)[:limit] + if not memories: + return "No memories found. Memory system not initialized." - results = [f"Showing {len(memories_to_show)} of {len(memories)} total memories:\n"] - results.extend(_format_memory_detailed(memory) for memory in memories_to_show) + # Count categories + categories: dict[str, int] = {} + for memory in memories: + category = memory["category"] + categories[category] = categories.get(category, 0) + 1 - if len(memories) > limit: - results.append( - f"... and {len(memories) - limit} more memories. Use a higher limit to see more.", - ) + if not categories: + return "No memory categories found." - return "\n".join(results) + results = ["Memory Categories:"] + for category, count in sorted(categories.items()): + results.append(f"- {category}: {count} memories") - return _memory_operation("listing memories", _list_all_memories_operation) + return "\n".join(results) + + +def _list_memory_categories_advanced() -> str: + """List categories using the advanced vector-backed system.""" + if _memory_client is None: + return "Error: Advanced memory not initialized" + + try: + entries = _memory_client.list_all( + conversation_id=_conversation_id, + include_summary=False, + ) + + if not entries: + return "No memories found. Memory system not initialized." + + # Count by role (user, assistant, memory) + roles: dict[str, int] = {} + for entry in entries: + role = entry.get("role", "memory") + roles[role] = roles.get(role, 0) + 1 + + results = ["Memory Types (advanced memory system):"] + for role, count in sorted(roles.items()): + results.append(f"- {role}: {count} entries") + + return "\n".join(results) + except Exception as e: + return f"Error listing categories: {e}" def list_memory_categories() -> str: @@ -326,29 +574,9 @@ def list_memory_categories() -> str: Summary of memory categories with counts (e.g., "personal: 5 memories") """ - - def _list_categories_operation() -> str: - memories = _load_memories() - - if not memories: - return "No memories found. Memory system not initialized." - - # Count categories - categories: dict[str, int] = {} - for memory in memories: - category = memory["category"] - categories[category] = categories.get(category, 0) + 1 - - if not categories: - return "No memory categories found." 
-
-        results = ["Memory Categories:"]
-        for category, count in sorted(categories.items()):
-            results.append(f"- {category}: {count} memories")
-
-        return "\n".join(results)
-
-    return _memory_operation("listing categories", _list_categories_operation)
+    if _is_advanced_memory():
+        return _memory_operation("listing categories", _list_memory_categories_advanced)
+    return _memory_operation("listing categories", _list_memory_categories_simple)
 
 
 def tools() -> list:
diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py
index 507e22c4b..265534100 100644
--- a/agent_cli/agents/chat.py
+++ b/agent_cli/agents/chat.py
@@ -25,7 +25,7 @@
 import typer
 
 from agent_cli import config, opts
-from agent_cli._tools import tools
+from agent_cli._tools import cleanup_advanced_memory, init_advanced_memory, tools
 from agent_cli.cli import app
 from agent_cli.core import process
 from agent_cli.core.audio import setup_devices
@@ -53,6 +53,81 @@
 
 LOGGER = logging.getLogger(__name__)
 
+
+def _get_conversation_id(history_cfg: config.History) -> str:
+    """Generate a stable conversation ID from history configuration.
+
+    Uses a hash of the history directory path to ensure consistency across sessions.
+    """
+    import hashlib  # noqa: PLC0415
+
+    if history_cfg.history_dir:
+        return hashlib.md5(
+            str(Path(history_cfg.history_dir).resolve()).encode(),
+            usedforsecurity=False,
+        ).hexdigest()[:12]
+    return "default"
+
+
+def _try_init_advanced_memory(
+    advanced_memory_cfg: config.AdvancedMemory,
+    history_cfg: config.History,
+    openai_llm_cfg: config.OpenAILLM,
+    quiet: bool,
+) -> object | None:
+    """Try to initialize the advanced memory system.
+
+    Returns the MemoryClient if successful, None otherwise.
+    """
+    from agent_cli.memory.client import MemoryClient  # noqa: PLC0415
+
+    # Determine memory path
+    memory_path = advanced_memory_cfg.memory_path
+    if memory_path is None:
+        if history_cfg.history_dir:
+            memory_path = Path(history_cfg.history_dir).expanduser() / "vector_memory"
+        else:
+            memory_path = Path.home() / ".config" / "agent-cli" / "memory" / "vector_db"
+
+    # Determine OpenAI base URL for embeddings
+    openai_base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1"
+
+    if not quiet:
+        console.print("[dim]Initializing advanced memory system...[/dim]")
+
+    memory_client = MemoryClient(
+        memory_path=memory_path,
+        openai_base_url=openai_base_url,
+        embedding_model=advanced_memory_cfg.embedding_model,
+        embedding_api_key=openai_llm_cfg.openai_api_key,
+        chat_api_key=openai_llm_cfg.openai_api_key,
+        default_top_k=advanced_memory_cfg.top_k,
+        score_threshold=advanced_memory_cfg.score_threshold,
+        recency_weight=advanced_memory_cfg.recency_weight,
+        mmr_lambda=advanced_memory_cfg.mmr_lambda,
+        enable_summarization=advanced_memory_cfg.enable_summarization,
+        enable_git_versioning=advanced_memory_cfg.enable_git_versioning,
+        max_entries=advanced_memory_cfg.max_entries,
+        start_watcher=False,
+    )
+
+    # Start the memory client (its file watcher stays off; start_watcher=False)
+    memory_client.start()
+
+    # Generate conversation ID and initialize tools
+    conversation_id = _get_conversation_id(history_cfg)
+    init_advanced_memory(
+        memory_client,
+        conversation_id,
+        asyncio.get_running_loop(),
+    )
+
+    if not quiet:
+        console.print("[green]Advanced memory system initialized[/green]")
+
+    return memory_client
+
+
 # --- Conversation History ---
 
 
@@ -318,8 +393,11 @@ async def _async_main(
     openai_tts_cfg: config.OpenAITTS,
     kokoro_tts_cfg: config.KokoroTTS,
     gemini_tts_cfg: config.GeminiTTS,
+    advanced_memory_cfg: config.AdvancedMemory,
 ) -> None:
     """Main async function, consumes parsed arguments."""
+    memory_client = None
+
     try:
         device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
         if device_info is None:
@@ -329,6 +407,27 @@
         if audio_out_cfg.enable_tts:
             audio_out_cfg.output_device_index = tts_output_device_index
 
+        # Initialize advanced memory if enabled
+        if advanced_memory_cfg.enabled:
+            try:
+                memory_client = _try_init_advanced_memory(
+                    advanced_memory_cfg,
+                    history_cfg,
+                    openai_llm_cfg,
+                    general_cfg.quiet,
+                )
+            except Exception as e:  # ImportError means the [memory] extra is missing
+                msg = (
+                    "Advanced memory not available. Install with: uv pip install agent-cli[memory]"
+                    if isinstance(e, ImportError)
+                    else f"Failed to initialize advanced memory: {e}"
+                )
+                if not general_cfg.quiet:
+                    console.print(f"[yellow]{msg}[/yellow]")
+                    console.print("[yellow]Falling back to simple memory system.[/yellow]")
+                if not isinstance(e, ImportError):
+                    LOGGER.warning("Failed to initialize advanced memory: %s", e)
+
         # Load conversation history
         conversation_history = []
         if history_cfg.history_dir:
@@ -371,6 +470,10 @@
         if not general_cfg.quiet:
             console.print_exception()
         raise
+    finally:
+        # Clean up advanced memory client
+        if memory_client is not None:
+            await cleanup_advanced_memory()
 
 
 @app.command("chat")
@@ -433,6 +536,12 @@ def chat(
         " Set to 0 to disable history.",
         rich_help_panel="History Options",
     ),
+    # --- Advanced Memory Options ---
+    advanced_memory: bool = opts.ADVANCED_MEMORY,
+    memory_path: Path | None = opts.MEMORY_PATH,
+    memory_embedding_model: str = opts.MEMORY_EMBEDDING_MODEL,
+    memory_top_k: int = opts.MEMORY_TOP_K,
+    memory_score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
     # --- General Options ---
     save_file: Path | None = opts.SAVE_FILE,
     log_level: str = opts.LOG_LEVEL,
@@ -535,6 +644,13 @@ def chat(
         history_dir=history_dir,
         last_n_messages=last_n_messages,
     )
+    advanced_memory_cfg = config.AdvancedMemory(
+        enabled=advanced_memory,
+        memory_path=memory_path,
+        embedding_model=memory_embedding_model,
+        top_k=memory_top_k,
+        score_threshold=memory_score_threshold,
+    )
 
     asyncio.run(
         _async_main(
@@ -553,5 +669,6 @@
             openai_tts_cfg=openai_tts_cfg,
             kokoro_tts_cfg=kokoro_tts_cfg,
             gemini_tts_cfg=gemini_tts_cfg,
+            advanced_memory_cfg=advanced_memory_cfg,
         ),
     )
diff --git a/agent_cli/config.py b/agent_cli/config.py
index 65c078dfa..7230153de 100644
--- a/agent_cli/config.py
+++ b/agent_cli/config.py
@@ -224,6 +224,35 @@ def _expand_user_path(cls, v: str | None) -> Path | None:
         return None
 
 
+# --- Panel: Advanced Memory Options ---
+
+
+class AdvancedMemory(BaseModel):
+    """Configuration for advanced vector-backed memory system.
+
+    The advanced memory system uses ChromaDB with vector embeddings for
+    semantic search, providing better retrieval than simple keyword matching.
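+
+    As a sketch of how the retrieval knobs below interact (the exact scoring
+    lives in MemoryClient, not in this config class): candidates scoring
+    under score_threshold are dropped, recency_weight blends how recent an
+    entry is into its similarity score, and mmr_lambda trades relevance
+    against diversity during maximal-marginal-relevance selection
+    (1.0 = pure relevance).
+
+    Example, using the CLI flags wired up in agent_cli/opts.py:
+
+        agent-cli chat --memory-top-k 8 --memory-score-threshold 0.5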
+ """ + + enabled: bool = True + memory_path: Path | None = None + embedding_model: str = "text-embedding-3-small" + top_k: int = 5 + score_threshold: float = 0.35 + recency_weight: float = 0.2 + mmr_lambda: float = 0.7 + enable_summarization: bool = True + enable_git_versioning: bool = False + max_entries: int = 500 + + @field_validator("memory_path", mode="before") + @classmethod + def _expand_user_path(cls, v: str | None) -> Path | None: + if v: + return Path(v).expanduser() + return None + + def _config_path(config_path_str: str | None = None) -> Path | None: """Return a usable config path, expanding user directories.""" if config_path_str: diff --git a/agent_cli/opts.py b/agent_cli/opts.py index 1002066de..1cd8229d0 100644 --- a/agent_cli/opts.py +++ b/agent_cli/opts.py @@ -381,6 +381,40 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) - rich_help_panel="General Options", ) +# --- Advanced Memory Options --- +ADVANCED_MEMORY: bool = typer.Option( + True, # noqa: FBT003 + "--advanced-memory/--no-advanced-memory", + help="Use advanced vector-backed memory with semantic search. " + "Auto-falls back to simple memory if dependencies not installed.", + rich_help_panel="Memory Options", +) +MEMORY_PATH: Path | None = typer.Option( + None, + "--memory-path", + help="Path for advanced memory database storage. Default: ~/.config/agent-cli/memory/vector_db", + rich_help_panel="Memory Options", +) +MEMORY_EMBEDDING_MODEL: str = typer.Option( + DEFAULT_OPENAI_EMBEDDING_MODEL, + "--memory-embedding-model", + help="Embedding model for semantic memory search.", + rich_help_panel="Memory Options", +) +MEMORY_TOP_K: int = typer.Option( + 5, + "--memory-top-k", + help="Number of memories to retrieve per search.", + rich_help_panel="Memory Options", +) +MEMORY_SCORE_THRESHOLD: float = typer.Option( + 0.35, + "--memory-score-threshold", + help="Minimum relevance score threshold for memory retrieval (0.0-1.0).", + rich_help_panel="Memory Options", +) + + # --- Server Options --- SERVER_HOST: str = typer.Option( "0.0.0.0", # noqa: S104 diff --git a/tests/agents/test_interactive.py b/tests/agents/test_interactive.py index bc4cc7292..d25c79701 100644 --- a/tests/agents/test_interactive.py +++ b/tests/agents/test_interactive.py @@ -140,6 +140,7 @@ async def test_async_main_list_devices(tmp_path: Path) -> None: openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, + advanced_memory_cfg=config.AdvancedMemory(enabled=False), ) mock_setup_devices.assert_called_once() @@ -209,6 +210,7 @@ async def test_async_main_list_output_devices(tmp_path: Path) -> None: openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, + advanced_memory_cfg=config.AdvancedMemory(enabled=False), ) mock_setup_devices.assert_called_once() @@ -302,6 +304,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None: openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, + advanced_memory_cfg=config.AdvancedMemory(enabled=False), ) # Verify that the core functions were called diff --git a/tests/agents/test_interactive_extra.py b/tests/agents/test_interactive_extra.py index 6d14bafec..a51419a62 100644 --- a/tests/agents/test_interactive_extra.py +++ b/tests/agents/test_interactive_extra.py @@ -260,5 +260,6 @@ async def test_async_main_exception_handling(): openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, + 
advanced_memory_cfg=config.AdvancedMemory(enabled=False), ) mock_console.print_exception.assert_called_once() From 06fd1ac6bd62eb9f5ae0b72d485f94c7a2f03afb Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Jan 2026 08:02:13 +0000 Subject: [PATCH 02/20] Update auto-generated docs --- README.md | 30 ++++++++++++++++++++++++++++++ docs/commands/chat.md | 10 ++++++++++ 2 files changed, 40 insertions(+) diff --git a/README.md b/README.md index f0a5b2d5f..d9f5827bb 100644 --- a/README.md +++ b/README.md @@ -1529,6 +1529,36 @@ uv tool install "agent-cli[vad]" │ history. │ │ [default: 50] │ ╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ─────────────────────────────────────────────────────────────╮ +│ --advanced-memory --no-advanced-memo… Use advanced │ +│ vector-backed memory │ +│ with semantic │ +│ search. Auto-falls │ +│ back to simple │ +│ memory if │ +│ dependencies not │ +│ installed. │ +│ [default: │ +│ advanced-memory] │ +│ --memory-path PATH Path for advanced │ +│ memory database │ +│ storage. Default: │ +│ ~/.config/agent-cli… │ +│ --memory-embedding-… TEXT Embedding model for │ +│ semantic memory │ +│ search. │ +│ [default: │ +│ text-embedding-3-sm… │ +│ --memory-top-k INTEGER Number of memories │ +│ to retrieve per │ +│ search. │ +│ [default: 5] │ +│ --memory-score-thre… FLOAT Minimum relevance │ +│ score threshold for │ +│ memory retrieval │ +│ (0.0-1.0). │ +│ [default: 0.35] │ +╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ General Options ────────────────────────────────────────────────────────────╮ │ --save-file PATH Save TTS response audio to WAV file. │ │ --log-level TEXT Set logging level. │ diff --git a/docs/commands/chat.md b/docs/commands/chat.md index fc3a9fbb8..8bface657 100644 --- a/docs/commands/chat.md +++ b/docs/commands/chat.md @@ -165,6 +165,16 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history | `--history-dir` | `~/.config/agent-cli/history` | Directory to store conversation history. | | `--last-n-messages` | `50` | Number of messages to include in the conversation history. Set to 0 to disable history. | +### Memory Options + +| Option | Default | Description | +|--------|---------|-------------| +| `--advanced-memory/--no-advanced-memory` | `true` | Use advanced vector-backed memory with semantic search. Auto-falls back to simple memory if dependencies not installed. | +| `--memory-path` | - | Path for advanced memory database storage. Default: ~/.config/agent-cli/memory/vector_db | +| `--memory-embedding-model` | `text-embedding-3-small` | Embedding model for semantic memory search. | +| `--memory-top-k` | `5` | Number of memories to retrieve per search. | +| `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). 
| + ### General Options | Option | Default | Description | From c432bf965dbccaabe0efd2a8ba8524eb840834bb Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 00:04:14 -0800 Subject: [PATCH 03/20] docs: update documentation for advanced memory integration - Add Memory System section to chat.md explaining the new advanced memory feature with semantic search - Add cross-links between chat, memory, and architecture docs - Regenerate auto-generated options tables to include new Memory Options --- README.md | 1858 ++++++++++++++++------------------- docs/architecture/memory.md | 3 +- docs/commands/chat.md | 36 +- docs/commands/memory.md | 2 + 4 files changed, 902 insertions(+), 997 deletions(-) diff --git a/README.md b/README.md index d9f5827bb..f3f2572d3 100644 --- a/README.md +++ b/README.md @@ -391,14 +391,14 @@ agent-cli config edit Manage agent-cli configuration files. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ───────────────────────────────────────────────────────────────────╮ -│ init Create a new config file with all options commented out. │ -│ edit Open the config file in your default editor. │ -│ show Display the config file location and contents. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮ +│ init Create a new config file with all options commented out. │ +│ edit Open the config file in your default editor. │ +│ show Display the config file location and contents. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -457,53 +457,49 @@ the `[defaults]` section of your configuration file. Correct text from clipboard using a local or remote LLM. -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ text [TEXT] The text to correct. If not provided, reads from │ -│ clipboard. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. 
Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ text [TEXT] The text to correct. If not provided, reads from clipboard. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. 
│ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -548,120 +544,102 @@ the `[defaults]` section of your configuration file. Wyoming ASR Client for streaming microphone audio to a transcription server. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --extra-instructions TEXT Additional instructions for the │ -│ LLM to process the transcription. │ -│ --llm --no-llm Use an LLM to process the │ -│ transcript. │ -│ [default: no-llm] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Recovery ─────────────────────────────────────────────────────────────╮ -│ --from-file PATH Transcribe audio from a │ -│ saved WAV file instead │ -│ of recording. │ -│ --last-recording INTEGER Transcribe a saved │ -│ recording. Use 1 for │ -│ most recent, 2 for │ -│ second-to-last, etc. Use │ -│ 0 to disable (default). │ -│ [default: 0] │ -│ --save-recording --no-save-recording Save the audio recording │ -│ to disk for recovery. │ -│ [default: │ -│ save-recording] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write │ -│ logs to. │ -│ --quiet -q Suppress console output │ -│ from rich. │ -│ --config TEXT Path to a TOML │ -│ configuration file. │ -│ --print-args Print the command line │ -│ arguments, including │ -│ variables taken from the │ -│ configuration file. │ -│ --transcription-log PATH Path to log transcription │ -│ results with timestamps, │ -│ hostname, model, and raw │ -│ output. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --extra-instructions TEXT Additional instructions for the LLM to │ +│ process the transcription. │ +│ --llm --no-llm Use an LLM to process the transcript. 
│ +│ [default: no-llm] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮ +│ --from-file PATH Transcribe audio from a saved WAV │ +│ file instead of recording. │ +│ --last-recording INTEGER Transcribe a saved recording. Use │ +│ 1 for most recent, 2 for │ +│ second-to-last, etc. Use 0 to │ +│ disable (default). │ +│ [default: 0] │ +│ --save-recording --no-save-recording Save the audio recording to disk │ +│ for recovery. │ +│ [default: save-recording] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). 
│ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, │ +│ including variables taken from the │ +│ configuration file. │ +│ --transcription-log PATH Path to log transcription results │ +│ with timestamps, hostname, model, and │ +│ raw output. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -718,9 +696,8 @@ uv tool install "agent-cli[vad]" Run a continuous transcription daemon with voice activity detection. - This command runs indefinitely, capturing audio from your microphone, - detecting speech segments using Silero VAD, transcribing them, and logging - results with timestamps. + This command runs indefinitely, capturing audio from your microphone, detecting speech + segments using Silero VAD, transcribing them, and logging results with timestamps. Examples: # Basic daemon agent-cli transcribe-daemon @@ -732,121 +709,105 @@ uv tool install "agent-cli[vad]" agent-cli transcribe-daemon --llm --role notes # Custom log file and audio directory - agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir - ~/audio - - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --role -r TEXT Role name for logging │ -│ (e.g., 'meeting', │ -│ 'notes', 'user'). │ -│ [default: user] │ -│ --silence-threshold -s FLOAT Seconds of silence to end │ -│ a speech segment. │ -│ [default: 1.0] │ -│ --min-segment -m FLOAT Minimum speech duration │ -│ in seconds to trigger a │ -│ segment. │ -│ [default: 0.25] │ -│ --vad-threshold FLOAT VAD speech detection │ -│ threshold (0.0-1.0). │ -│ Higher = more aggressive │ -│ filtering. │ -│ [default: 0.3] │ -│ --save-audio --no-save-audio Save audio segments as │ -│ MP3 files. │ -│ [default: save-audio] │ -│ --audio-dir PATH Directory for MP3 files. │ -│ Default: │ -│ ~/.config/agent-cli/audio │ -│ --transcription-log -t PATH JSON Lines log file path. │ -│ Default: │ -│ ~/.config/agent-cli/tran… │ -│ --clipboard --no-clipboard Copy each transcription │ -│ to clipboard. │ -│ [default: no-clipboard] │ -│ --help -h Show this message and │ -│ exit. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --llm --no-llm Use an LLM to process the transcript. 
│ -│ [default: no-llm] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio + + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --role -r TEXT Role name for logging (e.g., │ +│ 'meeting', 'notes', 'user'). │ +│ [default: user] │ +│ --silence-threshold -s FLOAT Seconds of silence to end a speech │ +│ segment. │ +│ [default: 1.0] │ +│ --min-segment -m FLOAT Minimum speech duration in seconds │ +│ to trigger a segment. │ +│ [default: 0.25] │ +│ --vad-threshold FLOAT VAD speech detection threshold │ +│ (0.0-1.0). Higher = more aggressive │ +│ filtering. │ +│ [default: 0.3] │ +│ --save-audio --no-save-audio Save audio segments as MP3 files. │ +│ [default: save-audio] │ +│ --audio-dir PATH Directory for MP3 files. Default: │ +│ ~/.config/agent-cli/audio │ +│ --transcription-log -t PATH JSON Lines log file path. Default: │ +│ ~/.config/agent-cli/transcriptions… │ +│ --clipboard --no-clipboard Copy each transcription to │ +│ clipboard. │ +│ [default: no-clipboard] │ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). 
│ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --llm --no-llm Use an LLM to process the transcript. │ +│ [default: no-llm] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -890,89 +851,82 @@ uv tool install "agent-cli[vad]" Convert text to speech using Wyoming or OpenAI-compatible TTS server. -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --output-device-index INTEGER Index of the audio output device to │ -│ use for TTS. │ -│ --output-device-name TEXT Output device name keywords for │ -│ partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half │ -│ speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. 
│ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --output-device-index INTEGER Index of the audio output device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │ +│ twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. 
│ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --list-devices List available audio input and output devices and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1013,8 +967,7 @@ uv tool install "agent-cli[vad]" Usage: agent-cli voice-edit [OPTIONS] - Interact with clipboard text via a voice command using local or remote - services. + Interact with clipboard text via a voice command using local or remote services. Usage: @@ -1025,139 +978,124 @@ uv tool install "agent-cli[vad]" • List output devices: agent-cli voice-edit --list-output-devices • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. 
│ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). 
│ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV │ -│ file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. 
│ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). 
│ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1203,148 +1141,133 @@ uv tool install "agent-cli[vad]" Wake word-based voice assistant using local or remote services. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). 
│ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Wake Word ──────────────────────────────────────────────────────────────────╮ -│ --wake-server-ip TEXT Wyoming wake word server IP address. │ -│ [default: localhost] │ -│ --wake-server-port INTEGER Wyoming wake word server port. │ -│ [default: 10400] │ -│ --wake-word TEXT Name of wake word to detect (e.g., │ -│ 'ok_nabu', 'hey_jarvis'). │ -│ [default: ok_nabu] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. 
│ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV │ -│ file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮ +│ --wake-server-ip TEXT Wyoming wake word server IP address. │ +│ [default: localhost] │ +│ --wake-server-port INTEGER Wyoming wake word server port. │ +│ [default: 10400] │ +│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │ +│ 'hey_jarvis'). │ +│ [default: ok_nabu] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). 
│ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. 
│
+╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ──────────────────────────────────────────────────────────────────────╮
+│ --save-file                        PATH  Save TTS response audio to WAV file.          │
+│ --clipboard      --no-clipboard          Copy result to clipboard.                     │
+│                                          [default: clipboard]                          │
+│ --log-level                        TEXT  Set logging level.                            │
+│                                          [default: WARNING]                            │
+│ --log-file                         TEXT  Path to a file to write logs to.              │
+│ --quiet      -q                          Suppress console output from rich.            │
+│ --config                           TEXT  Path to a TOML configuration file.            │
+│ --print-args                             Print the command line arguments, including   │
+│                                          variables taken from the configuration file.  │
+╰────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
@@ -1397,178 +1320,158 @@ uv tool install "agent-cli[vad]"
 
  A chat agent that you can talk to.
 
-╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --help  -h        Show this message and exit.                                │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ─────────────────────────────────────────────────────────╮
-│ --asr-provider        TEXT  The ASR provider to use ('wyoming', 'openai',    │
-│                             'gemini').                                       │
-│                             [default: wyoming]                               │
-│ --llm-provider        TEXT  The LLM provider to use ('ollama', 'openai',     │
-│                             'gemini').                                       │
-│                             [default: ollama]                                │
-│ --tts-provider        TEXT  The TTS provider to use ('wyoming', 'openai',    │
-│                             'kokoro', 'gemini').                             │
-│                             [default: wyoming]                               │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ────────────────────────────────────────────────────────────────╮
-│ --input-device-index        INTEGER  Index of the audio input device to use. │
-│ --input-device-name         TEXT     Device name keywords for partial        │
-│                                      matching.                               │
-│ --list-devices                       List available audio input and output   │
-│                                      devices and exit.                       │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮
-│ --asr-wyoming-ip          TEXT     Wyoming ASR server IP address.            │
-│                                    [default: localhost]                      │
-│ --asr-wyoming-port        INTEGER  Wyoming ASR server port.                  │
-│                                    [default: 10300]                          │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮
-│ --asr-openai-model           TEXT  The OpenAI model to use for ASR           │
-│                                    (transcription).                          │
-│                                    [default: whisper-1]                      │
-│ --asr-openai-base-url        TEXT  Custom base URL for OpenAI-compatible ASR │
-│                                    API (e.g., for custom Whisper server:     │
-│                                    http://localhost:9898).                   │
-│ --asr-openai-prompt          TEXT  Custom prompt to guide transcription      │
-│                                    (optional).                               │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮
-│ --asr-gemini-model        TEXT  The Gemini model to use for ASR              │
-│                                 (transcription).                             │
-│                                 [default: gemini-3-flash-preview]            │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮
-│ --llm-ollama-model        TEXT  The Ollama model to use. Default is          │
-│                                 gemma3:4b.                                   │
-│                                 [default: gemma3:4b]                         │
-│ --llm-ollama-host         TEXT  The Ollama server host. Default is          │
-│                                 http://localhost:11434.
│ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). 
│ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ History Options ────────────────────────────────────────────────────────────╮ -│ --history-dir PATH Directory to store conversation history. │ -│ [default: ~/.config/agent-cli/history] │ -│ --last-n-messages INTEGER Number of messages to include in the │ -│ conversation history. Set to 0 to disable │ -│ history. │ -│ [default: 50] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Options ─────────────────────────────────────────────────────────────╮ -│ --advanced-memory --no-advanced-memo… Use advanced │ -│ vector-backed memory │ -│ with semantic │ -│ search. Auto-falls │ -│ back to simple │ -│ memory if │ -│ dependencies not │ -│ installed. │ -│ [default: │ -│ advanced-memory] │ -│ --memory-path PATH Path for advanced │ -│ memory database │ -│ storage. Default: │ -│ ~/.config/agent-cli… │ -│ --memory-embedding-… TEXT Embedding model for │ -│ semantic memory │ -│ search. │ -│ [default: │ -│ text-embedding-3-sm… │ -│ --memory-top-k INTEGER Number of memories │ -│ to retrieve per │ -│ search. │ -│ [default: 5] │ -│ --memory-score-thre… FLOAT Minimum relevance │ -│ score threshold for │ -│ memory retrieval │ -│ (0.0-1.0). │ -│ [default: 0.35] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. 
│ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ History Options ──────────────────────────────────────────────────────────────────────╮ +│ --history-dir PATH Directory to store conversation history. │ +│ [default: ~/.config/agent-cli/history] │ +│ --last-n-messages INTEGER Number of messages to include in the conversation │ +│ history. Set to 0 to disable history. │ +│ [default: 50] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ───────────────────────────────────────────────────────────────────────╮ +│ --advanced-memory --no-advanced-memory Use advanced │ +│ vector-backed memory with │ +│ semantic search. │ +│ Auto-falls back to simple │ +│ memory if dependencies │ +│ not installed. │ +│ [default: │ +│ advanced-memory] │ +│ --memory-path PATH Path for advanced memory │ +│ database storage. │ +│ Default: │ +│ ~/.config/agent-cli/memo… │ +│ --memory-embedding-model TEXT Embedding model for │ +│ semantic memory search. │ +│ [default: │ +│ text-embedding-3-small] │ +│ --memory-top-k INTEGER Number of memories to │ +│ retrieve per search. │ +│ [default: 5] │ +│ --memory-score-threshold FLOAT Minimum relevance score │ +│ threshold for memory │ +│ retrieval (0.0-1.0). 
│ +│ [default: 0.35] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1616,52 +1519,49 @@ uv tool install "agent-cli[vad]" Start the RAG (Retrieval-Augmented Generation) Proxy Server. This server watches a folder for documents, indexes them, and provides an - OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), - injecting relevant context from the documents. - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ RAG Configuration ──────────────────────────────────────────────────────────╮ -│ --docs-folder PATH Folder to watch for documents │ -│ [default: ./rag_docs] │ -│ --chroma-path PATH Path to ChromaDB persistence │ -│ directory │ -│ [default: ./rag_db] │ -│ --limit INTEGER Number of document chunks to │ -│ retrieve per query. │ -│ [default: 3] │ -│ --rag-tools --no-rag-tools Allow agent to fetch full │ -│ documents when snippets are │ -│ insufficient. │ -│ [default: rag-tools] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ───────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8000] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting + relevant context from the documents. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮ +│ --docs-folder PATH Folder to watch for documents │ +│ [default: ./rag_docs] │ +│ --chroma-path PATH Path to ChromaDB persistence directory │ +│ [default: ./rag_db] │ +│ --limit INTEGER Number of document chunks to retrieve per │ +│ query. │ +│ [default: 3] │ +│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │ +│ snippets are insufficient. │ +│ [default: rag-tools] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8000] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables taken │ +│ from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1717,107 +1617,91 @@ The `memory proxy` command is the core feature—a middleware server that gives Start the memory-backed chat proxy server. - This server acts as a middleware between your chat client (e.g., a web UI, - CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, - Ollama, vLLM). + This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE + plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM). Key Features: - • Simple Markdown Files: Memories are stored as human-readable Markdown - files, serving as the ultimate source of truth. - • Automatic Version Control: Built-in Git integration automatically commits - changes, providing a full history of memory evolution. - • Lightweight & Local: Minimal dependencies and runs entirely on your - machine. - • Proxy Middleware: Works transparently with any OpenAI-compatible - /chat/completions endpoint. + • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving + as the ultimate source of truth. + • Automatic Version Control: Built-in Git integration automatically commits changes, + providing a full history of memory evolution. + • Lightweight & Local: Minimal dependencies and runs entirely on your machine. 
+ • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions + endpoint. How it works: 1 Intercepts POST /v1/chat/completions requests. - 2 Retrieves relevant memories (facts, previous conversations) from a local - vector database (ChromaDB) based on the user's query. + 2 Retrieves relevant memories (facts, previous conversations) from a local vector + database (ChromaDB) based on the user's query. 3 Injects these memories into the system prompt. 4 Forwards the augmented request to the real LLM (--openai-base-url). - 5 Extracts new facts from the conversation in the background and updates the - long-term memory store (including handling contradictions). - - Use this to give "long-term memory" to any OpenAI-compatible application. - Point your client's base URL to http://localhost:8100/v1. - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ───────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory │ -│ store (files + derived │ -│ vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory │ -│ entries to retrieve per │ -│ query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory │ -│ entries per │ -│ conversation (excluding │ -│ summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): │ -│ higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight │ -│ (0.0-1.0). Controls │ -│ freshness vs. │ -│ relevance. Default 0.2 │ -│ (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic │ -│ relevance threshold │ -│ (0.0-1.0). Memories │ -│ below this score are │ -│ discarded to reduce │ -│ noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact │ -│ extraction and │ -│ summaries. │ -│ [default: │ -│ summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git │ -│ commit of memory │ -│ changes. │ -│ [default: │ -│ git-versioning] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ───────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8100] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. 
│ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + 5 Extracts new facts from the conversation in the background and updates the long-term + memory store (including handling contradictions). + + Use this to give "long-term memory" to any OpenAI-compatible application. Point your + client's base URL to http://localhost:8100/v1. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮ +│ --memory-path PATH Path to the memory store (files + │ +│ derived vector index). │ +│ [default: ./memory_db] │ +│ --default-top-k INTEGER Number of memory entries to │ +│ retrieve per query. │ +│ [default: 5] │ +│ --max-entries INTEGER Maximum stored memory entries per │ +│ conversation (excluding summary). │ +│ [default: 500] │ +│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │ +│ relevance, lower favors │ +│ diversity. │ +│ [default: 0.7] │ +│ --recency-weight FLOAT Recency score weight (0.0-1.0). │ +│ Controls freshness vs. relevance. │ +│ Default 0.2 (20% recency, 80% │ +│ semantic relevance). │ +│ [default: 0.2] │ +│ --score-threshold FLOAT Minimum semantic relevance │ +│ threshold (0.0-1.0). Memories │ +│ below this score are discarded to │ +│ reduce noise. │ +│ [default: 0.35] │ +│ --summarization --no-summarization Enable automatic fact extraction │ +│ and summaries. │ +│ [default: summarization] │ +│ --git-versioning --no-git-versioning Enable automatic git commit of │ +│ memory changes. │ +│ [default: git-versioning] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8100] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables taken │ +│ from the configuration file. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1868,11 +1752,11 @@ agent-cli memory add -c work "Project deadline is Friday" Add memories directly without LLM extraction. - This writes facts directly to the memory store, bypassing the LLM-based fact - extraction. Useful for bulk imports or seeding memories. + This writes facts directly to the memory store, bypassing the LLM-based fact extraction. + Useful for bulk imports or seeding memories. - The memory proxy file watcher (if running) will auto-index new files. - Otherwise, they'll be indexed on next memory proxy startup. + The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll + be indexed on next memory proxy startup. Examples:: @@ -1893,35 +1777,29 @@ agent-cli memory add -c work "Project deadline is Friday" agent-cli memory add -c work "Project deadline is Friday" -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -│ memories [MEMORIES]... Memories to add. Each argument becomes one │ -│ fact. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. │ -│ Use '-' for stdin. │ -│ Supports JSON array, │ -│ JSON object with │ -│ 'memories' key, or plain │ -│ text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add │ -│ memories to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory │ -│ store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: │ -│ git-versioning] │ -│ --help -h Show this message and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮ +│ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --file -f PATH Read memories from file. Use '-' │ +│ for stdin. Supports JSON array, │ +│ JSON object with 'memories' key, │ +│ or plain text (one per line). │ +│ --conversation-id -c TEXT Conversation ID to add memories │ +│ to. │ +│ [default: default] │ +│ --memory-path PATH Path to the memory store. │ +│ [default: ./memory_db] │ +│ --git-versioning --no-git-versioning Commit changes to git. │ +│ [default: git-versioning] │ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` diff --git a/docs/architecture/memory.md b/docs/architecture/memory.md index f2cb3600f..3d804bfb0 100644 --- a/docs/architecture/memory.md +++ b/docs/architecture/memory.md @@ -39,7 +39,8 @@ A local-first system that gives LLMs persistent memory across conversations, wit ### Related -- [memory command](../commands/memory.md) - How to run the memory proxy and add memories +- [chat command](../commands/chat.md) - Voice-based chat agent with integrated advanced memory +- [memory command](../commands/memory.md) - Memory proxy server for any OpenAI-compatible app - [Configuration](../configuration.md) - Config file keys and defaults - [RAG System Architecture](rag.md) - Related retrieval stack for documents - [rag-proxy command](../commands/rag-proxy.md) - Document retrieval server diff --git a/docs/commands/chat.md b/docs/commands/chat.md index 8bface657..7d34fc14e 100644 --- a/docs/commands/chat.md +++ b/docs/commands/chat.md @@ -189,19 +189,43 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history -## Available Tools +## Memory System -The chat agent has access to tools that let it interact with your system: +The chat agent includes a built-in long-term memory system that allows it to remember information across conversations. + +### Advanced Memory (Default) + +By default, the chat agent uses the **advanced vector-backed memory system** with semantic search. This provides: + +- **Semantic search**: Find relevant memories based on meaning, not just keywords +- **Recency-aware scoring**: Recent memories are weighted higher +- **Diversity selection (MMR)**: Avoids redundant memories in context +- **Automatic reconciliation**: Contradicting facts are updated, not duplicated > [!NOTE] -> The memory tools below use a simple, built-in JSON storage system. -> For the advanced, vector-backed memory system, see the [`memory`](memory.md) command. +> Advanced memory requires the `[memory]` extra: `pip install "agent-cli[memory]"`. +> If not installed, the system automatically falls back to simple JSON storage with a warning. + +To disable advanced memory and use the simple JSON system: +```bash +agent-cli chat --no-advanced-memory +``` + +For more details on how the memory system works, see [Memory System Architecture](../architecture/memory.md). + +### Simple Memory Fallback + +When advanced memory is disabled or unavailable, the agent uses a simple JSON-based storage system with text matching. 
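+
+When the advanced backend is active, retrieval can be tuned with the flags listed under "Memory Options" in `agent-cli chat --help`. A minimal sketch with illustrative values (the documented defaults are `--memory-top-k 5` and `--memory-score-threshold 0.35`):
+
+```bash
+# Retrieve more candidate memories per query, but require a higher relevance score
+agent-cli chat --memory-top-k 10 --memory-score-threshold 0.5
+```
+
+A higher threshold keeps the injected context tighter, while a lower one admits more loosely related memories.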
+ +## Available Tools + +The chat agent has access to tools that let it interact with your system: - **read_file**: Read file contents - **execute_code**: Run a single command (no shell features like pipes or redirects) - **duckduckgo_search**: Search the web via DuckDuckGo -- **add_memory**: Store information for future conversations -- **search_memory**: Search stored memories +- **add_memory**: Store information for future conversations (uses [advanced memory](../architecture/memory.md) when enabled) +- **search_memory**: Search stored memories with semantic search - **update_memory**: Update existing memories - **list_all_memories**: List all stored memories - **list_memory_categories**: Show memory category summary diff --git a/docs/commands/memory.md b/docs/commands/memory.md index 5ff97a459..9d97a53a0 100644 --- a/docs/commands/memory.md +++ b/docs/commands/memory.md @@ -221,6 +221,8 @@ See [Memory System Architecture](../architecture/memory.md) for the full schema ## Related +- [chat command](chat.md) - Voice-based chat agent with integrated advanced memory +- [Memory System Architecture](../architecture/memory.md) - Full technical specification - [Configuration](../configuration.md) - Config file keys for memory proxy defaults - [rag-proxy](rag-proxy.md) - Document RAG proxy server (contrast with memory) - [RAG System Architecture](../architecture/rag.md) - How RAG indexing and retrieval works From 41c2f41048877c26eff206a1e1f7ba21f3449a11 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Jan 2026 08:05:42 +0000 Subject: [PATCH 04/20] Update auto-generated docs --- README.md | 1858 ++++++++++++++++++++++++++++------------------------- 1 file changed, 990 insertions(+), 868 deletions(-) diff --git a/README.md b/README.md index f3f2572d3..d9f5827bb 100644 --- a/README.md +++ b/README.md @@ -391,14 +391,14 @@ agent-cli config edit Manage agent-cli configuration files. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮ -│ init Create a new config file with all options commented out. │ -│ edit Open the config file in your default editor. │ -│ show Display the config file location and contents. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────╮ +│ init Create a new config file with all options commented out. │ +│ edit Open the config file in your default editor. │ +│ show Display the config file location and contents. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -457,49 +457,53 @@ the `[defaults]` section of your configuration file. Correct text from clipboard using a local or remote LLM. -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ text [TEXT] The text to correct. If not provided, reads from clipboard. 
│ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ text [TEXT] The text to correct. If not provided, reads from │ +│ clipboard. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. 
Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -544,102 +548,120 @@ the `[defaults]` section of your configuration file. Wyoming ASR Client for streaming microphone audio to a transcription server. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --extra-instructions TEXT Additional instructions for the LLM to │ -│ process the transcription. │ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮ -│ --from-file PATH Transcribe audio from a saved WAV │ -│ file instead of recording. │ -│ --last-recording INTEGER Transcribe a saved recording. Use │ -│ 1 for most recent, 2 for │ -│ second-to-last, etc. Use 0 to │ -│ disable (default). │ -│ [default: 0] │ -│ --save-recording --no-save-recording Save the audio recording to disk │ -│ for recovery. │ -│ [default: save-recording] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. 
│ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. 
│ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -│ --transcription-log PATH Path to log transcription results │ -│ with timestamps, hostname, model, and │ -│ raw output. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --extra-instructions TEXT Additional instructions for the │ +│ LLM to process the transcription. │ +│ --llm --no-llm Use an LLM to process the │ +│ transcript. │ +│ [default: no-llm] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Recovery ─────────────────────────────────────────────────────────────╮ +│ --from-file PATH Transcribe audio from a │ +│ saved WAV file instead │ +│ of recording. │ +│ --last-recording INTEGER Transcribe a saved │ +│ recording. Use 1 for │ +│ most recent, 2 for │ +│ second-to-last, etc. Use │ +│ 0 to disable (default). │ +│ [default: 0] │ +│ --save-recording --no-save-recording Save the audio recording │ +│ to disk for recovery. │ +│ [default: │ +│ save-recording] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). 
│ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write │ +│ logs to. │ +│ --quiet -q Suppress console output │ +│ from rich. │ +│ --config TEXT Path to a TOML │ +│ configuration file. │ +│ --print-args Print the command line │ +│ arguments, including │ +│ variables taken from the │ +│ configuration file. │ +│ --transcription-log PATH Path to log transcription │ +│ results with timestamps, │ +│ hostname, model, and raw │ +│ output. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -696,8 +718,9 @@ uv tool install "agent-cli[vad]" Run a continuous transcription daemon with voice activity detection. - This command runs indefinitely, capturing audio from your microphone, detecting speech - segments using Silero VAD, transcribing them, and logging results with timestamps. + This command runs indefinitely, capturing audio from your microphone, + detecting speech segments using Silero VAD, transcribing them, and logging + results with timestamps. 
Examples: # Basic daemon agent-cli transcribe-daemon @@ -709,105 +732,121 @@ uv tool install "agent-cli[vad]" agent-cli transcribe-daemon --llm --role notes # Custom log file and audio directory - agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio - - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --role -r TEXT Role name for logging (e.g., │ -│ 'meeting', 'notes', 'user'). │ -│ [default: user] │ -│ --silence-threshold -s FLOAT Seconds of silence to end a speech │ -│ segment. │ -│ [default: 1.0] │ -│ --min-segment -m FLOAT Minimum speech duration in seconds │ -│ to trigger a segment. │ -│ [default: 0.25] │ -│ --vad-threshold FLOAT VAD speech detection threshold │ -│ (0.0-1.0). Higher = more aggressive │ -│ filtering. │ -│ [default: 0.3] │ -│ --save-audio --no-save-audio Save audio segments as MP3 files. │ -│ [default: save-audio] │ -│ --audio-dir PATH Directory for MP3 files. Default: │ -│ ~/.config/agent-cli/audio │ -│ --transcription-log -t PATH JSON Lines log file path. Default: │ -│ ~/.config/agent-cli/transcriptions… │ -│ --clipboard --no-clipboard Copy each transcription to │ -│ clipboard. │ -│ [default: no-clipboard] │ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. 
│ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir + ~/audio + + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --role -r TEXT Role name for logging │ +│ (e.g., 'meeting', │ +│ 'notes', 'user'). │ +│ [default: user] │ +│ --silence-threshold -s FLOAT Seconds of silence to end │ +│ a speech segment. │ +│ [default: 1.0] │ +│ --min-segment -m FLOAT Minimum speech duration │ +│ in seconds to trigger a │ +│ segment. │ +│ [default: 0.25] │ +│ --vad-threshold FLOAT VAD speech detection │ +│ threshold (0.0-1.0). │ +│ Higher = more aggressive │ +│ filtering. │ +│ [default: 0.3] │ +│ --save-audio --no-save-audio Save audio segments as │ +│ MP3 files. │ +│ [default: save-audio] │ +│ --audio-dir PATH Directory for MP3 files. │ +│ Default: │ +│ ~/.config/agent-cli/audio │ +│ --transcription-log -t PATH JSON Lines log file path. │ +│ Default: │ +│ ~/.config/agent-cli/tran… │ +│ --clipboard --no-clipboard Copy each transcription │ +│ to clipboard. │ +│ [default: no-clipboard] │ +│ --help -h Show this message and │ +│ exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --llm --no-llm Use an LLM to process the transcript. 
│ +│ [default: no-llm] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -851,82 +890,89 @@ uv tool install "agent-cli[vad]" Convert text to speech using Wyoming or OpenAI-compatible TTS server. -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ -│ 'gemini'). │ -│ [default: wyoming] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --output-device-index INTEGER Index of the audio output device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │ -│ twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. 
│ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --list-devices List available audio input and output devices and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --output-device-index INTEGER Index of the audio output device to │ +│ use for TTS. │ +│ --output-device-name TEXT Output device name keywords for │ +│ partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half │ +│ speed). 
│ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ +│ API (e.g., http://localhost:8000/v1 for a │ +│ proxy). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ +│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -967,7 +1013,8 @@ uv tool install "agent-cli[vad]" Usage: agent-cli voice-edit [OPTIONS] - Interact with clipboard text via a voice command using local or remote services. + Interact with clipboard text via a voice command using local or remote + services. Usage: @@ -978,124 +1025,139 @@ uv tool install "agent-cli[vad]" • List output devices: agent-cli voice-edit --list-output-devices • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ -│ 'gemini'). │ -│ [default: wyoming] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). 
│ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output device to use │ -│ for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. 
│ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. 
│ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for │ +│ responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output │ +│ device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords │ +│ for partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ +│ normal, 2.0 = twice as fast, │ +│ 0.5 = half speed). │ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ +│ API (e.g., http://localhost:8000/v1 for a │ +│ proxy). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ +│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV │ +│ file. │ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, │ +│ including variables taken from the │ +│ configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1141,133 +1203,148 @@ uv tool install "agent-cli[vad]" Wake word-based voice assistant using local or remote services. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ -│ 'gemini'). │ -│ [default: wyoming] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮ -│ --wake-server-ip TEXT Wyoming wake word server IP address. │ -│ [default: localhost] │ -│ --wake-server-port INTEGER Wyoming wake word server port. │ -│ [default: 10400] │ -│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │ -│ 'hey_jarvis'). │ -│ [default: ok_nabu] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). 
│ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output device to use │ -│ for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. 
│ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Wake Word ──────────────────────────────────────────────────────────────────╮ +│ --wake-server-ip TEXT Wyoming wake word server IP address. │ +│ [default: localhost] │ +│ --wake-server-port INTEGER Wyoming wake word server port. │ +│ [default: 10400] │ +│ --wake-word TEXT Name of wake word to detect (e.g., │ +│ 'ok_nabu', 'hey_jarvis'). │ +│ [default: ok_nabu] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. 
│ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for │ +│ responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output │ +│ device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords │ +│ for partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ +│ normal, 2.0 = twice as fast, │ +│ 0.5 = half speed). │ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. 
│
+│                                   [default: alloy]                          │
+│ --tts-openai-base-url    TEXT     Custom base URL for OpenAI-compatible TTS  │
+│                                   API (e.g., http://localhost:8000/v1 for a  │
+│                                   proxy).                                    │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮
+│ --tts-kokoro-model       TEXT     The Kokoro model to use for TTS.           │
+│                                   [default: kokoro]                          │
+│ --tts-kokoro-voice       TEXT     The voice to use for Kokoro TTS.           │
+│                                   [default: af_sky]                          │
+│ --tts-kokoro-host        TEXT     The base URL for the Kokoro API.           │
+│                                   [default: http://localhost:8880/v1]        │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮
+│ --tts-gemini-model       TEXT     The Gemini model to use for TTS.           │
+│                                   [default: gemini-2.5-flash-preview-tts]    │
+│ --tts-gemini-voice       TEXT     The voice to use for Gemini TTS (e.g.,     │
+│                                   'Kore', 'Puck', 'Charon', 'Fenrir').       │
+│                                   [default: Kore]                            │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop     Stop any running background process.                              │
+│ --status   Check if a background process is running.                         │
+│ --toggle   Toggle the background process on/off. If the process is           │
+│            running, it will be stopped. If the process is not                │
+│            running, it will be started.                                      │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --save-file                       PATH   Save TTS response audio to WAV      │
+│                                          file.                               │
+│ --clipboard     --no-clipboard           Copy result to clipboard.           │
+│                                          [default: clipboard]                │
+│ --log-level                       TEXT   Set logging level.                  │
+│                                          [default: WARNING]                  │
+│ --log-file                        TEXT   Path to a file to write logs to.    │
+│ --quiet         -q                       Suppress console output from rich.  │
+│ --config                          TEXT   Path to a TOML configuration file.  │
+│ --print-args                             Print the command line arguments,   │
+│                                          including variables taken from the  │
+│                                          configuration file.                 │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```


@@ -1320,158 +1397,178 @@ uv tool install "agent-cli[vad]"

   A chat agent that you can talk to.

-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --help  -h  Show this message and exit.                                                 │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --asr-provider   TEXT  The ASR provider to use ('wyoming', 'openai', 'gemini').         │
-│                        [default: wyoming]                                               │
-│ --llm-provider   TEXT  The LLM provider to use ('ollama', 'openai', 'gemini').          │
-│                        [default: ollama]                                                │
-│ --tts-provider   TEXT  The TTS provider to use ('wyoming', 'openai', 'kokoro',          │
-│                        'gemini').                                                       │
-│                        [default: wyoming]                                               │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --input-device-index   INTEGER  Index of the audio input device to use.                 │
-│ --input-device-name    TEXT     Device name keywords for partial matching.              │
-│ --list-devices                  List available audio input and output devices and       │
-│                                 exit.
│ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output device to use │ -│ for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. 
│ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ History Options ──────────────────────────────────────────────────────────────────────╮ -│ --history-dir PATH Directory to store conversation history. │ -│ [default: ~/.config/agent-cli/history] │ -│ --last-n-messages INTEGER Number of messages to include in the conversation │ -│ history. Set to 0 to disable history. │ -│ [default: 50] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Options ───────────────────────────────────────────────────────────────────────╮ -│ --advanced-memory --no-advanced-memory Use advanced │ -│ vector-backed memory with │ -│ semantic search. │ -│ Auto-falls back to simple │ -│ memory if dependencies │ -│ not installed. │ -│ [default: │ -│ advanced-memory] │ -│ --memory-path PATH Path for advanced memory │ -│ database storage. │ -│ Default: │ -│ ~/.config/agent-cli/memo… │ -│ --memory-embedding-model TEXT Embedding model for │ -│ semantic memory search. │ -│ [default: │ -│ text-embedding-3-small] │ -│ --memory-top-k INTEGER Number of memories to │ -│ retrieve per search. │ -│ [default: 5] │ -│ --memory-score-threshold FLOAT Minimum relevance score │ -│ threshold for memory │ -│ retrieval (0.0-1.0). 
│ -│ [default: 0.35] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. 
Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for │ +│ responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output │ +│ device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords │ +│ for partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ +│ normal, 2.0 = twice as fast, │ +│ 0.5 = half speed). │ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ +│ API (e.g., http://localhost:8000/v1 for a │ +│ proxy). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ +│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. 
If the process is not │ +│ running, it will be started. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ History Options ────────────────────────────────────────────────────────────╮ +│ --history-dir PATH Directory to store conversation history. │ +│ [default: ~/.config/agent-cli/history] │ +│ --last-n-messages INTEGER Number of messages to include in the │ +│ conversation history. Set to 0 to disable │ +│ history. │ +│ [default: 50] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ─────────────────────────────────────────────────────────────╮ +│ --advanced-memory --no-advanced-memo… Use advanced │ +│ vector-backed memory │ +│ with semantic │ +│ search. Auto-falls │ +│ back to simple │ +│ memory if │ +│ dependencies not │ +│ installed. │ +│ [default: │ +│ advanced-memory] │ +│ --memory-path PATH Path for advanced │ +│ memory database │ +│ storage. Default: │ +│ ~/.config/agent-cli… │ +│ --memory-embedding-… TEXT Embedding model for │ +│ semantic memory │ +│ search. │ +│ [default: │ +│ text-embedding-3-sm… │ +│ --memory-top-k INTEGER Number of memories │ +│ to retrieve per │ +│ search. │ +│ [default: 5] │ +│ --memory-score-thre… FLOAT Minimum relevance │ +│ score threshold for │ +│ memory retrieval │ +│ (0.0-1.0). │ +│ [default: 0.35] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1519,49 +1616,52 @@ uv tool install "agent-cli[vad]" Start the RAG (Retrieval-Augmented Generation) Proxy Server. This server watches a folder for documents, indexes them, and provides an - OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting - relevant context from the documents. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮ -│ --docs-folder PATH Folder to watch for documents │ -│ [default: ./rag_docs] │ -│ --chroma-path PATH Path to ChromaDB persistence directory │ -│ [default: ./rag_db] │ -│ --limit INTEGER Number of document chunks to retrieve per │ -│ query. │ -│ [default: 3] │ -│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │ -│ snippets are insufficient. │ -│ [default: rag-tools] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. 
│ -│ [env var: OPENAI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8000] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables taken │ -│ from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), + injecting relevant context from the documents. + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ RAG Configuration ──────────────────────────────────────────────────────────╮ +│ --docs-folder PATH Folder to watch for documents │ +│ [default: ./rag_docs] │ +│ --chroma-path PATH Path to ChromaDB persistence │ +│ directory │ +│ [default: ./rag_db] │ +│ --limit INTEGER Number of document chunks to │ +│ retrieve per query. │ +│ [default: 3] │ +│ --rag-tools --no-rag-tools Allow agent to fetch full │ +│ documents when snippets are │ +│ insufficient. │ +│ [default: rag-tools] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ───────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8000] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1617,91 +1717,107 @@ The `memory proxy` command is the core feature—a middleware server that gives Start the memory-backed chat proxy server. - This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE - plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM). + This server acts as a middleware between your chat client (e.g., a web UI, + CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, + Ollama, vLLM). Key Features: - • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving - as the ultimate source of truth. - • Automatic Version Control: Built-in Git integration automatically commits changes, - providing a full history of memory evolution. - • Lightweight & Local: Minimal dependencies and runs entirely on your machine. - • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions - endpoint. + • Simple Markdown Files: Memories are stored as human-readable Markdown + files, serving as the ultimate source of truth. + • Automatic Version Control: Built-in Git integration automatically commits + changes, providing a full history of memory evolution. + • Lightweight & Local: Minimal dependencies and runs entirely on your + machine. + • Proxy Middleware: Works transparently with any OpenAI-compatible + /chat/completions endpoint. How it works: 1 Intercepts POST /v1/chat/completions requests. - 2 Retrieves relevant memories (facts, previous conversations) from a local vector - database (ChromaDB) based on the user's query. + 2 Retrieves relevant memories (facts, previous conversations) from a local + vector database (ChromaDB) based on the user's query. 3 Injects these memories into the system prompt. 4 Forwards the augmented request to the real LLM (--openai-base-url). - 5 Extracts new facts from the conversation in the background and updates the long-term - memory store (including handling contradictions). - - Use this to give "long-term memory" to any OpenAI-compatible application. Point your - client's base URL to http://localhost:8100/v1. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory store (files + │ -│ derived vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory entries to │ -│ retrieve per query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory entries per │ -│ conversation (excluding summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight (0.0-1.0). │ -│ Controls freshness vs. relevance. │ -│ Default 0.2 (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic relevance │ -│ threshold (0.0-1.0). Memories │ -│ below this score are discarded to │ -│ reduce noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact extraction │ -│ and summaries. │ -│ [default: summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git commit of │ -│ memory changes. 
│ -│ [default: git-versioning] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8100] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables taken │ -│ from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + 5 Extracts new facts from the conversation in the background and updates the + long-term memory store (including handling contradictions). + + Use this to give "long-term memory" to any OpenAI-compatible application. + Point your client's base URL to http://localhost:8100/v1. + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Configuration ───────────────────────────────────────────────────────╮ +│ --memory-path PATH Path to the memory │ +│ store (files + derived │ +│ vector index). │ +│ [default: ./memory_db] │ +│ --default-top-k INTEGER Number of memory │ +│ entries to retrieve per │ +│ query. │ +│ [default: 5] │ +│ --max-entries INTEGER Maximum stored memory │ +│ entries per │ +│ conversation (excluding │ +│ summary). │ +│ [default: 500] │ +│ --mmr-lambda FLOAT MMR lambda (0-1): │ +│ higher favors │ +│ relevance, lower favors │ +│ diversity. │ +│ [default: 0.7] │ +│ --recency-weight FLOAT Recency score weight │ +│ (0.0-1.0). Controls │ +│ freshness vs. │ +│ relevance. Default 0.2 │ +│ (20% recency, 80% │ +│ semantic relevance). │ +│ [default: 0.2] │ +│ --score-threshold FLOAT Minimum semantic │ +│ relevance threshold │ +│ (0.0-1.0). Memories │ +│ below this score are │ +│ discarded to reduce │ +│ noise. │ +│ [default: 0.35] │ +│ --summarization --no-summarization Enable automatic fact │ +│ extraction and │ +│ summaries. │ +│ [default: │ +│ summarization] │ +│ --git-versioning --no-git-versioning Enable automatic git │ +│ commit of memory │ +│ changes. 
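Because the proxy intercepts `POST /v1/chat/completions`, any OpenAI-compatible client works unchanged once it targets `http://localhost:8100/v1`. A minimal sketch (the `model` value is whatever your backend expects, and the API key can be a dummy value for local backends):

```bash
# Talk to the memory proxy instead of the LLM directly; relevant memories are
# injected into the system prompt before the request is forwarded upstream.
curl -s http://localhost:8100/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer dummy-key" \
  -d '{
        "model": "gpt-5-mini",
        "messages": [{"role": "user", "content": "Where did we leave off yesterday?"}]
      }'
```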
│ +│ [default: │ +│ git-versioning] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ───────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8100] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1752,11 +1868,11 @@ agent-cli memory add -c work "Project deadline is Friday" Add memories directly without LLM extraction. - This writes facts directly to the memory store, bypassing the LLM-based fact extraction. - Useful for bulk imports or seeding memories. + This writes facts directly to the memory store, bypassing the LLM-based fact + extraction. Useful for bulk imports or seeding memories. - The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll - be indexed on next memory proxy startup. + The memory proxy file watcher (if running) will auto-index new files. + Otherwise, they'll be indexed on next memory proxy startup. Examples:: @@ -1777,29 +1893,35 @@ agent-cli memory add -c work "Project deadline is Friday" agent-cli memory add -c work "Project deadline is Friday" -╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮ -│ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. Use '-' │ -│ for stdin. Supports JSON array, │ -│ JSON object with 'memories' key, │ -│ or plain text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add memories │ -│ to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: git-versioning] │ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. 
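The stdin mode makes bulk seeding a one-liner (a sketch of the documented `--file -` behavior; the facts themselves are placeholders):

```bash
# Seed the store from a JSON array via stdin; plain text (one fact per line)
# and a JSON object with a "memories" key are accepted the same way.
echo '["Prefers metric units", "Team standup is at 09:30"]' \
  | agent-cli memory add --file - --conversation-id work
```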
│ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +│ memories [MEMORIES]... Memories to add. Each argument becomes one │ +│ fact. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --file -f PATH Read memories from file. │ +│ Use '-' for stdin. │ +│ Supports JSON array, │ +│ JSON object with │ +│ 'memories' key, or plain │ +│ text (one per line). │ +│ --conversation-id -c TEXT Conversation ID to add │ +│ memories to. │ +│ [default: default] │ +│ --memory-path PATH Path to the memory │ +│ store. │ +│ [default: ./memory_db] │ +│ --git-versioning --no-git-versioning Commit changes to git. │ +│ [default: │ +│ git-versioning] │ +│ --help -h Show this message and │ +│ exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` From 0557d36477366dc7eb5b5349de46739ee3fc3c75 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 00:24:26 -0800 Subject: [PATCH 05/20] refactor(chat): simplify memory system by removing dual-backend Remove the simple JSON-based memory system, keeping only the vector-backed MemoryClient. This simplifies the codebase by eliminating the dual-backend logic and the --advanced-memory flag. - Rename AdvancedMemory config to Memory, remove enabled field - Remove all simple memory functions from _tools.py - Rename init_advanced_memory/cleanup_advanced_memory to init_memory/cleanup_memory - Update chat.py to use simplified memory initialization - Update documentation to remove "advanced" terminology - Remove obsolete test_memory_tools.py --- README.md | 1844 +++++++++++------------- agent_cli/_tools.py | 432 ++---- agent_cli/agents/chat.py | 82 +- agent_cli/config.py | 11 +- agent_cli/opts.py | 11 +- docs/architecture/memory.md | 2 +- docs/commands/chat.md | 22 +- docs/commands/memory.md | 2 +- tests/agents/test_interactive.py | 6 +- tests/agents/test_interactive_extra.py | 2 +- tests/test_memory_tools.py | 117 -- 11 files changed, 994 insertions(+), 1537 deletions(-) delete mode 100644 tests/test_memory_tools.py diff --git a/README.md b/README.md index d9f5827bb..3efe0f4b9 100644 --- a/README.md +++ b/README.md @@ -391,14 +391,14 @@ agent-cli config edit Manage agent-cli configuration files. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ───────────────────────────────────────────────────────────────────╮ -│ init Create a new config file with all options commented out. │ -│ edit Open the config file in your default editor. │ -│ show Display the config file location and contents. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮ +│ init Create a new config file with all options commented out. │ +│ edit Open the config file in your default editor. │ +│ show Display the config file location and contents. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -457,53 +457,49 @@ the `[defaults]` section of your configuration file. Correct text from clipboard using a local or remote LLM. -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ text [TEXT] The text to correct. If not provided, reads from │ -│ clipboard. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ text [TEXT] The text to correct. 
If not provided, reads from clipboard. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -548,120 +544,102 @@ the `[defaults]` section of your configuration file. Wyoming ASR Client for streaming microphone audio to a transcription server. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --extra-instructions TEXT Additional instructions for the │ -│ LLM to process the transcription. │ -│ --llm --no-llm Use an LLM to process the │ -│ transcript. │ -│ [default: no-llm] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Recovery ─────────────────────────────────────────────────────────────╮ -│ --from-file PATH Transcribe audio from a │ -│ saved WAV file instead │ -│ of recording. 
│ -│ --last-recording INTEGER Transcribe a saved │ -│ recording. Use 1 for │ -│ most recent, 2 for │ -│ second-to-last, etc. Use │ -│ 0 to disable (default). │ -│ [default: 0] │ -│ --save-recording --no-save-recording Save the audio recording │ -│ to disk for recovery. │ -│ [default: │ -│ save-recording] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. 
Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write │ -│ logs to. │ -│ --quiet -q Suppress console output │ -│ from rich. │ -│ --config TEXT Path to a TOML │ -│ configuration file. │ -│ --print-args Print the command line │ -│ arguments, including │ -│ variables taken from the │ -│ configuration file. │ -│ --transcription-log PATH Path to log transcription │ -│ results with timestamps, │ -│ hostname, model, and raw │ -│ output. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --extra-instructions TEXT Additional instructions for the LLM to │ +│ process the transcription. │ +│ --llm --no-llm Use an LLM to process the transcript. │ +│ [default: no-llm] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮ +│ --from-file PATH Transcribe audio from a saved WAV │ +│ file instead of recording. │ +│ --last-recording INTEGER Transcribe a saved recording. Use │ +│ 1 for most recent, 2 for │ +│ second-to-last, etc. Use 0 to │ +│ disable (default). │ +│ [default: 0] │ +│ --save-recording --no-save-recording Save the audio recording to disk │ +│ for recovery. │ +│ [default: save-recording] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. 
│ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, │ +│ including variables taken from the │ +│ configuration file. │ +│ --transcription-log PATH Path to log transcription results │ +│ with timestamps, hostname, model, and │ +│ raw output. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -718,9 +696,8 @@ uv tool install "agent-cli[vad]" Run a continuous transcription daemon with voice activity detection. - This command runs indefinitely, capturing audio from your microphone, - detecting speech segments using Silero VAD, transcribing them, and logging - results with timestamps. + This command runs indefinitely, capturing audio from your microphone, detecting speech + segments using Silero VAD, transcribing them, and logging results with timestamps. Examples: # Basic daemon agent-cli transcribe-daemon @@ -732,121 +709,105 @@ uv tool install "agent-cli[vad]" agent-cli transcribe-daemon --llm --role notes # Custom log file and audio directory - agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir - ~/audio - - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --role -r TEXT Role name for logging │ -│ (e.g., 'meeting', │ -│ 'notes', 'user'). │ -│ [default: user] │ -│ --silence-threshold -s FLOAT Seconds of silence to end │ -│ a speech segment. │ -│ [default: 1.0] │ -│ --min-segment -m FLOAT Minimum speech duration │ -│ in seconds to trigger a │ -│ segment. │ -│ [default: 0.25] │ -│ --vad-threshold FLOAT VAD speech detection │ -│ threshold (0.0-1.0). │ -│ Higher = more aggressive │ -│ filtering. │ -│ [default: 0.3] │ -│ --save-audio --no-save-audio Save audio segments as │ -│ MP3 files. │ -│ [default: save-audio] │ -│ --audio-dir PATH Directory for MP3 files. │ -│ Default: │ -│ ~/.config/agent-cli/audio │ -│ --transcription-log -t PATH JSON Lines log file path. │ -│ Default: │ -│ ~/.config/agent-cli/tran… │ -│ --clipboard --no-clipboard Copy each transcription │ -│ to clipboard. │ -│ [default: no-clipboard] │ -│ --help -h Show this message and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio + + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --role -r TEXT Role name for logging (e.g., │ +│ 'meeting', 'notes', 'user'). │ +│ [default: user] │ +│ --silence-threshold -s FLOAT Seconds of silence to end a speech │ +│ segment. │ +│ [default: 1.0] │ +│ --min-segment -m FLOAT Minimum speech duration in seconds │ +│ to trigger a segment. │ +│ [default: 0.25] │ +│ --vad-threshold FLOAT VAD speech detection threshold │ +│ (0.0-1.0). Higher = more aggressive │ +│ filtering. │ +│ [default: 0.3] │ +│ --save-audio --no-save-audio Save audio segments as MP3 files. │ +│ [default: save-audio] │ +│ --audio-dir PATH Directory for MP3 files. 
Default: │ +│ ~/.config/agent-cli/audio │ +│ --transcription-log -t PATH JSON Lines log file path. Default: │ +│ ~/.config/agent-cli/transcriptions… │ +│ --clipboard --no-clipboard Copy each transcription to │ +│ clipboard. │ +│ [default: no-clipboard] │ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. 
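Since the log is JSON Lines, it is easy to follow programmatically (a sketch: assumes `jq` is installed and that the daemon was started with `--transcription-log ~/meeting.jsonl` as in the example above):

```bash
# Follow new transcription entries as they are appended; each line is one
# JSON object, so jq pretty-prints them record by record.
tail -f ~/meeting.jsonl | jq .
```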
│ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --llm --no-llm Use an LLM to process the transcript. │ +│ [default: no-llm] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -890,89 +851,82 @@ uv tool install "agent-cli[vad]" Convert text to speech using Wyoming or OpenAI-compatible TTS server. -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --output-device-index INTEGER Index of the audio output device to │ -│ use for TTS. │ -│ --output-device-name TEXT Output device name keywords for │ -│ partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half │ -│ speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. 
│ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). 
│ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --output-device-index INTEGER Index of the audio output device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │ +│ twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --list-devices List available audio input and output devices and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. 
│ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1013,8 +967,7 @@ uv tool install "agent-cli[vad]" Usage: agent-cli voice-edit [OPTIONS] - Interact with clipboard text via a voice command using local or remote - services. + Interact with clipboard text via a voice command using local or remote services. Usage: @@ -1025,139 +978,124 @@ uv tool install "agent-cli[vad]" • List output devices: agent-cli voice-edit --list-output-devices • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. 
│ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). 
│ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV │ -│ file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. 
Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. 
│ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1203,148 +1141,133 @@ uv tool install "agent-cli[vad]" Wake word-based voice assistant using local or remote services. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Wake Word ──────────────────────────────────────────────────────────────────╮ -│ --wake-server-ip TEXT Wyoming wake word server IP address. │ -│ [default: localhost] │ -│ --wake-server-port INTEGER Wyoming wake word server port. │ -│ [default: 10400] │ -│ --wake-word TEXT Name of wake word to detect (e.g., │ -│ 'ok_nabu', 'hey_jarvis'). │ -│ [default: ok_nabu] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). 
│ -│ [default: whisper-1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. 
│ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV │ -│ file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮ +│ --wake-server-ip TEXT Wyoming wake word server IP address. │ +│ [default: localhost] │ +│ --wake-server-port INTEGER Wyoming wake word server port. │ +│ [default: 10400] │ +│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │ +│ 'hey_jarvis'). │ +│ [default: ok_nabu] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. 
│ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. 
│ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1397,178 +1320,144 @@ uv tool install "agent-cli[vad]" An chat agent that you can talk to. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). 
│ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ History Options ────────────────────────────────────────────────────────────╮ -│ --history-dir PATH Directory to store conversation history. │ -│ [default: ~/.config/agent-cli/history] │ -│ --last-n-messages INTEGER Number of messages to include in the │ -│ conversation history. Set to 0 to disable │ -│ history. │ -│ [default: 50] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Options ─────────────────────────────────────────────────────────────╮ -│ --advanced-memory --no-advanced-memo… Use advanced │ -│ vector-backed memory │ -│ with semantic │ -│ search. Auto-falls │ -│ back to simple │ -│ memory if │ -│ dependencies not │ -│ installed. │ -│ [default: │ -│ advanced-memory] │ -│ --memory-path PATH Path for advanced │ -│ memory database │ -│ storage. Default: │ -│ ~/.config/agent-cli… │ -│ --memory-embedding-… TEXT Embedding model for │ -│ semantic memory │ -│ search. │ -│ [default: │ -│ text-embedding-3-sm… │ -│ --memory-top-k INTEGER Number of memories │ -│ to retrieve per │ -│ search. │ -│ [default: 5] │ -│ --memory-score-thre… FLOAT Minimum relevance │ -│ score threshold for │ -│ memory retrieval │ -│ (0.0-1.0). │ -│ [default: 0.35] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. 
│ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). 
│ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ History Options ──────────────────────────────────────────────────────────────────────╮ +│ --history-dir PATH Directory to store conversation history. │ +│ [default: ~/.config/agent-cli/history] │ +│ --last-n-messages INTEGER Number of messages to include in the conversation │ +│ history. Set to 0 to disable history. │ +│ [default: 50] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ───────────────────────────────────────────────────────────────────────╮ +│ --memory-path PATH Path for memory database storage. Default: │ +│ ~/.config/agent-cli/memory/vector_db │ +│ --memory-embedding-model TEXT Embedding model for semantic memory search. │ +│ [default: text-embedding-3-small] │ +│ --memory-top-k INTEGER Number of memories to retrieve per search. │ +│ [default: 5] │ +│ --memory-score-threshold FLOAT Minimum relevance score threshold for memory │ +│ retrieval (0.0-1.0). │ +│ [default: 0.35] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1616,52 +1505,49 @@ uv tool install "agent-cli[vad]" Start the RAG (Retrieval-Augmented Generation) Proxy Server. This server watches a folder for documents, indexes them, and provides an - OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), - injecting relevant context from the documents. - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ RAG Configuration ──────────────────────────────────────────────────────────╮ -│ --docs-folder PATH Folder to watch for documents │ -│ [default: ./rag_docs] │ -│ --chroma-path PATH Path to ChromaDB persistence │ -│ directory │ -│ [default: ./rag_db] │ -│ --limit INTEGER Number of document chunks to │ -│ retrieve per query. │ -│ [default: 3] │ -│ --rag-tools --no-rag-tools Allow agent to fetch full │ -│ documents when snippets are │ -│ insufficient. │ -│ [default: rag-tools] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. 
│ -│ [default: text-embedding-3-small] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ───────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8000] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting + relevant context from the documents. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮ +│ --docs-folder PATH Folder to watch for documents │ +│ [default: ./rag_docs] │ +│ --chroma-path PATH Path to ChromaDB persistence directory │ +│ [default: ./rag_db] │ +│ --limit INTEGER Number of document chunks to retrieve per │ +│ query. │ +│ [default: 3] │ +│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │ +│ snippets are insufficient. │ +│ [default: rag-tools] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8000] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables taken │ +│ from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1717,107 +1603,91 @@ The `memory proxy` command is the core feature—a middleware server that gives Start the memory-backed chat proxy server. 
- This server acts as a middleware between your chat client (e.g., a web UI, - CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, - Ollama, vLLM). + This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE + plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM). Key Features: - • Simple Markdown Files: Memories are stored as human-readable Markdown - files, serving as the ultimate source of truth. - • Automatic Version Control: Built-in Git integration automatically commits - changes, providing a full history of memory evolution. - • Lightweight & Local: Minimal dependencies and runs entirely on your - machine. - • Proxy Middleware: Works transparently with any OpenAI-compatible - /chat/completions endpoint. + • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving + as the ultimate source of truth. + • Automatic Version Control: Built-in Git integration automatically commits changes, + providing a full history of memory evolution. + • Lightweight & Local: Minimal dependencies and runs entirely on your machine. + • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions + endpoint. How it works: 1 Intercepts POST /v1/chat/completions requests. - 2 Retrieves relevant memories (facts, previous conversations) from a local - vector database (ChromaDB) based on the user's query. + 2 Retrieves relevant memories (facts, previous conversations) from a local vector + database (ChromaDB) based on the user's query. 3 Injects these memories into the system prompt. 4 Forwards the augmented request to the real LLM (--openai-base-url). - 5 Extracts new facts from the conversation in the background and updates the - long-term memory store (including handling contradictions). - - Use this to give "long-term memory" to any OpenAI-compatible application. - Point your client's base URL to http://localhost:8100/v1. - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ───────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory │ -│ store (files + derived │ -│ vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory │ -│ entries to retrieve per │ -│ query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory │ -│ entries per │ -│ conversation (excluding │ -│ summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): │ -│ higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight │ -│ (0.0-1.0). Controls │ -│ freshness vs. │ -│ relevance. Default 0.2 │ -│ (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic │ -│ relevance threshold │ -│ (0.0-1.0). Memories │ -│ below this score are │ -│ discarded to reduce │ -│ noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact │ -│ extraction and │ -│ summaries. │ -│ [default: │ -│ summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git │ -│ commit of memory │ -│ changes. 
│ -│ [default: │ -│ git-versioning] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ───────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8100] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + 5 Extracts new facts from the conversation in the background and updates the long-term + memory store (including handling contradictions). + + Use this to give "long-term memory" to any OpenAI-compatible application. Point your + client's base URL to http://localhost:8100/v1. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮ +│ --memory-path PATH Path to the memory store (files + │ +│ derived vector index). │ +│ [default: ./memory_db] │ +│ --default-top-k INTEGER Number of memory entries to │ +│ retrieve per query. │ +│ [default: 5] │ +│ --max-entries INTEGER Maximum stored memory entries per │ +│ conversation (excluding summary). │ +│ [default: 500] │ +│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │ +│ relevance, lower favors │ +│ diversity. │ +│ [default: 0.7] │ +│ --recency-weight FLOAT Recency score weight (0.0-1.0). │ +│ Controls freshness vs. relevance. │ +│ Default 0.2 (20% recency, 80% │ +│ semantic relevance). │ +│ [default: 0.2] │ +│ --score-threshold FLOAT Minimum semantic relevance │ +│ threshold (0.0-1.0). Memories │ +│ below this score are discarded to │ +│ reduce noise. │ +│ [default: 0.35] │ +│ --summarization --no-summarization Enable automatic fact extraction │ +│ and summaries. │ +│ [default: summarization] │ +│ --git-versioning --no-git-versioning Enable automatic git commit of │ +│ memory changes. │ +│ [default: git-versioning] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). 
│ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8100] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables taken │ +│ from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1868,11 +1738,11 @@ agent-cli memory add -c work "Project deadline is Friday" Add memories directly without LLM extraction. - This writes facts directly to the memory store, bypassing the LLM-based fact - extraction. Useful for bulk imports or seeding memories. + This writes facts directly to the memory store, bypassing the LLM-based fact extraction. + Useful for bulk imports or seeding memories. - The memory proxy file watcher (if running) will auto-index new files. - Otherwise, they'll be indexed on next memory proxy startup. + The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll + be indexed on next memory proxy startup. Examples:: @@ -1893,35 +1763,29 @@ agent-cli memory add -c work "Project deadline is Friday" agent-cli memory add -c work "Project deadline is Friday" -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -│ memories [MEMORIES]... Memories to add. Each argument becomes one │ -│ fact. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. │ -│ Use '-' for stdin. │ -│ Supports JSON array, │ -│ JSON object with │ -│ 'memories' key, or plain │ -│ text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add │ -│ memories to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory │ -│ store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: │ -│ git-versioning] │ -│ --help -h Show this message and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮ +│ memories [MEMORIES]... Memories to add. 
Each argument becomes one fact. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --file -f PATH Read memories from file. Use '-' │ +│ for stdin. Supports JSON array, │ +│ JSON object with 'memories' key, │ +│ or plain text (one per line). │ +│ --conversation-id -c TEXT Conversation ID to add memories │ +│ to. │ +│ [default: default] │ +│ --memory-path PATH Path to the memory store. │ +│ [default: ./memory_db] │ +│ --git-versioning --no-git-versioning Commit changes to git. │ +│ [default: git-versioning] │ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py index da846d8f1..5e4cc5add 100644 --- a/agent_cli/_tools.py +++ b/agent_cli/_tools.py @@ -3,36 +3,30 @@ from __future__ import annotations import asyncio -import json -import os import subprocess -from datetime import UTC, datetime from pathlib import Path -from typing import TYPE_CHECKING, Any, TypeVar +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from collections.abc import Callable - from agent_cli.memory.client import MemoryClient -# --- Advanced Memory State --- -# These module-level variables are set by init_advanced_memory() when the chat -# agent starts with --advanced-memory enabled. +# --- Memory System State --- +# These module-level variables are set by init_memory() when the chat agent starts. _memory_client: MemoryClient | None = None _conversation_id: str = "default" _event_loop: asyncio.AbstractEventLoop | None = None -def init_advanced_memory( +def init_memory( client: MemoryClient, conversation_id: str = "default", event_loop: asyncio.AbstractEventLoop | None = None, ) -> None: - """Initialize the advanced memory system. + """Initialize the memory system. - Called by the chat agent when --advanced-memory is enabled. + Called by the chat agent on startup. Args: client: The MemoryClient instance to use for memory operations. @@ -46,8 +40,8 @@ def init_advanced_memory( _event_loop = event_loop -async def cleanup_advanced_memory() -> None: - """Clean up the advanced memory system. +async def cleanup_memory() -> None: + """Clean up the memory system. Called when the chat agent exits. """ @@ -58,105 +52,23 @@ async def cleanup_advanced_memory() -> None: _event_loop = None -def _is_advanced_memory() -> bool: - """Check if advanced memory is enabled and initialized.""" - return _memory_client is not None and _event_loop is not None - - def _run_async(coro: Any, timeout: float = 30.0) -> Any: """Run an async coroutine from sync context using the stored event loop.""" if _event_loop is None: - msg = "Event loop not initialized for advanced memory" + msg = "Event loop not initialized for memory system" raise RuntimeError(msg) future = asyncio.run_coroutine_threadsafe(coro, _event_loop) return future.result(timeout=timeout) -# --- Simple Memory System Helpers --- - - -def _get_memory_file_path() -> Path: - """Get the path to the memory file. 
- - If the environment variable ``AGENT_CLI_HISTORY_DIR`` is set (by the - running agent), store the memory file in that directory. - Otherwise fall back to the user's config directory. - """ - history_dir = os.getenv("AGENT_CLI_HISTORY_DIR") - if history_dir: - return Path(history_dir).expanduser() / "long_term_memory.json" - - return Path.home() / ".config" / "agent-cli" / "memory" / "long_term_memory.json" - - -def _load_memories() -> list[dict[str, Any]]: - """Load memories from file, returning empty list if file doesn't exist.""" - memory_file = _get_memory_file_path() - if not memory_file.exists(): - return [] - - with memory_file.open("r") as f: - return json.load(f) - - -def _save_memories(memories: list[dict[str, Any]]) -> None: - """Save memories to file, creating directories if needed.""" - memory_file = _get_memory_file_path() - memory_file.parent.mkdir(parents=True, exist_ok=True) - - with memory_file.open("w") as f: - json.dump(memories, f, indent=2) - - -def _find_memory_by_id(memories: list[dict[str, Any]], memory_id: int) -> dict[str, Any] | None: - """Find a memory by ID in the memories list.""" - for memory in memories: - if memory["id"] == memory_id: - return memory +def _check_memory_initialized() -> str | None: + """Check if memory is initialized. Returns error message if not, None if OK.""" + if _memory_client is None: + return "Error: Memory system not initialized. Install with: pip install 'agent-cli[memory]'" return None -def _format_memory_summary(memory: dict[str, Any]) -> str: - """Format a memory for display in search results.""" - return ( - f"ID: {memory['id']} | Category: {memory['category']} | " - f"Content: {memory['content']} | Tags: {', '.join(memory['tags'])}" - ) - - -def _format_memory_detailed(memory: dict[str, Any]) -> str: - """Format a memory with full details for listing.""" - created = datetime.fromisoformat(memory["timestamp"]).strftime("%Y-%m-%d %H:%M") - updated_info = "" - if "updated_at" in memory: - updated = datetime.fromisoformat(memory["updated_at"]).strftime("%Y-%m-%d %H:%M") - updated_info = f" (updated: {updated})" - - return ( - f"ID: {memory['id']} | Category: {memory['category']}\n" - f"Content: {memory['content']}\n" - f"Tags: {', '.join(memory['tags']) if memory['tags'] else 'None'}\n" - f"Created: {created}{updated_info}\n" - ) - - -def _parse_tags(tags_string: str) -> list[str]: - """Parse comma-separated tags string into a list of clean tags.""" - return [tag.strip() for tag in tags_string.split(",") if tag.strip()] - - -R = TypeVar("R") - - -def _memory_operation(operation_name: str, operation_func: Callable[[], str]) -> str: - """Wrapper for memory operations with consistent error handling.""" - try: - return operation_func() - except Exception as e: - return f"Error {operation_name}: {e}" - - def read_file(path: str) -> str: """Read the content of a file. 
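The retained `_run_async` helper above is the bridge between the synchronous tool functions and the async `MemoryClient`: the chat agent owns a running event loop, and sync code schedules coroutines onto it with `asyncio.run_coroutine_threadsafe`. A minimal, self-contained sketch of that pattern (the background-loop setup and the `fetch_fact` coroutine are illustrative stand-ins, not code from this patch):

```python
import asyncio
import threading

# A loop running in a background thread, standing in for the loop the
# chat agent passes to init_memory().
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()


async def fetch_fact() -> str:
    """Stand-in for an async MemoryClient operation."""
    await asyncio.sleep(0.1)
    return "user prefers dark mode"


def run_async(coro, timeout: float = 30.0):
    # Same shape as _run_async: submit the coroutine to the running loop
    # from sync code and block on the result, bounded by a timeout.
    future = asyncio.run_coroutine_threadsafe(coro, loop)
    return future.result(timeout=timeout)


print(run_async(fetch_fact()))  # -> user prefers dark mode
loop.call_soon_threadsafe(loop.stop)
```

The timeout matters here: because the sync caller blocks on `future.result()`, a hung memory backend would otherwise freeze the whole chat turn.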
@@ -193,41 +105,6 @@ def execute_code(code: str) -> str: return f"Error: Command not found: {code.split()[0]}" -def _add_memory_simple(content: str, category: str, tags: str) -> str: - """Add memory using the simple JSON-based system.""" - memories = _load_memories() - - memory = { - "id": len(memories) + 1, - "content": content, - "category": category, - "tags": _parse_tags(tags), - "timestamp": datetime.now(UTC).isoformat(), - } - - memories.append(memory) - _save_memories(memories) - - return f"Memory added successfully with ID {memory['id']}" - - -def _add_memory_advanced(content: str, category: str, tags: str) -> str: - """Add memory using the advanced vector-backed system.""" - if _memory_client is None: - return "Error: Advanced memory not initialized" - - # Format content with metadata for the advanced system - formatted_content = f"[{category}] {content}" - if tags: - formatted_content += f" (tags: {tags})" - - try: - _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) - return "Memory added successfully (advanced semantic memory)" - except Exception as e: - return f"Error adding memory: {e}" - - def add_memory(content: str, category: str = "general", tags: str = "") -> str: """Add important information to long-term memory for future conversations. @@ -245,57 +122,52 @@ def add_memory(content: str, category: str = "general", tags: str = "") -> str: tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming") Returns: - Confirmation message with the memory ID + Confirmation message """ - if _is_advanced_memory(): - return _memory_operation( - "adding memory", - lambda: _add_memory_advanced(content, category, tags), - ) - return _memory_operation("adding memory", lambda: _add_memory_simple(content, category, tags)) - + if error := _check_memory_initialized(): + return error -def _search_memory_simple(query: str, category: str) -> str: - """Search memory using the simple JSON-based system.""" - memories = _load_memories() - - if not memories: - return "No memories found. Memory system not initialized." + # Format content with metadata + formatted_content = f"[{category}] {content}" + if tags: + formatted_content += f" (tags: {tags})" - # Simple text-based search - query_lower = query.lower() - relevant_memories = [] + try: + _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) # type: ignore[union-attr] + return "Memory added successfully." + except Exception as e: + return f"Error adding memory: {e}" - for memory in memories: - # Check if query matches content, tags, or category - content_match = query_lower in memory["content"].lower() - tag_match = any(query_lower in tag.lower() for tag in memory["tags"]) - category_match = not category or memory["category"].lower() == category.lower() - if (content_match or tag_match) and category_match: - relevant_memories.append(memory) +def search_memory(query: str, category: str = "") -> str: + """Search long-term memory for relevant information before answering questions. - if not relevant_memories: - return f"No memories found matching '{query}'" + Use this tool: + - Before answering questions about the user's preferences, personal info, or past conversations + - When the user asks "what do you remember about..." 
or similar questions + - When you need context about the user's work, projects, or goals + - To check if you've discussed a topic before - # Format results - results = [_format_memory_summary(memory) for memory in relevant_memories[-5:]] + This performs semantic search to find conceptually related information. - return "\n".join(results) + Args: + query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences") + category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects") + Returns: + Relevant memories found, or message if none found -def _search_memory_advanced(query: str, category: str) -> str: - """Search memory using the advanced vector-backed system with semantic search.""" - if _memory_client is None: - return "Error: Advanced memory not initialized" + """ + if error := _check_memory_initialized(): + return error # Include category in search query if provided search_query = f"{category} {query}" if category else query try: result = _run_async( - _memory_client.search(search_query, conversation_id=_conversation_id), + _memory_client.search(search_query, conversation_id=_conversation_id), # type: ignore[union-attr] ) if not result.entries: return f"No memories found matching '{query}'" @@ -310,77 +182,34 @@ def _search_memory_advanced(query: str, category: str) -> str: return f"Error searching memory: {e}" -def search_memory(query: str, category: str = "") -> str: - """Search long-term memory for relevant information before answering questions. +def update_memory(memory_id: int, content: str = "", category: str = "", tags: str = "") -> str: + """Update an existing memory by adding new information. Use this tool: - - Before answering questions about the user's preferences, personal info, or past conversations - - When the user asks "what do you remember about..." or similar questions - - When you need context about the user's work, projects, or goals - - To check if you've discussed a topic before + - When the user wants to correct or modify previously stored information + - When information has changed (e.g., job change, preference updates) + - When the user says "update my memory about..." or "change the memory where..." - The search looks through memory content and tags for matches. - When using advanced memory, this performs semantic search to find conceptually related information. + The memory system uses automatic reconciliation - adding new information will + update or replace related existing facts. Args: - query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences") - category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects") + memory_id: Not used - the system automatically reconciles memories + content: The updated content to store + category: Category for the memory (leave empty for "general") + tags: Comma-separated tags (leave empty for none) Returns: - Relevant memories found, or message if none found + Confirmation message """ - if _is_advanced_memory(): - return _memory_operation( - "searching memory", - lambda: _search_memory_advanced(query, category), - ) - return _memory_operation("searching memory", lambda: _search_memory_simple(query, category)) - - -def _update_memory_simple(memory_id: int, content: str, category: str, tags: str) -> str: - """Update memory using the simple JSON-based system.""" - memories = _load_memories() - - if not memories: - return "No memories found. Memory system not initialized." 
- - # Find memory to update - memory_to_update = _find_memory_by_id(memories, memory_id) - if not memory_to_update: - return f"Memory with ID {memory_id} not found." - - # Update fields if provided - if content: - memory_to_update["content"] = content - if category: - memory_to_update["category"] = category - if tags: - memory_to_update["tags"] = _parse_tags(tags) + _ = memory_id # System uses reconciliation, not ID-based updates - # Add update timestamp - memory_to_update["updated_at"] = datetime.now(UTC).isoformat() - - _save_memories(memories) - return f"Memory ID {memory_id} updated successfully." - - -def _update_memory_advanced(memory_id: int, content: str, category: str, tags: str) -> str: - """Update memory using the advanced system. - - Note: The advanced memory system uses the reconciliation pipeline which - automatically manages memory updates through fact extraction. Direct updates - are handled by adding new information that supersedes old information. - """ - _ = memory_id # Advanced system uses reconciliation, not ID-based updates - if _memory_client is None: - return "Error: Advanced memory not initialized" + if error := _check_memory_initialized(): + return error if not content: - return ( - "In advanced memory mode, please provide the updated content. " - "The system will automatically reconcile it with existing memories." - ) + return "Please provide the updated content. The system will automatically reconcile it with existing memories." # Format content with metadata formatted_content = f"[{category or 'general'}] {content}" @@ -388,77 +217,34 @@ def _update_memory_advanced(memory_id: int, content: str, category: str, tags: s formatted_content += f" (tags: {tags})" try: - # Add the updated information - the advanced system's reconciliation - # pipeline will handle updating/replacing related facts - _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) - return ( - "Memory updated successfully. The advanced memory system has reconciled " - "this information with existing memories." - ) + _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) # type: ignore[union-attr] + return "Memory updated successfully. The system has reconciled this information with existing memories." except Exception as e: return f"Error updating memory: {e}" -def update_memory(memory_id: int, content: str = "", category: str = "", tags: str = "") -> str: - """Update an existing memory by ID. +def list_all_memories(limit: int = 10) -> str: + """List all memories with their details. Use this tool: - - When the user wants to correct or modify previously stored information - - When information has changed (e.g., job change, preference updates) - - When the user says "update my memory about..." or "change the memory where..." + - When the user asks "show me all my memories" or "list everything you remember" + - When they want to see what information is stored + - To provide a complete overview of stored information - Only provide the fields that should be updated - empty fields will keep existing values. - In advanced memory mode, the system automatically reconciles updates with existing information. + Shows memories in reverse chronological order (newest first). 
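+
+    Example:
+        list_all_memories(limit=25)  # illustrative call; returns up to 25 entries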
Args: - memory_id: The ID of the memory to update (use search_memory or list_all_memories to find IDs) - content: New content for the memory (leave empty to keep existing) - category: New category (leave empty to keep existing) - tags: New comma-separated tags (leave empty to keep existing) + limit: Maximum number of memories to show (default 10, use higher numbers if user wants more) Returns: - Confirmation message or error if memory ID not found + Formatted list of all memories """ - if _is_advanced_memory(): - return _memory_operation( - "updating memory", - lambda: _update_memory_advanced(memory_id, content, category, tags), - ) - return _memory_operation( - "updating memory", - lambda: _update_memory_simple(memory_id, content, category, tags), - ) - - -def _list_all_memories_simple(limit: int) -> str: - """List all memories using the simple JSON-based system.""" - memories = _load_memories() - - if not memories: - return "No memories stored yet." - - # Sort by ID (newest first) and limit results - memories_to_show = sorted(memories, key=lambda x: x["id"], reverse=True)[:limit] - - results = [f"Showing {len(memories_to_show)} of {len(memories)} total memories:\n"] - results.extend(_format_memory_detailed(memory) for memory in memories_to_show) - - if len(memories) > limit: - results.append( - f"... and {len(memories) - limit} more memories. Use a higher limit to see more.", - ) - - return "\n".join(results) - - -def _list_all_memories_advanced(limit: int) -> str: - """List all memories using the advanced vector-backed system.""" - if _memory_client is None: - return "Error: Advanced memory not initialized" + if error := _check_memory_initialized(): + return error try: - entries = _memory_client.list_all( + entries = _memory_client.list_all( # type: ignore[union-attr] conversation_id=_conversation_id, include_summary=False, ) @@ -486,64 +272,31 @@ def _list_all_memories_advanced(limit: int) -> str: return f"Error listing memories: {e}" -def list_all_memories(limit: int = 10) -> str: - """List all memories with their details. +def list_memory_categories() -> str: + """List all memory categories and their counts to see what has been remembered. Use this tool: - - When the user asks "show me all my memories" or "list everything you remember" - - When they want to see specific memory IDs for updating or reference - - To provide a complete overview of stored information - - Shows memories in reverse chronological order (newest first). + - When the user asks "what categories do you have?" + - To get a quick overview of memory organization + - When the user wants to know what types of information are stored - Args: - limit: Maximum number of memories to show (default 10, use higher numbers if user wants more) + This provides a summary view before using list_all_memories for details. Returns: - Formatted list of all memories with IDs, content, categories, and tags + Summary of memory types with counts """ - if _is_advanced_memory(): - return _memory_operation("listing memories", lambda: _list_all_memories_advanced(limit)) - return _memory_operation("listing memories", lambda: _list_all_memories_simple(limit)) - - -def _list_memory_categories_simple() -> str: - """List categories using the simple JSON-based system.""" - memories = _load_memories() - - if not memories: - return "No memories found. Memory system not initialized." 
- - # Count categories - categories: dict[str, int] = {} - for memory in memories: - category = memory["category"] - categories[category] = categories.get(category, 0) + 1 - - if not categories: - return "No memory categories found." - - results = ["Memory Categories:"] - for category, count in sorted(categories.items()): - results.append(f"- {category}: {count} memories") - - return "\n".join(results) - - -def _list_memory_categories_advanced() -> str: - """List categories using the advanced vector-backed system.""" - if _memory_client is None: - return "Error: Advanced memory not initialized" + if error := _check_memory_initialized(): + return error try: - entries = _memory_client.list_all( + entries = _memory_client.list_all( # type: ignore[union-attr] conversation_id=_conversation_id, include_summary=False, ) if not entries: - return "No memories found. Memory system not initialized." + return "No memories found." # Count by role (user, assistant, memory) roles: dict[str, int] = {} @@ -551,7 +304,7 @@ def _list_memory_categories_advanced() -> str: role = entry.get("role", "memory") roles[role] = roles.get(role, 0) + 1 - results = ["Memory Types (advanced memory system):"] + results = ["Memory Types:"] for role, count in sorted(roles.items()): results.append(f"- {role}: {count} entries") @@ -560,25 +313,6 @@ def _list_memory_categories_advanced() -> str: return f"Error listing categories: {e}" -def list_memory_categories() -> str: - """List all memory categories and their counts to see what has been remembered. - - Use this tool: - - When the user asks "what categories do you have?" - - To get a quick overview of memory organization - - When the user wants to know what types of information are stored - - This provides a summary view before using list_all_memories for details. - - Returns: - Summary of memory categories with counts (e.g., "personal: 5 memories") - - """ - if _is_advanced_memory(): - return _memory_operation("listing categories", _list_memory_categories_advanced) - return _memory_operation("listing categories", _list_memory_categories_simple) - - def tools() -> list: """Return a list of tools.""" from pydantic_ai.common_tools.duckduckgo import duckduckgo_search_tool # noqa: PLC0415 diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 265534100..e732e268e 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -25,7 +25,7 @@ import typer from agent_cli import config, opts -from agent_cli._tools import cleanup_advanced_memory, init_advanced_memory, tools +from agent_cli._tools import cleanup_memory, init_memory, tools from agent_cli.cli import app from agent_cli.core import process from agent_cli.core.audio import setup_devices @@ -69,20 +69,20 @@ def _get_conversation_id(history_cfg: config.History) -> str: return "default" -def _try_init_advanced_memory( - advanced_memory_cfg: config.AdvancedMemory, +def _try_init_memory( + memory_cfg: config.Memory, history_cfg: config.History, openai_llm_cfg: config.OpenAILLM, quiet: bool, ) -> object | None: - """Try to initialize the advanced memory system. + """Try to initialize the memory system. Returns the MemoryClient if successful, None otherwise. 
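+
+    Raises:
+        ImportError: If the optional ``[memory]`` extra is not installed.
+            The caller catches this and continues without memory tools.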
""" from agent_cli.memory.client import MemoryClient # noqa: PLC0415 # Determine memory path - memory_path = advanced_memory_cfg.memory_path + memory_path = memory_cfg.memory_path if memory_path is None: if history_cfg.history_dir: memory_path = Path(history_cfg.history_dir).expanduser() / "vector_memory" @@ -93,21 +93,21 @@ def _try_init_advanced_memory( openai_base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1" if not quiet: - console.print("[dim]Initializing advanced memory system...[/dim]") + console.print("[dim]Initializing memory system...[/dim]") memory_client = MemoryClient( memory_path=memory_path, openai_base_url=openai_base_url, - embedding_model=advanced_memory_cfg.embedding_model, + embedding_model=memory_cfg.embedding_model, embedding_api_key=openai_llm_cfg.openai_api_key, chat_api_key=openai_llm_cfg.openai_api_key, - default_top_k=advanced_memory_cfg.top_k, - score_threshold=advanced_memory_cfg.score_threshold, - recency_weight=advanced_memory_cfg.recency_weight, - mmr_lambda=advanced_memory_cfg.mmr_lambda, - enable_summarization=advanced_memory_cfg.enable_summarization, - enable_git_versioning=advanced_memory_cfg.enable_git_versioning, - max_entries=advanced_memory_cfg.max_entries, + default_top_k=memory_cfg.top_k, + score_threshold=memory_cfg.score_threshold, + recency_weight=memory_cfg.recency_weight, + mmr_lambda=memory_cfg.mmr_lambda, + enable_summarization=memory_cfg.enable_summarization, + enable_git_versioning=memory_cfg.enable_git_versioning, + max_entries=memory_cfg.max_entries, start_watcher=False, ) @@ -116,14 +116,14 @@ def _try_init_advanced_memory( # Generate conversation ID and initialize tools conversation_id = _get_conversation_id(history_cfg) - init_advanced_memory( + init_memory( memory_client, conversation_id, asyncio.get_running_loop(), ) if not quiet: - console.print("[green]Advanced memory system initialized[/green]") + console.print("[green]Memory system initialized[/green]") return memory_client @@ -393,7 +393,7 @@ async def _async_main( openai_tts_cfg: config.OpenAITTS, kokoro_tts_cfg: config.KokoroTTS, gemini_tts_cfg: config.GeminiTTS, - advanced_memory_cfg: config.AdvancedMemory, + memory_cfg: config.Memory, ) -> None: """Main async function, consumes parsed arguments.""" memory_client = None @@ -407,26 +407,24 @@ async def _async_main( if audio_out_cfg.enable_tts: audio_out_cfg.output_device_index = tts_output_device_index - # Initialize advanced memory if enabled - if advanced_memory_cfg.enabled: - try: - memory_client = _try_init_advanced_memory( - advanced_memory_cfg, - history_cfg, - openai_llm_cfg, - general_cfg.quiet, - ) - except (ImportError, Exception) as e: - msg = ( - "Advanced memory not available. Install with: uv pip install agent-cli[memory]" - if isinstance(e, ImportError) - else f"Failed to initialize advanced memory: {e}" + # Initialize memory system + try: + memory_client = _try_init_memory( + memory_cfg, + history_cfg, + openai_llm_cfg, + general_cfg.quiet, + ) + except ImportError: + if not general_cfg.quiet: + console.print( + "[yellow]Memory system not available. 
" + "Install with: pip install 'agent-cli[memory]'[/yellow]", ) - if not general_cfg.quiet: - console.print(f"[yellow]{msg}[/yellow]") - console.print("[yellow]Falling back to simple memory system.[/yellow]") - if not isinstance(e, ImportError): - LOGGER.warning("Failed to initialize advanced memory: %s", e) + except Exception as e: + if not general_cfg.quiet: + console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]") + LOGGER.warning("Failed to initialize memory: %s", e) # Load conversation history conversation_history = [] @@ -471,9 +469,9 @@ async def _async_main( console.print_exception() raise finally: - # Clean up advanced memory client + # Clean up memory client if memory_client is not None: - await cleanup_advanced_memory() + await cleanup_memory() @app.command("chat") @@ -536,8 +534,7 @@ def chat( " Set to 0 to disable history.", rich_help_panel="History Options", ), - # --- Advanced Memory Options --- - advanced_memory: bool = opts.ADVANCED_MEMORY, + # --- Memory Options --- memory_path: Path | None = opts.MEMORY_PATH, memory_embedding_model: str = opts.MEMORY_EMBEDDING_MODEL, memory_top_k: int = opts.MEMORY_TOP_K, @@ -644,8 +641,7 @@ def chat( history_dir=history_dir, last_n_messages=last_n_messages, ) - advanced_memory_cfg = config.AdvancedMemory( - enabled=advanced_memory, + memory_cfg = config.Memory( memory_path=memory_path, embedding_model=memory_embedding_model, top_k=memory_top_k, @@ -669,6 +665,6 @@ def chat( openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, - advanced_memory_cfg=advanced_memory_cfg, + memory_cfg=memory_cfg, ), ) diff --git a/agent_cli/config.py b/agent_cli/config.py index 7230153de..0bb48ebcb 100644 --- a/agent_cli/config.py +++ b/agent_cli/config.py @@ -224,17 +224,16 @@ def _expand_user_path(cls, v: str | None) -> Path | None: return None -# --- Panel: Advanced Memory Options --- +# --- Panel: Memory Options --- -class AdvancedMemory(BaseModel): - """Configuration for advanced vector-backed memory system. +class Memory(BaseModel): + """Configuration for the vector-backed memory system. - The advanced memory system uses ChromaDB with vector embeddings for - semantic search, providing better retrieval than simple keyword matching. + The memory system uses ChromaDB with vector embeddings for semantic search, + recency-aware scoring, and automatic fact reconciliation. """ - enabled: bool = True memory_path: Path | None = None embedding_model: str = "text-embedding-3-small" top_k: int = 5 diff --git a/agent_cli/opts.py b/agent_cli/opts.py index 1cd8229d0..d2643ea7b 100644 --- a/agent_cli/opts.py +++ b/agent_cli/opts.py @@ -381,18 +381,11 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) - rich_help_panel="General Options", ) -# --- Advanced Memory Options --- -ADVANCED_MEMORY: bool = typer.Option( - True, # noqa: FBT003 - "--advanced-memory/--no-advanced-memory", - help="Use advanced vector-backed memory with semantic search. " - "Auto-falls back to simple memory if dependencies not installed.", - rich_help_panel="Memory Options", -) +# --- Memory Options --- MEMORY_PATH: Path | None = typer.Option( None, "--memory-path", - help="Path for advanced memory database storage. Default: ~/.config/agent-cli/memory/vector_db", + help="Path for memory database storage. 
Default: ~/.config/agent-cli/memory/vector_db", rich_help_panel="Memory Options", ) MEMORY_EMBEDDING_MODEL: str = typer.Option( diff --git a/docs/architecture/memory.md b/docs/architecture/memory.md index 3d804bfb0..6a70e50ff 100644 --- a/docs/architecture/memory.md +++ b/docs/architecture/memory.md @@ -39,7 +39,7 @@ A local-first system that gives LLMs persistent memory across conversations, wit ### Related -- [chat command](../commands/chat.md) - Voice-based chat agent with integrated advanced memory +- [chat command](../commands/chat.md) - Voice-based chat agent with integrated memory - [memory command](../commands/memory.md) - Memory proxy server for any OpenAI-compatible app - [Configuration](../configuration.md) - Config file keys and defaults - [RAG System Architecture](rag.md) - Related retrieval stack for documents diff --git a/docs/commands/chat.md b/docs/commands/chat.md index 7d34fc14e..719d35325 100644 --- a/docs/commands/chat.md +++ b/docs/commands/chat.md @@ -169,8 +169,7 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history | Option | Default | Description | |--------|---------|-------------| -| `--advanced-memory/--no-advanced-memory` | `true` | Use advanced vector-backed memory with semantic search. Auto-falls back to simple memory if dependencies not installed. | -| `--memory-path` | - | Path for advanced memory database storage. Default: ~/.config/agent-cli/memory/vector_db | +| `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db | | `--memory-embedding-model` | `text-embedding-3-small` | Embedding model for semantic memory search. | | `--memory-top-k` | `5` | Number of memories to retrieve per search. | | `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). | @@ -193,9 +192,7 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history The chat agent includes a built-in long-term memory system that allows it to remember information across conversations. -### Advanced Memory (Default) - -By default, the chat agent uses the **advanced vector-backed memory system** with semantic search. This provides: +The memory system uses a **vector-backed architecture** with semantic search. This provides: - **Semantic search**: Find relevant memories based on meaning, not just keywords - **Recency-aware scoring**: Recent memories are weighted higher @@ -203,20 +200,11 @@ By default, the chat agent uses the **advanced vector-backed memory system** wit - **Automatic reconciliation**: Contradicting facts are updated, not duplicated > [!NOTE] -> Advanced memory requires the `[memory]` extra: `pip install "agent-cli[memory]"`. -> If not installed, the system automatically falls back to simple JSON storage with a warning. - -To disable advanced memory and use the simple JSON system: -```bash -agent-cli chat --no-advanced-memory -``` +> The memory system requires the `[memory]` extra: `pip install "agent-cli[memory]"`. +> If not installed, memory tools will not be available. For more details on how the memory system works, see [Memory System Architecture](../architecture/memory.md). -### Simple Memory Fallback - -When advanced memory is disabled or unavailable, the agent uses a simple JSON-based storage system with text matching. 
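+### Example
+
+A minimal invocation with explicit memory options (illustrative values; all
+flags are listed in the table above):
+
+```bash
+# Keep the vector store next to a project and retrieve more memories per search
+agent-cli chat --memory-path ~/notes/vector_memory --memory-top-k 10
+```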
- ## Available Tools The chat agent has access to tools that let it interact with your system: @@ -224,7 +212,7 @@ The chat agent has access to tools that let it interact with your system: - **read_file**: Read file contents - **execute_code**: Run a single command (no shell features like pipes or redirects) - **duckduckgo_search**: Search the web via DuckDuckGo -- **add_memory**: Store information for future conversations (uses [advanced memory](../architecture/memory.md) when enabled) +- **add_memory**: Store information for future conversations (uses [vector memory](../architecture/memory.md)) - **search_memory**: Search stored memories with semantic search - **update_memory**: Update existing memories - **list_all_memories**: List all stored memories diff --git a/docs/commands/memory.md b/docs/commands/memory.md index 9d97a53a0..11428a7e1 100644 --- a/docs/commands/memory.md +++ b/docs/commands/memory.md @@ -221,7 +221,7 @@ See [Memory System Architecture](../architecture/memory.md) for the full schema ## Related -- [chat command](chat.md) - Voice-based chat agent with integrated advanced memory +- [chat command](chat.md) - Voice-based chat agent with integrated memory - [Memory System Architecture](../architecture/memory.md) - Full technical specification - [Configuration](../configuration.md) - Config file keys for memory proxy defaults - [rag-proxy](rag-proxy.md) - Document RAG proxy server (contrast with memory) diff --git a/tests/agents/test_interactive.py b/tests/agents/test_interactive.py index d25c79701..ea9de9474 100644 --- a/tests/agents/test_interactive.py +++ b/tests/agents/test_interactive.py @@ -140,7 +140,7 @@ async def test_async_main_list_devices(tmp_path: Path) -> None: openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, - advanced_memory_cfg=config.AdvancedMemory(enabled=False), + memory_cfg=config.Memory(), ) mock_setup_devices.assert_called_once() @@ -210,7 +210,7 @@ async def test_async_main_list_output_devices(tmp_path: Path) -> None: openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, - advanced_memory_cfg=config.AdvancedMemory(enabled=False), + memory_cfg=config.Memory(), ) mock_setup_devices.assert_called_once() @@ -304,7 +304,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None: openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, - advanced_memory_cfg=config.AdvancedMemory(enabled=False), + memory_cfg=config.Memory(), ) # Verify that the core functions were called diff --git a/tests/agents/test_interactive_extra.py b/tests/agents/test_interactive_extra.py index a51419a62..6f3c2d9dc 100644 --- a/tests/agents/test_interactive_extra.py +++ b/tests/agents/test_interactive_extra.py @@ -260,6 +260,6 @@ async def test_async_main_exception_handling(): openai_tts_cfg=openai_tts_cfg, kokoro_tts_cfg=kokoro_tts_cfg, gemini_tts_cfg=gemini_tts_cfg, - advanced_memory_cfg=config.AdvancedMemory(enabled=False), + memory_cfg=config.Memory(), ) mock_console.print_exception.assert_called_once() diff --git a/tests/test_memory_tools.py b/tests/test_memory_tools.py deleted file mode 100644 index 6c017b0a4..000000000 --- a/tests/test_memory_tools.py +++ /dev/null @@ -1,117 +0,0 @@ -"""Tests for the memory tools.""" - -from __future__ import annotations - -import json -from pathlib import Path -from unittest.mock import patch - -import pytest # noqa: TC002 - -from agent_cli import _tools - - -def test_get_memory_file_path(monkeypatch: 
pytest.MonkeyPatch, tmp_path: Path) -> None: - """Test the _get_memory_file_path function.""" - # Test with AGENT_CLI_HISTORY_DIR set - history_dir = tmp_path / "history" - monkeypatch.setenv("AGENT_CLI_HISTORY_DIR", str(history_dir)) - path = _tools._get_memory_file_path() - assert path == history_dir / "long_term_memory.json" - - # Test without AGENT_CLI_HISTORY_DIR set - monkeypatch.delenv("AGENT_CLI_HISTORY_DIR", raising=False) - path = _tools._get_memory_file_path() - assert path == Path.home() / ".config" / "agent-cli" / "memory" / "long_term_memory.json" - - -def test_load_and_save_memories(tmp_path: Path) -> None: - """Test the _load_memories and _save_memories functions.""" - memory_file = tmp_path / "long_term_memory.json" - with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file): - # Test loading from a non-existent file - memories = _tools._load_memories() - assert memories == [] - - # Test saving and then loading - memories_to_save = [{"id": 1, "content": "test"}] - _tools._save_memories(memories_to_save) - - loaded_memories = _tools._load_memories() - assert loaded_memories == memories_to_save - - # Verify the file content - with memory_file.open("r") as f: - assert json.load(f) == memories_to_save - - -def test_add_and_search_memory(tmp_path: Path) -> None: - """Test the add_memory and search_memory functions.""" - memory_file = tmp_path / "long_term_memory.json" - with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file): - # Test searching in an empty memory - assert "No memories found" in _tools.search_memory("test") - - # Test adding a memory - result = _tools.add_memory("test content", "test_category", "tag1, tag2") - assert "Memory added successfully with ID 1" in result - - # Test searching for the new memory - search_result = _tools.search_memory("test content") - assert "ID: 1" in search_result - assert "Category: test_category" in search_result - assert "Content: test content" in search_result - assert "Tags: tag1, tag2" in search_result - - # Test searching with a category filter - search_result_cat = _tools.search_memory("test", category="test_category") - assert "ID: 1" in search_result_cat - - # Test searching with a non-matching category - search_result_no_cat = _tools.search_memory("test", category="wrong_category") - assert "No memories found" in search_result_no_cat - - -def test_update_memory(tmp_path: Path) -> None: - """Test the update_memory function.""" - memory_file = tmp_path / "long_term_memory.json" - with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file): - # Add a memory to work with - _tools.add_memory("original content", "original_category", "original_tag") - - # Test updating a non-existent memory - assert "not found" in _tools.update_memory(2, content="new") - - # Test updating the existing memory - update_result = _tools.update_memory(1, content="new content", category="new_category") - assert "updated successfully" in update_result - - # Verify the update - search_result = _tools.search_memory("new content") - assert "Category: new_category" in search_result - - -def test_list_all_and_categories(tmp_path: Path) -> None: - """Test the list_all_memories and list_memory_categories functions.""" - memory_file = tmp_path / "long_term_memory.json" - with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file): - # Test with no memories - assert "No memories stored" in _tools.list_all_memories() - assert "No memories found" in _tools.list_memory_categories() - - 
# Add some memories - _tools.add_memory("content1", "cat1", "tag1") - _tools.add_memory("content2", "cat2", "tag2") - _tools.add_memory("content3", "cat1", "tag3") - - # Test list_all_memories - list_all_result = _tools.list_all_memories() - assert "Showing 3 of 3 total memories" in list_all_result - assert "ID: 1" in list_all_result - assert "ID: 2" in list_all_result - assert "ID: 3" in list_all_result - - # Test list_memory_categories - list_cat_result = _tools.list_memory_categories() - assert "cat1: 2 memories" in list_cat_result - assert "cat2: 1 memories" in list_cat_result From 731fac73b6e1b1ff802b53a96bb9b47ed88539c3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Jan 2026 08:26:22 +0000 Subject: [PATCH 06/20] Update auto-generated docs --- README.md | 1828 ++++++++++++++++++++++++++++------------------------- 1 file changed, 974 insertions(+), 854 deletions(-) diff --git a/README.md b/README.md index 3efe0f4b9..615f95168 100644 --- a/README.md +++ b/README.md @@ -391,14 +391,14 @@ agent-cli config edit Manage agent-cli configuration files. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮ -│ init Create a new config file with all options commented out. │ -│ edit Open the config file in your default editor. │ -│ show Display the config file location and contents. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────╮ +│ init Create a new config file with all options commented out. │ +│ edit Open the config file in your default editor. │ +│ show Display the config file location and contents. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -457,49 +457,53 @@ the `[defaults]` section of your configuration file. Correct text from clipboard using a local or remote LLM. -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ text [TEXT] The text to correct. If not provided, reads from clipboard. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. 
│ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ text [TEXT] The text to correct. If not provided, reads from │ +│ clipboard. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. 
│ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -544,102 +548,120 @@ the `[defaults]` section of your configuration file. Wyoming ASR Client for streaming microphone audio to a transcription server. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --extra-instructions TEXT Additional instructions for the LLM to │ -│ process the transcription. │ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮ -│ --from-file PATH Transcribe audio from a saved WAV │ -│ file instead of recording. │ -│ --last-recording INTEGER Transcribe a saved recording. Use │ -│ 1 for most recent, 2 for │ -│ second-to-last, etc. Use 0 to │ -│ disable (default). │ -│ [default: 0] │ -│ --save-recording --no-save-recording Save the audio recording to disk │ -│ for recovery. │ -│ [default: save-recording] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). 
│ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -│ --transcription-log PATH Path to log transcription results │ -│ with timestamps, hostname, model, and │ -│ raw output. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --extra-instructions TEXT Additional instructions for the │ +│ LLM to process the transcription. │ +│ --llm --no-llm Use an LLM to process the │ +│ transcript. │ +│ [default: no-llm] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Recovery ─────────────────────────────────────────────────────────────╮ +│ --from-file PATH Transcribe audio from a │ +│ saved WAV file instead │ +│ of recording. │ +│ --last-recording INTEGER Transcribe a saved │ +│ recording. Use 1 for │ +│ most recent, 2 for │ +│ second-to-last, etc. Use │ +│ 0 to disable (default). │ +│ [default: 0] │ +│ --save-recording --no-save-recording Save the audio recording │ +│ to disk for recovery. │ +│ [default: │ +│ save-recording] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. 
Can also be set with │
+│ the OPENAI_API_KEY environment variable. │
+│ [env var: OPENAI_API_KEY] │
+│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
+│ (e.g., for llama-server: │
+│ http://localhost:8080/v1). │
+│ [env var: OPENAI_BASE_URL] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮
+│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
+│ [default: gemini-3-flash-preview] │
+│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │
+│ the GEMINI_API_KEY environment variable. │
+│ [env var: GEMINI_API_KEY] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+│ --toggle Toggle the background process on/off. If the process is │
+│ running, it will be stopped. If the process is not │
+│ running, it will be started. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --clipboard --no-clipboard Copy result to clipboard. │
+│ [default: clipboard] │
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write │
+│ logs to. │
+│ --quiet -q Suppress console output │
+│ from rich. │
+│ --config TEXT Path to a TOML │
+│ configuration file. │
+│ --print-args Print the command line │
+│ arguments, including │
+│ variables taken from the │
+│ configuration file. │
+│ --transcription-log PATH Path to log transcription │
+│ results with timestamps, │
+│ hostname, model, and raw │
+│ output. │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```

@@ -696,8 +718,9 @@ uv tool install "agent-cli[vad]"

   Run a continuous transcription daemon with voice activity detection.

-  This command runs indefinitely, capturing audio from your microphone, detecting speech
-  segments using Silero VAD, transcribing them, and logging results with timestamps.
+  This command runs indefinitely, capturing audio from your microphone,
+  detecting speech segments using Silero VAD, transcribing them, and logging
+  results with timestamps.

   Examples:
   # Basic daemon
   agent-cli transcribe-daemon
@@ -709,105 +732,121 @@ uv tool install "agent-cli[vad]"
   agent-cli transcribe-daemon --llm --role notes

   # Custom log file and audio directory
-  agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio
-
-
-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --role -r TEXT Role name for logging (e.g., │
-│ 'meeting', 'notes', 'user'). │
-│ [default: user] │
-│ --silence-threshold -s FLOAT Seconds of silence to end a speech │
-│ segment. │
-│ [default: 1.0] │
-│ --min-segment -m FLOAT Minimum speech duration in seconds │
-│ to trigger a segment. │
-│ [default: 0.25] │
-│ --vad-threshold FLOAT VAD speech detection threshold │
-│ (0.0-1.0). Higher = more aggressive │
-│ filtering. │
-│ [default: 0.3] │
-│ --save-audio --no-save-audio Save audio segments as MP3 files. │
-│ [default: save-audio] │
-│ --audio-dir PATH Directory for MP3 files. Default: │
-│ ~/.config/agent-cli/audio │
-│ --transcription-log -t PATH JSON Lines log file path. Default: │
-│ ~/.config/agent-cli/transcriptions… │
-│ --clipboard --no-clipboard Copy each transcription to │
-│ clipboard. │
-│ [default: no-clipboard] │
-│ --help -h Show this message and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
-│ [default: wyoming] │
-│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
-│ [default: ollama] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --input-device-index INTEGER Index of the audio input device to use. │
-│ --input-device-name TEXT Device name keywords for partial matching. │
-│ --list-devices List available audio input and output devices and │
-│ exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
-│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
-│ [default: localhost] │
-│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
-│ [default: 10300] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
-│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
-│ [default: whisper-1] │
-│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
-│ (e.g., for custom Whisper server: │
-│ http://localhost:9898). │
-│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
-│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
-│ [default: gemini-3-flash-preview] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
-│ [default: gemma3:4b] │
-│ --llm-ollama-host TEXT The Ollama server host. Default is │
-│ http://localhost:11434. │
-│ [default: http://localhost:11434] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
-│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
-│ [default: gpt-5-mini] │
-│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
-│ OPENAI_API_KEY environment variable. │
-│ [env var: OPENAI_API_KEY] │
-│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
-│ llama-server: http://localhost:8080/v1). │
-│ [env var: OPENAI_BASE_URL] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
-│ [default: gemini-3-flash-preview] │
-│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
-│ GEMINI_API_KEY environment variable. │
-│ [env var: GEMINI_API_KEY] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
-│ --llm --no-llm Use an LLM to process the transcript. │
-│ [default: no-llm] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Process Management ───────────────────────────────────────────────────────────────────╮
-│ --stop Stop any running background process. │
-│ --status Check if a background process is running. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ General Options ──────────────────────────────────────────────────────────────────────╮
-│ --log-level TEXT Set logging level. │
-│ [default: WARNING] │
-│ --log-file TEXT Path to a file to write logs to. │
-│ --quiet -q Suppress console output from rich. │
-│ --config TEXT Path to a TOML configuration file. │
-│ --print-args Print the command line arguments, including variables │
-│ taken from the configuration file. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
+  agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir
+  ~/audio
+
+
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --role -r TEXT Role name for logging │
+│ (e.g., 'meeting', │
+│ 'notes', 'user'). │
+│ [default: user] │
+│ --silence-threshold -s FLOAT Seconds of silence to end │
+│ a speech segment. │
+│ [default: 1.0] │
+│ --min-segment -m FLOAT Minimum speech duration │
+│ in seconds to trigger a │
+│ segment. │
+│ [default: 0.25] │
+│ --vad-threshold FLOAT VAD speech detection │
+│ threshold (0.0-1.0). │
+│ Higher = more aggressive │
+│ filtering. │
+│ [default: 0.3] │
+│ --save-audio --no-save-audio Save audio segments as │
+│ MP3 files. │
+│ [default: save-audio] │
+│ --audio-dir PATH Directory for MP3 files. │
+│ Default: │
+│ ~/.config/agent-cli/audio │
+│ --transcription-log -t PATH JSON Lines log file path. │
+│ Default: │
+│ ~/.config/agent-cli/tran… │
+│ --clipboard --no-clipboard Copy each transcription │
+│ to clipboard. │
+│ [default: no-clipboard] │
+│ --help -h Show this message and │
+│ exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Provider Selection ─────────────────────────────────────────────────────────╮
+│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │
+│ 'gemini'). │
+│ [default: wyoming] │
+│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │
+│ 'gemini'). │
+│ [default: ollama] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input ────────────────────────────────────────────────────────────────╮
+│ --input-device-index INTEGER Index of the audio input device to use. │
+│ --input-device-name TEXT Device name keywords for partial │
+│ matching. │
+│ --list-devices List available audio input and output │
+│ devices and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮
+│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
+│ [default: localhost] │
+│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
+│ [default: 10300] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮
+│ --asr-openai-model TEXT The OpenAI model to use for ASR │
+│ (transcription). │
+│ [default: whisper-1] │
+│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │
+│ API (e.g., for custom Whisper server: │
+│ http://localhost:9898). │
+│ --asr-openai-prompt TEXT Custom prompt to guide transcription │
+│ (optional). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮
+│ --asr-gemini-model TEXT The Gemini model to use for ASR │
+│ (transcription). │
+│ [default: gemini-3-flash-preview] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮
+│ --llm-ollama-model TEXT The Ollama model to use. Default is │
+│ gemma3:4b. │
+│ [default: gemma3:4b] │
+│ --llm-ollama-host TEXT The Ollama server host. Default is │
+│ http://localhost:11434. │
+│ [default: http://localhost:11434] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮
+│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
+│ [default: gpt-5-mini] │
+│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │
+│ the OPENAI_API_KEY environment variable. │
+│ [env var: OPENAI_API_KEY] │
+│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
+│ (e.g., for llama-server: │
+│ http://localhost:8080/v1). │
+│ [env var: OPENAI_BASE_URL] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮
+│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
+│ [default: gemini-3-flash-preview] │
+│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │
+│ the GEMINI_API_KEY environment variable. │
+│ [env var: GEMINI_API_KEY] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM Configuration ──────────────────────────────────────────────────────────╮
+│ --llm --no-llm Use an LLM to process the transcript. │
+│ [default: no-llm] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write logs to. │
+│ --quiet -q Suppress console output from rich. │
+│ --config TEXT Path to a TOML configuration file. │
+│ --print-args Print the command line arguments, including │
+│ variables taken from the configuration file. │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```
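
Beyond the README's own examples, the daemon flags above compose into one-liners. A sketch of a more aggressive VAD setup, using only flags documented in the panels (values are illustrative, not recommendations):

```bash
# Tighter VAD gating, shorter segments, no MP3 archiving (flags documented above).
agent-cli transcribe-daemon --vad-threshold 0.5 --min-segment 0.5 \
  --silence-threshold 0.7 --no-save-audio --role meeting
# If a --config TOML file is in play, --print-args echoes the merged values.
```
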
@@ -851,82 +890,89 @@ uv tool install "agent-cli[vad]"

   Convert text to speech using Wyoming or OpenAI-compatible TTS server.

-╭─ General Options ──────────────────────────────────────────────────────────────────────╮
-│ text [TEXT] Text to speak. Reads from clipboard if not provided. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --help -h Show this message and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
-│ 'gemini'). │
-│ [default: wyoming] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
-│ --output-device-index INTEGER Index of the audio output device to use for TTS. │
-│ --output-device-name TEXT Output device name keywords for partial │
-│ matching. │
-│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │
-│ twice as fast, 0.5 = half speed). │
-│ [default: 1.0] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
-│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
-│ [default: localhost] │
-│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
-│ [default: 10200] │
-│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
-│ 'en_US-lessac-medium'). │
-│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
-│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
-│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
-│ [default: tts-1] │
-│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
-│ [default: alloy] │
-│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
-│ (e.g., http://localhost:8000/v1 for a proxy). │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
-│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
-│ [default: kokoro] │
-│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
-│ [default: af_sky] │
-│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
-│ [default: http://localhost:8880/v1] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
-│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
-│ [default: gemini-2.5-flash-preview-tts] │
-│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
-│ 'Charon', 'Fenrir'). │
-│ [default: Kore] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
-│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
-│ GEMINI_API_KEY environment variable. │
-│ [env var: GEMINI_API_KEY] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --list-devices List available audio input and output devices and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ General Options ──────────────────────────────────────────────────────────────────────╮
-│ --save-file PATH Save TTS response audio to WAV file. │
-│ --log-level TEXT Set logging level. │
-│ [default: WARNING] │
-│ --log-file TEXT Path to a file to write logs to. │
-│ --quiet -q Suppress console output from rich. │
-│ --config TEXT Path to a TOML configuration file. │
-│ --print-args Print the command line arguments, including variables │
-│ taken from the configuration file. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Process Management ───────────────────────────────────────────────────────────────────╮
-│ --stop Stop any running background process. │
-│ --status Check if a background process is running. │
-│ --toggle Toggle the background process on/off. If the process is running, it │
-│ will be stopped. If the process is not running, it will be started. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ text [TEXT] Text to speak. Reads from clipboard if not provided. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --help -h Show this message and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Provider Selection ─────────────────────────────────────────────────────────╮
+│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │
+│ 'kokoro', 'gemini'). │
+│ [default: wyoming] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output ───────────────────────────────────────────────────────────────╮
+│ --output-device-index INTEGER Index of the audio output device to │
+│ use for TTS. │
+│ --output-device-name TEXT Output device name keywords for │
+│ partial matching. │
+│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
+│ 2.0 = twice as fast, 0.5 = half │
+│ speed). │
+│ [default: 1.0] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮
+│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
+│ [default: localhost] │
+│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
+│ [default: 10200] │
+│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │
+│ (e.g., 'en_US-lessac-medium'). │
+│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │
+│ 'en_US'). │
+│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮
+│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
+│ [default: tts-1] │
+│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │
+│ TTS. │
+│ [default: alloy] │
+│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │
+│ API (e.g., http://localhost:8000/v1 for a │
+│ proxy). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮
+│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
+│ [default: kokoro] │
+│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
+│ [default: af_sky] │
+│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
+│ [default: http://localhost:8880/v1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮
+│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
+│ [default: gemini-2.5-flash-preview-tts] │
+│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │
+│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │
+│ [default: Kore] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮
+│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
+│ GEMINI_API_KEY environment variable. │
+│ [env var: GEMINI_API_KEY] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input ────────────────────────────────────────────────────────────────╮
+│ --list-devices List available audio input and output devices and │
+│ exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --save-file PATH Save TTS response audio to WAV file. │
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write logs to. │
+│ --quiet -q Suppress console output from rich. │
+│ --config TEXT Path to a TOML configuration file. │
+│ --print-args Print the command line arguments, including │
+│ variables taken from the configuration file. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+│ --toggle Toggle the background process on/off. If the process is │
+│ running, it will be stopped. If the process is not │
+│ running, it will be started. │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```
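
The usage line for this text-to-speech command falls outside this excerpt, so the subcommand name below is an assumption (`speak`); every flag comes from the panels above:

```bash
# Assumes the subcommand is named `speak`; flags and defaults are documented above.
agent-cli speak "Build finished" --tts-provider kokoro \
  --tts-kokoro-voice af_sky --tts-speed 1.5 --save-file done.wav
```
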
@@ -967,7 +1013,8 @@ uv tool install "agent-cli[vad]"

   Usage: agent-cli voice-edit [OPTIONS]

-  Interact with clipboard text via a voice command using local or remote services.
+  Interact with clipboard text via a voice command using local or remote
+  services.

   Usage:

@@ -978,124 +1025,139 @@ uv tool install "agent-cli[vad]"
   • List output devices: agent-cli voice-edit --list-output-devices
   • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav

-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --help -h Show this message and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
-│ [default: wyoming] │
-│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
-│ [default: ollama] │
-│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
-│ 'gemini'). │
-│ [default: wyoming] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --input-device-index INTEGER Index of the audio input device to use. │
-│ --input-device-name TEXT Device name keywords for partial matching. │
-│ --list-devices List available audio input and output devices and │
-│ exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
-│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
-│ [default: localhost] │
-│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
-│ [default: 10300] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
-│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
-│ [default: whisper-1] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
-│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
-│ [default: gemini-3-flash-preview] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
-│ [default: gemma3:4b] │
-│ --llm-ollama-host TEXT The Ollama server host. Default is │
-│ http://localhost:11434. │
-│ [default: http://localhost:11434] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
-│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
-│ [default: gpt-5-mini] │
-│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
-│ OPENAI_API_KEY environment variable. │
-│ [env var: OPENAI_API_KEY] │
-│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
-│ llama-server: http://localhost:8080/v1). │
-│ [env var: OPENAI_BASE_URL] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
-│ [default: gemini-3-flash-preview] │
-│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
-│ GEMINI_API_KEY environment variable. │
-│ [env var: GEMINI_API_KEY] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
-│ --tts --no-tts Enable text-to-speech for responses. │
-│ [default: no-tts] │
-│ --output-device-index INTEGER Index of the audio output device to use │
-│ for TTS. │
-│ --output-device-name TEXT Output device name keywords for partial │
-│ matching. │
-│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
-│ 2.0 = twice as fast, 0.5 = half speed). │
-│ [default: 1.0] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
-│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
-│ [default: localhost] │
-│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
-│ [default: 10200] │
-│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
-│ 'en_US-lessac-medium'). │
-│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
-│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
-│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
-│ [default: tts-1] │
-│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
-│ [default: alloy] │
-│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
-│ (e.g., http://localhost:8000/v1 for a proxy). │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
-│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
-│ [default: kokoro] │
-│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
-│ [default: af_sky] │
-│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
-│ [default: http://localhost:8880/v1] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
-│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
-│ [default: gemini-2.5-flash-preview-tts] │
-│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
-│ 'Charon', 'Fenrir'). │
-│ [default: Kore] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Process Management ───────────────────────────────────────────────────────────────────╮
-│ --stop Stop any running background process. │
-│ --status Check if a background process is running. │
-│ --toggle Toggle the background process on/off. If the process is running, it │
-│ will be stopped. If the process is not running, it will be started. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ General Options ──────────────────────────────────────────────────────────────────────╮
-│ --save-file PATH Save TTS response audio to WAV file. │
-│ --clipboard --no-clipboard Copy result to clipboard. │
-│ [default: clipboard] │
-│ --log-level TEXT Set logging level. │
-│ [default: WARNING] │
-│ --log-file TEXT Path to a file to write logs to. │
-│ --quiet -q Suppress console output from rich. │
-│ --config TEXT Path to a TOML configuration file. │
-│ --print-args Print the command line arguments, including │
-│ variables taken from the configuration file. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --help -h Show this message and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Provider Selection ─────────────────────────────────────────────────────────╮
+│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │
+│ 'gemini'). │
+│ [default: wyoming] │
+│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │
+│ 'gemini'). │
+│ [default: ollama] │
+│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │
+│ 'kokoro', 'gemini'). │
+│ [default: wyoming] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input ────────────────────────────────────────────────────────────────╮
+│ --input-device-index INTEGER Index of the audio input device to use. │
+│ --input-device-name TEXT Device name keywords for partial │
+│ matching. │
+│ --list-devices List available audio input and output │
+│ devices and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮
+│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
+│ [default: localhost] │
+│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
+│ [default: 10300] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮
+│ --asr-openai-model TEXT The OpenAI model to use for ASR │
+│ (transcription). │
+│ [default: whisper-1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮
+│ --asr-gemini-model TEXT The Gemini model to use for ASR │
+│ (transcription). │
+│ [default: gemini-3-flash-preview] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮
+│ --llm-ollama-model TEXT The Ollama model to use. Default is │
+│ gemma3:4b. │
+│ [default: gemma3:4b] │
+│ --llm-ollama-host TEXT The Ollama server host. Default is │
+│ http://localhost:11434. │
+│ [default: http://localhost:11434] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮
+│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
+│ [default: gpt-5-mini] │
+│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │
+│ the OPENAI_API_KEY environment variable. │
+│ [env var: OPENAI_API_KEY] │
+│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
+│ (e.g., for llama-server: │
+│ http://localhost:8080/v1). │
+│ [env var: OPENAI_BASE_URL] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮
+│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
+│ [default: gemini-3-flash-preview] │
+│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │
+│ the GEMINI_API_KEY environment variable. │
+│ [env var: GEMINI_API_KEY] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output ───────────────────────────────────────────────────────────────╮
+│ --tts --no-tts Enable text-to-speech for │
+│ responses. │
+│ [default: no-tts] │
+│ --output-device-index INTEGER Index of the audio output │
+│ device to use for TTS. │
+│ --output-device-name TEXT Output device name keywords │
+│ for partial matching. │
+│ --tts-speed FLOAT Speech speed multiplier (1.0 = │
+│ normal, 2.0 = twice as fast, │
+│ 0.5 = half speed). │
+│ [default: 1.0] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮
+│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
+│ [default: localhost] │
+│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
+│ [default: 10200] │
+│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │
+│ (e.g., 'en_US-lessac-medium'). │
+│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │
+│ 'en_US'). │
+│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮
+│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
+│ [default: tts-1] │
+│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │
+│ TTS. │
+│ [default: alloy] │
+│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │
+│ API (e.g., http://localhost:8000/v1 for a │
+│ proxy). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮
+│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
+│ [default: kokoro] │
+│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
+│ [default: af_sky] │
+│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
+│ [default: http://localhost:8880/v1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮
+│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
+│ [default: gemini-2.5-flash-preview-tts] │
+│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │
+│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │
+│ [default: Kore] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+│ --toggle Toggle the background process on/off. If the process is │
+│ running, it will be stopped. If the process is not │
+│ running, it will be started. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --save-file PATH Save TTS response audio to WAV │
+│ file. │
+│ --clipboard --no-clipboard Copy result to clipboard. │
+│ [default: clipboard] │
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write logs to. │
+│ --quiet -q Suppress console output from rich. │
+│ --config TEXT Path to a TOML configuration file. │
+│ --print-args Print the command line arguments, │
+│ including variables taken from the │
+│ configuration file. │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```
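
Combining the voice-edit options above into a single remote-provider invocation (all flags appear in the panels; values are illustrative):

```bash
# Transcribe a spoken instruction via OpenAI ASR, apply it to the clipboard
# text with an OpenAI LLM, and speak the result through Kokoro TTS.
agent-cli voice-edit --asr-provider openai --llm-provider openai \
  --tts --tts-provider kokoro --save-file response.wav
```
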
@@ -1141,133 +1203,148 @@ uv tool install "agent-cli[vad]"

   Wake word-based voice assistant using local or remote services.

-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --help -h Show this message and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
-│ [default: wyoming] │
-│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
-│ [default: ollama] │
-│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
-│ 'gemini'). │
-│ [default: wyoming] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮
-│ --wake-server-ip TEXT Wyoming wake word server IP address. │
-│ [default: localhost] │
-│ --wake-server-port INTEGER Wyoming wake word server port. │
-│ [default: 10400] │
-│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │
-│ 'hey_jarvis'). │
-│ [default: ok_nabu] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --input-device-index INTEGER Index of the audio input device to use. │
-│ --input-device-name TEXT Device name keywords for partial matching. │
-│ --list-devices List available audio input and output devices and │
-│ exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
-│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
-│ [default: localhost] │
-│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
-│ [default: 10300] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
-│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
-│ [default: whisper-1] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
-│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
-│ [default: gemini-3-flash-preview] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
-│ [default: gemma3:4b] │
-│ --llm-ollama-host TEXT The Ollama server host. Default is │
-│ http://localhost:11434. │
-│ [default: http://localhost:11434] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
-│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
-│ [default: gpt-5-mini] │
-│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
-│ OPENAI_API_KEY environment variable. │
-│ [env var: OPENAI_API_KEY] │
-│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
-│ llama-server: http://localhost:8080/v1). │
-│ [env var: OPENAI_BASE_URL] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
-│ [default: gemini-3-flash-preview] │
-│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
-│ GEMINI_API_KEY environment variable. │
-│ [env var: GEMINI_API_KEY] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
-│ --tts --no-tts Enable text-to-speech for responses. │
-│ [default: no-tts] │
-│ --output-device-index INTEGER Index of the audio output device to use │
-│ for TTS. │
-│ --output-device-name TEXT Output device name keywords for partial │
-│ matching. │
-│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
-│ 2.0 = twice as fast, 0.5 = half speed). │
-│ [default: 1.0] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
-│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
-│ [default: localhost] │
-│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
-│ [default: 10200] │
-│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
-│ 'en_US-lessac-medium'). │
-│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
-│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
-│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
-│ [default: tts-1] │
-│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
-│ [default: alloy] │
-│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
-│ (e.g., http://localhost:8000/v1 for a proxy). │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
-│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
-│ [default: kokoro] │
-│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
-│ [default: af_sky] │
-│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
-│ [default: http://localhost:8880/v1] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
-│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
-│ [default: gemini-2.5-flash-preview-tts] │
-│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
-│ 'Charon', 'Fenrir'). │
-│ [default: Kore] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Process Management ───────────────────────────────────────────────────────────────────╮
-│ --stop Stop any running background process. │
-│ --status Check if a background process is running. │
-│ --toggle Toggle the background process on/off. If the process is running, it │
-│ will be stopped. If the process is not running, it will be started. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ General Options ──────────────────────────────────────────────────────────────────────╮
-│ --save-file PATH Save TTS response audio to WAV file. │
-│ --clipboard --no-clipboard Copy result to clipboard. │
-│ [default: clipboard] │
-│ --log-level TEXT Set logging level. │
-│ [default: WARNING] │
-│ --log-file TEXT Path to a file to write logs to. │
-│ --quiet -q Suppress console output from rich. │
-│ --config TEXT Path to a TOML configuration file. │
-│ --print-args Print the command line arguments, including │
-│ variables taken from the configuration file. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --help -h Show this message and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Provider Selection ─────────────────────────────────────────────────────────╮
+│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │
+│ 'gemini'). │
+│ [default: wyoming] │
+│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │
+│ 'gemini'). │
+│ [default: ollama] │
+│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │
+│ 'kokoro', 'gemini'). │
+│ [default: wyoming] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Wake Word ──────────────────────────────────────────────────────────────────╮
+│ --wake-server-ip TEXT Wyoming wake word server IP address. │
+│ [default: localhost] │
+│ --wake-server-port INTEGER Wyoming wake word server port. │
+│ [default: 10400] │
+│ --wake-word TEXT Name of wake word to detect (e.g., │
+│ 'ok_nabu', 'hey_jarvis'). │
+│ [default: ok_nabu] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input ────────────────────────────────────────────────────────────────╮
+│ --input-device-index INTEGER Index of the audio input device to use. │
+│ --input-device-name TEXT Device name keywords for partial │
+│ matching. │
+│ --list-devices List available audio input and output │
+│ devices and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮
+│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
+│ [default: localhost] │
+│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
+│ [default: 10300] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮
+│ --asr-openai-model TEXT The OpenAI model to use for ASR │
+│ (transcription). │
+│ [default: whisper-1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮
+│ --asr-gemini-model TEXT The Gemini model to use for ASR │
+│ (transcription). │
+│ [default: gemini-3-flash-preview] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮
+│ --llm-ollama-model TEXT The Ollama model to use. Default is │
+│ gemma3:4b. │
+│ [default: gemma3:4b] │
+│ --llm-ollama-host TEXT The Ollama server host. Default is │
+│ http://localhost:11434. │
+│ [default: http://localhost:11434] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮
+│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
+│ [default: gpt-5-mini] │
+│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │
+│ the OPENAI_API_KEY environment variable. │
+│ [env var: OPENAI_API_KEY] │
+│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
+│ (e.g., for llama-server: │
+│ http://localhost:8080/v1). │
+│ [env var: OPENAI_BASE_URL] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮
+│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
+│ [default: gemini-3-flash-preview] │
+│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │
+│ the GEMINI_API_KEY environment variable. │
+│ [env var: GEMINI_API_KEY] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output ───────────────────────────────────────────────────────────────╮
+│ --tts --no-tts Enable text-to-speech for │
+│ responses. │
+│ [default: no-tts] │
+│ --output-device-index INTEGER Index of the audio output │
+│ device to use for TTS. │
+│ --output-device-name TEXT Output device name keywords │
+│ for partial matching. │
+│ --tts-speed FLOAT Speech speed multiplier (1.0 = │
+│ normal, 2.0 = twice as fast, │
+│ 0.5 = half speed). │
+│ [default: 1.0] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮
+│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
+│ [default: localhost] │
+│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
+│ [default: 10200] │
+│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │
+│ (e.g., 'en_US-lessac-medium'). │
+│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │
+│ 'en_US'). │
+│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮
+│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
+│ [default: tts-1] │
+│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │
+│ TTS. │
+│ [default: alloy] │
+│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │
+│ API (e.g., http://localhost:8000/v1 for a │
+│ proxy). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮
+│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
+│ [default: kokoro] │
+│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
+│ [default: af_sky] │
+│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
+│ [default: http://localhost:8880/v1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮
+│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
+│ [default: gemini-2.5-flash-preview-tts] │
+│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │
+│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │
+│ [default: Kore] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+│ --toggle Toggle the background process on/off. If the process is │
+│ running, it will be stopped. If the process is not │
+│ running, it will be started. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --save-file PATH Save TTS response audio to WAV │
+│ file. │
+│ --clipboard --no-clipboard Copy result to clipboard. │
+│ [default: clipboard] │
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write logs to. │
+│ --quiet -q Suppress console output from rich. │
+│ --config TEXT Path to a TOML configuration file. │
+│ --print-args Print the command line arguments, │
+│ including variables taken from the │
+│ configuration file. │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```
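
As a concrete sketch of the wake-word flow documented above (the subcommand name is not visible in this excerpt, so `assistant` is an assumption; the flags are from the panels):

```bash
# Wait for 'hey_jarvis' on a local Wyoming wake-word server, then answer
# through Ollama and speak the reply (subcommand name assumed).
agent-cli assistant --wake-word hey_jarvis --wake-server-port 10400 \
  --llm-provider ollama --tts
```
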
@@ -1320,144 +1397,162 @@ uv tool install "agent-cli[vad]"

   A chat agent that you can talk to.

-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --help -h Show this message and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
-│ [default: wyoming] │
-│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
-│ [default: ollama] │
-│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
-│ 'gemini'). │
-│ [default: wyoming] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --input-device-index INTEGER Index of the audio input device to use. │
-│ --input-device-name TEXT Device name keywords for partial matching. │
-│ --list-devices List available audio input and output devices and │
-│ exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
-│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
-│ [default: localhost] │
-│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
-│ [default: 10300] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
-│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
-│ [default: whisper-1] │
-│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
-│ (e.g., for custom Whisper server: │
-│ http://localhost:9898). │
-│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
-│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
-│ [default: gemini-3-flash-preview] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
-│ [default: gemma3:4b] │
-│ --llm-ollama-host TEXT The Ollama server host. Default is │
-│ http://localhost:11434. │
-│ [default: http://localhost:11434] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
-│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
-│ [default: gpt-5-mini] │
-│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
-│ OPENAI_API_KEY environment variable. │
-│ [env var: OPENAI_API_KEY] │
-│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
-│ llama-server: http://localhost:8080/v1). │
-│ [env var: OPENAI_BASE_URL] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
-│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
-│ [default: gemini-3-flash-preview] │
-│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
-│ GEMINI_API_KEY environment variable. │
-│ [env var: GEMINI_API_KEY] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
-│ --tts --no-tts Enable text-to-speech for responses. │
-│ [default: no-tts] │
-│ --output-device-index INTEGER Index of the audio output device to use │
-│ for TTS. │
-│ --output-device-name TEXT Output device name keywords for partial │
-│ matching. │
-│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
-│ 2.0 = twice as fast, 0.5 = half speed). │
-│ [default: 1.0] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
-│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
-│ [default: localhost] │
-│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
-│ [default: 10200] │
-│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
-│ 'en_US-lessac-medium'). │
-│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
-│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
-│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
-│ [default: tts-1] │
-│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
-│ [default: alloy] │
-│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
-│ (e.g., http://localhost:8000/v1 for a proxy). │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
-│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
-│ [default: kokoro] │
-│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
-│ [default: af_sky] │
-│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
-│ [default: http://localhost:8880/v1] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
-│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
-│ [default: gemini-2.5-flash-preview-tts] │
-│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
-│ 'Charon', 'Fenrir'). │
-│ [default: Kore] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Process Management ───────────────────────────────────────────────────────────────────╮
-│ --stop Stop any running background process. │
-│ --status Check if a background process is running. │
-│ --toggle Toggle the background process on/off. If the process is running, it │
-│ will be stopped. If the process is not running, it will be started. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ History Options ──────────────────────────────────────────────────────────────────────╮
-│ --history-dir PATH Directory to store conversation history. │
-│ [default: ~/.config/agent-cli/history] │
-│ --last-n-messages INTEGER Number of messages to include in the conversation │
-│ history. Set to 0 to disable history. │
-│ [default: 50] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Memory Options ───────────────────────────────────────────────────────────────────────╮
-│ --memory-path PATH Path for memory database storage. Default: │
-│ ~/.config/agent-cli/memory/vector_db │
-│ --memory-embedding-model TEXT Embedding model for semantic memory search. │
-│ [default: text-embedding-3-small] │
-│ --memory-top-k INTEGER Number of memories to retrieve per search. │
-│ [default: 5] │
-│ --memory-score-threshold FLOAT Minimum relevance score threshold for memory │
-│ retrieval (0.0-1.0). │
-│ [default: 0.35] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ General Options ──────────────────────────────────────────────────────────────────────╮
-│ --save-file PATH Save TTS response audio to WAV file. │
-│ --log-level TEXT Set logging level. │
-│ [default: WARNING] │
-│ --log-file TEXT Path to a file to write logs to. │
-│ --quiet -q Suppress console output from rich. │
-│ --config TEXT Path to a TOML configuration file. │
-│ --print-args Print the command line arguments, including variables │
-│ taken from the configuration file. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --help -h Show this message and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Provider Selection ─────────────────────────────────────────────────────────╮
+│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │
+│ 'gemini'). │
+│ [default: wyoming] │
+│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │
+│ 'gemini'). │
+│ [default: ollama] │
+│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │
+│ 'kokoro', 'gemini'). │
+│ [default: wyoming] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input ────────────────────────────────────────────────────────────────╮
+│ --input-device-index INTEGER Index of the audio input device to use. │
+│ --input-device-name TEXT Device name keywords for partial │
+│ matching. │
+│ --list-devices List available audio input and output │
+│ devices and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮
+│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
+│ [default: localhost] │
+│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
+│ [default: 10300] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮
+│ --asr-openai-model TEXT The OpenAI model to use for ASR │
+│ (transcription). │
+│ [default: whisper-1] │
+│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │
+│ API (e.g., for custom Whisper server: │
+│ http://localhost:9898). │
+│ --asr-openai-prompt TEXT Custom prompt to guide transcription │
+│ (optional). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮
+│ --asr-gemini-model TEXT The Gemini model to use for ASR │
+│ (transcription). │
+│ [default: gemini-3-flash-preview] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮
+│ --llm-ollama-model TEXT The Ollama model to use. Default is │
+│ gemma3:4b. │
+│ [default: gemma3:4b] │
+│ --llm-ollama-host TEXT The Ollama server host. Default is │
+│ http://localhost:11434. │
+│ [default: http://localhost:11434] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮
+│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
+│ [default: gpt-5-mini] │
+│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │
+│ the OPENAI_API_KEY environment variable. │
+│ [env var: OPENAI_API_KEY] │
+│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
+│ (e.g., for llama-server: │
+│ http://localhost:8080/v1). │
+│ [env var: OPENAI_BASE_URL] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮
+│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
+│ [default: gemini-3-flash-preview] │
+│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │
+│ the GEMINI_API_KEY environment variable. │
+│ [env var: GEMINI_API_KEY] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output ───────────────────────────────────────────────────────────────╮
+│ --tts --no-tts Enable text-to-speech for │
+│ responses. │
+│ [default: no-tts] │
+│ --output-device-index INTEGER Index of the audio output │
+│ device to use for TTS. │
+│ --output-device-name TEXT Output device name keywords │
+│ for partial matching. │
+│ --tts-speed FLOAT Speech speed multiplier (1.0 = │
+│ normal, 2.0 = twice as fast, │
+│ 0.5 = half speed). │
+│ [default: 1.0] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮
+│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
+│ [default: localhost] │
+│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
+│ [default: 10200] │
+│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │
+│ (e.g., 'en_US-lessac-medium'). │
+│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │
+│ 'en_US'). │
+│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮
+│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
+│ [default: tts-1] │
+│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │
+│ TTS. │
+│ [default: alloy] │
+│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │
+│ API (e.g., http://localhost:8000/v1 for a │
+│ proxy). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮
+│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
+│ [default: kokoro] │
+│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
+│ [default: af_sky] │
+│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
+│ [default: http://localhost:8880/v1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮
+│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
+│ [default: gemini-2.5-flash-preview-tts] │
+│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │
+│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │
+│ [default: Kore] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+│ --toggle Toggle the background process on/off. If the process is │
+│ running, it will be stopped. If the process is not │
+│ running, it will be started. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ History Options ────────────────────────────────────────────────────────────╮
+│ --history-dir PATH Directory to store conversation history. │
+│ [default: ~/.config/agent-cli/history] │
+│ --last-n-messages INTEGER Number of messages to include in the │
+│ conversation history. Set to 0 to disable │
+│ history. │
+│ [default: 50] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-path PATH Path for memory database storage. │
+│ Default: │
+│ ~/.config/agent-cli/memory/vector_… │
+│ --memory-embedding-model TEXT Embedding model for semantic memory │
+│ search. │
+│ [default: text-embedding-3-small] │
+│ --memory-top-k INTEGER Number of memories to retrieve per │
+│ search. │
+│ [default: 5] │
+│ --memory-score-threshold FLOAT Minimum relevance score threshold │
+│ for memory retrieval (0.0-1.0). │
+│ [default: 0.35] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --save-file PATH Save TTS response audio to WAV file. │
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write logs to. │
+│ --quiet -q Suppress console output from rich. │
+│ --config TEXT Path to a TOML configuration file. │
+│ --print-args Print the command line arguments, including │
+│ variables taken from the configuration file. │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ```
│ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1505,49 +1600,52 @@ uv tool install "agent-cli[vad]" Start the RAG (Retrieval-Augmented Generation) Proxy Server. This server watches a folder for documents, indexes them, and provides an - OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting - relevant context from the documents. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮ -│ --docs-folder PATH Folder to watch for documents │ -│ [default: ./rag_docs] │ -│ --chroma-path PATH Path to ChromaDB persistence directory │ -│ [default: ./rag_db] │ -│ --limit INTEGER Number of document chunks to retrieve per │ -│ query. │ -│ [default: 3] │ -│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │ -│ snippets are insufficient. │ -│ [default: rag-tools] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8000] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables taken │ -│ from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), + injecting relevant context from the documents. + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ RAG Configuration ──────────────────────────────────────────────────────────╮ +│ --docs-folder PATH Folder to watch for documents │ +│ [default: ./rag_docs] │ +│ --chroma-path PATH Path to ChromaDB persistence │ +│ directory │ +│ [default: ./rag_db] │ +│ --limit INTEGER Number of document chunks to │ +│ retrieve per query. 
│ +│ [default: 3] │ +│ --rag-tools --no-rag-tools Allow agent to fetch full │ +│ documents when snippets are │ +│ insufficient. │ +│ [default: rag-tools] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ───────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8000] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1603,91 +1701,107 @@ The `memory proxy` command is the core feature—a middleware server that gives Start the memory-backed chat proxy server. - This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE - plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM). + This server acts as a middleware between your chat client (e.g., a web UI, + CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, + Ollama, vLLM). Key Features: - • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving - as the ultimate source of truth. - • Automatic Version Control: Built-in Git integration automatically commits changes, - providing a full history of memory evolution. - • Lightweight & Local: Minimal dependencies and runs entirely on your machine. - • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions - endpoint. + • Simple Markdown Files: Memories are stored as human-readable Markdown + files, serving as the ultimate source of truth. + • Automatic Version Control: Built-in Git integration automatically commits + changes, providing a full history of memory evolution. + • Lightweight & Local: Minimal dependencies and runs entirely on your + machine. + • Proxy Middleware: Works transparently with any OpenAI-compatible + /chat/completions endpoint. How it works: 1 Intercepts POST /v1/chat/completions requests. - 2 Retrieves relevant memories (facts, previous conversations) from a local vector - database (ChromaDB) based on the user's query. + 2 Retrieves relevant memories (facts, previous conversations) from a local + vector database (ChromaDB) based on the user's query. 3 Injects these memories into the system prompt. 4 Forwards the augmented request to the real LLM (--openai-base-url). 
- 5 Extracts new facts from the conversation in the background and updates the long-term - memory store (including handling contradictions). - - Use this to give "long-term memory" to any OpenAI-compatible application. Point your - client's base URL to http://localhost:8100/v1. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory store (files + │ -│ derived vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory entries to │ -│ retrieve per query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory entries per │ -│ conversation (excluding summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight (0.0-1.0). │ -│ Controls freshness vs. relevance. │ -│ Default 0.2 (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic relevance │ -│ threshold (0.0-1.0). Memories │ -│ below this score are discarded to │ -│ reduce noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact extraction │ -│ and summaries. │ -│ [default: summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git commit of │ -│ memory changes. │ -│ [default: git-versioning] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8100] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables taken │ -│ from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + 5 Extracts new facts from the conversation in the background and updates the + long-term memory store (including handling contradictions). + + Use this to give "long-term memory" to any OpenAI-compatible application. + Point your client's base URL to http://localhost:8100/v1. 
+ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Configuration ───────────────────────────────────────────────────────╮ +│ --memory-path PATH Path to the memory │ +│ store (files + derived │ +│ vector index). │ +│ [default: ./memory_db] │ +│ --default-top-k INTEGER Number of memory │ +│ entries to retrieve per │ +│ query. │ +│ [default: 5] │ +│ --max-entries INTEGER Maximum stored memory │ +│ entries per │ +│ conversation (excluding │ +│ summary). │ +│ [default: 500] │ +│ --mmr-lambda FLOAT MMR lambda (0-1): │ +│ higher favors │ +│ relevance, lower favors │ +│ diversity. │ +│ [default: 0.7] │ +│ --recency-weight FLOAT Recency score weight │ +│ (0.0-1.0). Controls │ +│ freshness vs. │ +│ relevance. Default 0.2 │ +│ (20% recency, 80% │ +│ semantic relevance). │ +│ [default: 0.2] │ +│ --score-threshold FLOAT Minimum semantic │ +│ relevance threshold │ +│ (0.0-1.0). Memories │ +│ below this score are │ +│ discarded to reduce │ +│ noise. │ +│ [default: 0.35] │ +│ --summarization --no-summarization Enable automatic fact │ +│ extraction and │ +│ summaries. │ +│ [default: │ +│ summarization] │ +│ --git-versioning --no-git-versioning Enable automatic git │ +│ commit of memory │ +│ changes. │ +│ [default: │ +│ git-versioning] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ───────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8100] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1738,11 +1852,11 @@ agent-cli memory add -c work "Project deadline is Friday" Add memories directly without LLM extraction. - This writes facts directly to the memory store, bypassing the LLM-based fact extraction. - Useful for bulk imports or seeding memories. + This writes facts directly to the memory store, bypassing the LLM-based fact + extraction. Useful for bulk imports or seeding memories. - The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll - be indexed on next memory proxy startup. + The memory proxy file watcher (if running) will auto-index new files. 
+ Otherwise, they'll be indexed on next memory proxy startup. Examples:: @@ -1763,29 +1877,35 @@ agent-cli memory add -c work "Project deadline is Friday" agent-cli memory add -c work "Project deadline is Friday" -╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮ -│ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. Use '-' │ -│ for stdin. Supports JSON array, │ -│ JSON object with 'memories' key, │ -│ or plain text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add memories │ -│ to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: git-versioning] │ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +│ memories [MEMORIES]... Memories to add. Each argument becomes one │ +│ fact. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --file -f PATH Read memories from file. │ +│ Use '-' for stdin. │ +│ Supports JSON array, │ +│ JSON object with │ +│ 'memories' key, or plain │ +│ text (one per line). │ +│ --conversation-id -c TEXT Conversation ID to add │ +│ memories to. │ +│ [default: default] │ +│ --memory-path PATH Path to the memory │ +│ store. │ +│ [default: ./memory_db] │ +│ --git-versioning --no-git-versioning Commit changes to git. │ +│ [default: │ +│ git-versioning] │ +│ --help -h Show this message and │ +│ exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` From c098806fe20ca0163d5ebae9a04116dd1c2373b9 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 00:34:46 -0800 Subject: [PATCH 07/20] refactor(chat): remove globals and simplify memory tools - Replace closure-based memory tools with MemoryTools class - Pass memory_client and conversation_id directly to tools() - Remove module-level globals (_memory_client, _conversation_id) - Remove init_memory/cleanup_memory lifecycle functions - Update chat.py to handle memory client lifecycle directly - Add proper type hints using TYPE_CHECKING imports - Update tests to pass new required parameters --- agent_cli/_tools.py | 433 +++++++++++-------------- agent_cli/agents/chat.py | 32 +- tests/agents/test_interactive.py | 1 + tests/agents/test_interactive_extra.py | 4 + 4 files changed, 209 insertions(+), 261 deletions(-) diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py index 5e4cc5add..295738a6e 100644 --- a/agent_cli/_tools.py +++ b/agent_cli/_tools.py @@ -2,73 +2,14 @@ from __future__ import annotations -import asyncio import subprocess from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING if TYPE_CHECKING: from agent_cli.memory.client import MemoryClient -# --- Memory System State --- -# These module-level variables are set by init_memory() when the chat agent starts. - -_memory_client: MemoryClient | None = None -_conversation_id: str = "default" -_event_loop: asyncio.AbstractEventLoop | None = None - - -def init_memory( - client: MemoryClient, - conversation_id: str = "default", - event_loop: asyncio.AbstractEventLoop | None = None, -) -> None: - """Initialize the memory system. - - Called by the chat agent on startup. - - Args: - client: The MemoryClient instance to use for memory operations. - conversation_id: The conversation ID for scoping memories. - event_loop: The asyncio event loop for running async operations. - - """ - global _memory_client, _conversation_id, _event_loop - _memory_client = client - _conversation_id = conversation_id - _event_loop = event_loop - - -async def cleanup_memory() -> None: - """Clean up the memory system. - - Called when the chat agent exits. - """ - global _memory_client, _event_loop - if _memory_client is not None: - await _memory_client.stop() - _memory_client = None - _event_loop = None - - -def _run_async(coro: Any, timeout: float = 30.0) -> Any: - """Run an async coroutine from sync context using the stored event loop.""" - if _event_loop is None: - msg = "Event loop not initialized for memory system" - raise RuntimeError(msg) - - future = asyncio.run_coroutine_threadsafe(coro, _event_loop) - return future.result(timeout=timeout) - - -def _check_memory_initialized() -> str | None: - """Check if memory is initialized. Returns error message if not, None if OK.""" - if _memory_client is None: - return "Error: Memory system not initialized. Install with: pip install 'agent-cli[memory]'" - return None - - def read_file(path: str) -> str: """Read the content of a file. @@ -105,226 +46,228 @@ def execute_code(code: str) -> str: return f"Error: Command not found: {code.split()[0]}" -def add_memory(content: str, category: str = "general", tags: str = "") -> str: - """Add important information to long-term memory for future conversations. - - Use this when the user shares: - - Personal information (name, job, location, family, etc.) - - Preferences (favorite foods, work style, communication preferences, etc.) 
- - Important facts they want remembered (birthdays, project details, goals, etc.) - - Tasks or commitments they mention - - Always ask for permission before storing personal or sensitive information. - - Args: - content: The specific information to remember (be descriptive and clear) - category: Type of memory - use "personal", "preferences", "facts", "tasks", "projects", or "general" - tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming") - - Returns: - Confirmation message - - """ - if error := _check_memory_initialized(): - return error - - # Format content with metadata - formatted_content = f"[{category}] {content}" +def _format_memory_content(content: str, category: str, tags: str) -> str: + """Format memory content with category and tags.""" + formatted = f"[{category}] {content}" if tags: - formatted_content += f" (tags: {tags})" - - try: - _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) # type: ignore[union-attr] - return "Memory added successfully." - except Exception as e: - return f"Error adding memory: {e}" - - -def search_memory(query: str, category: str = "") -> str: - """Search long-term memory for relevant information before answering questions. - - Use this tool: - - Before answering questions about the user's preferences, personal info, or past conversations - - When the user asks "what do you remember about..." or similar questions - - When you need context about the user's work, projects, or goals - - To check if you've discussed a topic before - - This performs semantic search to find conceptually related information. - - Args: - query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences") - category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects") - - Returns: - Relevant memories found, or message if none found - - """ - if error := _check_memory_initialized(): - return error - - # Include category in search query if provided - search_query = f"{category} {query}" if category else query + formatted += f" (tags: {tags})" + return formatted + + +class MemoryTools: + """Memory tools bound to a specific client and conversation.""" + + def __init__( + self, + memory_client: MemoryClient | None, + conversation_id: str = "default", + ) -> None: + self._client = memory_client + self._conversation_id = conversation_id + + def _check(self) -> str | None: + if self._client is None: + return "Error: Memory system not initialized. Install with: pip install 'agent-cli[memory]'" + return None + + async def add_memory( + self, + content: str, + category: str = "general", + tags: str = "", + ) -> str: + """Add important information to long-term memory for future conversations. + + Use this when the user shares: + - Personal information (name, job, location, family, etc.) + - Preferences (favorite foods, work style, communication preferences, etc.) + - Important facts they want remembered (birthdays, project details, goals, etc.) + - Tasks or commitments they mention + + Always ask for permission before storing personal or sensitive information. 
+ + Args: + content: The specific information to remember (be descriptive and clear) + category: Type of memory - use "personal", "preferences", "facts", "tasks", "projects", or "general" + tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming") + + Returns: + Confirmation message + + """ + if error := self._check(): + return error + + try: + formatted = _format_memory_content(content, category, tags) + await self._client.add(formatted, conversation_id=self._conversation_id) # type: ignore[union-attr] + return "Memory added successfully." + except Exception as e: + return f"Error adding memory: {e}" + + async def search_memory(self, query: str, category: str = "") -> str: + """Search long-term memory for relevant information before answering questions. + + Use this tool: + - Before answering questions about the user's preferences, personal info, or past conversations + - When the user asks "what do you remember about..." or similar questions + - When you need context about the user's work, projects, or goals + - To check if you've discussed a topic before + + This performs semantic search to find conceptually related information. + + Args: + query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences") + category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects") + + Returns: + Relevant memories found, or message if none found + + """ + if error := self._check(): + return error + + search_query = f"{category} {query}" if category else query + + try: + result = await self._client.search(search_query, conversation_id=self._conversation_id) # type: ignore[union-attr] + if not result.entries: + return f"No memories found matching '{query}'" + + lines = [] + for entry in result.entries: + score_info = f" (relevance: {entry.score:.2f})" if entry.score else "" + lines.append(f"- {entry.content}{score_info}") + return "\n".join(lines) + except Exception as e: + return f"Error searching memory: {e}" + + def list_all_memories(self, limit: int = 10) -> str: + """List all memories with their details. + + Use this tool: + - When the user asks "show me all my memories" or "list everything you remember" + - When they want to see what information is stored + - To provide a complete overview of stored information + + Shows memories in reverse chronological order (newest first). + + Args: + limit: Maximum number of memories to show (default 10, use higher numbers if user wants more) + + Returns: + Formatted list of all memories + + """ + if error := self._check(): + return error + + try: + entries = self._client.list_all( # type: ignore[union-attr] + conversation_id=self._conversation_id, + include_summary=False, + ) - try: - result = _run_async( - _memory_client.search(search_query, conversation_id=_conversation_id), # type: ignore[union-attr] - ) - if not result.entries: - return f"No memories found matching '{query}'" + if not entries: + return "No memories stored yet." 
- # Format results with relevance scores - lines = [] - for entry in result.entries: - score_info = f" (relevance: {entry.score:.2f})" if entry.score else "" - lines.append(f"- {entry.content}{score_info}") - return "\n".join(lines) - except Exception as e: - return f"Error searching memory: {e}" + entries_to_show = entries[:limit] + results = [f"Showing {len(entries_to_show)} of {len(entries)} total memories:\n"] + for entry in entries_to_show: + created_at = entry.get("created_at", "unknown") + role = entry.get("role", "memory") + content = entry.get("content", "") + results.append(f"- [{role}] {content} (created: {created_at})") -def update_memory(memory_id: int, content: str = "", category: str = "", tags: str = "") -> str: - """Update an existing memory by adding new information. + if len(entries) > limit: + results.append( + f"\n... and {len(entries) - limit} more memories. Use a higher limit to see more.", + ) - Use this tool: - - When the user wants to correct or modify previously stored information - - When information has changed (e.g., job change, preference updates) - - When the user says "update my memory about..." or "change the memory where..." + return "\n".join(results) + except Exception as e: + return f"Error listing memories: {e}" - The memory system uses automatic reconciliation - adding new information will - update or replace related existing facts. + def list_memory_categories(self) -> str: + """List all memory categories and their counts to see what has been remembered. - Args: - memory_id: Not used - the system automatically reconciles memories - content: The updated content to store - category: Category for the memory (leave empty for "general") - tags: Comma-separated tags (leave empty for none) + Use this tool: + - When the user asks "what categories do you have?" + - To get a quick overview of memory organization + - When the user wants to know what types of information are stored - Returns: - Confirmation message + This provides a summary view before using list_all_memories for details. - """ - _ = memory_id # System uses reconciliation, not ID-based updates + Returns: + Summary of memory types with counts - if error := _check_memory_initialized(): - return error + """ + if error := self._check(): + return error - if not content: - return "Please provide the updated content. The system will automatically reconcile it with existing memories." + try: + entries = self._client.list_all( # type: ignore[union-attr] + conversation_id=self._conversation_id, + include_summary=False, + ) - # Format content with metadata - formatted_content = f"[{category or 'general'}] {content}" - if tags: - formatted_content += f" (tags: {tags})" + if not entries: + return "No memories found." - try: - _run_async(_memory_client.add(formatted_content, conversation_id=_conversation_id)) # type: ignore[union-attr] - return "Memory updated successfully. The system has reconciled this information with existing memories." - except Exception as e: - return f"Error updating memory: {e}" + roles: dict[str, int] = {} + for entry in entries: + role = entry.get("role", "memory") + roles[role] = roles.get(role, 0) + 1 + results = ["Memory Types:"] + for role, count in sorted(roles.items()): + results.append(f"- {role}: {count} entries") -def list_all_memories(limit: int = 10) -> str: - """List all memories with their details. 
+ return "\n".join(results) + except Exception as e: + return f"Error listing categories: {e}" - Use this tool: - - When the user asks "show me all my memories" or "list everything you remember" - - When they want to see what information is stored - - To provide a complete overview of stored information - Shows memories in reverse chronological order (newest first). +def create_memory_tools( + memory_client: MemoryClient | None, + conversation_id: str = "default", +) -> list: + """Create memory tools bound to a specific client and conversation. Args: - limit: Maximum number of memories to show (default 10, use higher numbers if user wants more) + memory_client: The MemoryClient instance, or None if not available. + conversation_id: The conversation ID for scoping memories. Returns: - Formatted list of all memories + List of pydantic_ai Tool objects for memory operations. """ - if error := _check_memory_initialized(): - return error - - try: - entries = _memory_client.list_all( # type: ignore[union-attr] - conversation_id=_conversation_id, - include_summary=False, - ) - - if not entries: - return "No memories stored yet." - - # Limit results - entries_to_show = entries[:limit] - - results = [f"Showing {len(entries_to_show)} of {len(entries)} total memories:\n"] - for entry in entries_to_show: - created_at = entry.get("created_at", "unknown") - role = entry.get("role", "memory") - content = entry.get("content", "") - results.append(f"- [{role}] {content} (created: {created_at})") - - if len(entries) > limit: - results.append( - f"\n... and {len(entries) - limit} more memories. Use a higher limit to see more.", - ) - - return "\n".join(results) - except Exception as e: - return f"Error listing memories: {e}" - + from pydantic_ai.tools import Tool # noqa: PLC0415 -def list_memory_categories() -> str: - """List all memory categories and their counts to see what has been remembered. + mt = MemoryTools(memory_client, conversation_id) + return [ + Tool(mt.add_memory), + Tool(mt.search_memory), + Tool(mt.list_all_memories), + Tool(mt.list_memory_categories), + ] - Use this tool: - - When the user asks "what categories do you have?" - - To get a quick overview of memory organization - - When the user wants to know what types of information are stored - This provides a summary view before using list_all_memories for details. +def tools(memory_client: MemoryClient | None = None, conversation_id: str = "default") -> list: + """Return a list of all tools for the chat agent. - Returns: - Summary of memory types with counts + Args: + memory_client: The MemoryClient instance, or None if not available. + conversation_id: The conversation ID for scoping memories. """ - if error := _check_memory_initialized(): - return error - - try: - entries = _memory_client.list_all( # type: ignore[union-attr] - conversation_id=_conversation_id, - include_summary=False, - ) - - if not entries: - return "No memories found." 
- - # Count by role (user, assistant, memory) - roles: dict[str, int] = {} - for entry in entries: - role = entry.get("role", "memory") - roles[role] = roles.get(role, 0) + 1 - - results = ["Memory Types:"] - for role, count in sorted(roles.items()): - results.append(f"- {role}: {count} entries") - - return "\n".join(results) - except Exception as e: - return f"Error listing categories: {e}" - - -def tools() -> list: - """Return a list of tools.""" from pydantic_ai.common_tools.duckduckgo import duckduckgo_search_tool # noqa: PLC0415 from pydantic_ai.tools import Tool # noqa: PLC0415 return [ Tool(read_file), Tool(execute_code), - Tool(add_memory), - Tool(search_memory), - Tool(update_memory), - Tool(list_all_memories), - Tool(list_memory_categories), + *create_memory_tools(memory_client, conversation_id), duckduckgo_search_tool(), ] diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index e732e268e..02e553da4 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -25,7 +25,7 @@ import typer from agent_cli import config, opts -from agent_cli._tools import cleanup_memory, init_memory, tools +from agent_cli._tools import tools from agent_cli.cli import app from agent_cli.core import process from agent_cli.core.audio import setup_devices @@ -50,6 +50,8 @@ if TYPE_CHECKING: from rich.live import Live + from agent_cli.memory.client import MemoryClient + LOGGER = logging.getLogger(__name__) @@ -74,12 +76,12 @@ def _try_init_memory( history_cfg: config.History, openai_llm_cfg: config.OpenAILLM, quiet: bool, -) -> object | None: +) -> MemoryClient | None: """Try to initialize the memory system. Returns the MemoryClient if successful, None otherwise. """ - from agent_cli.memory.client import MemoryClient # noqa: PLC0415 + from agent_cli.memory.client import MemoryClient as MemoryClientImpl # noqa: PLC0415 # Determine memory path memory_path = memory_cfg.memory_path @@ -95,7 +97,7 @@ def _try_init_memory( if not quiet: console.print("[dim]Initializing memory system...[/dim]") - memory_client = MemoryClient( + memory_client = MemoryClientImpl( memory_path=memory_path, openai_base_url=openai_base_url, embedding_model=memory_cfg.embedding_model, @@ -114,14 +116,6 @@ def _try_init_memory( # Start the memory client's file watcher memory_client.start() - # Generate conversation ID and initialize tools - conversation_id = _get_conversation_id(history_cfg) - init_memory( - memory_client, - conversation_id, - asyncio.get_running_loop(), - ) - if not quiet: console.print("[green]Memory system initialized[/green]") @@ -149,8 +143,7 @@ class ConversationEntry(TypedDict): - execute_code: Execute a shell command. - add_memory: Add important information to long-term memory for future recall. - search_memory: Search your long-term memory for relevant information. -- update_memory: Modify existing memories by ID when information changes. -- list_all_memories: Show all stored memories with their IDs and details. +- list_all_memories: Show all stored memories with their details. - list_memory_categories: See what types of information you've remembered. - duckduckgo_search: Search the web for current information. 
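For reference, a minimal sketch of how the refactored `tools()` entry point above is consumed (not part of the patch; assumes `pydantic_ai` with its DuckDuckGo tool is installed, and uses `memory_client=None` to show the degraded path):

```python
import asyncio

from agent_cli._tools import MemoryTools, tools

# Without a memory backend the memory tools still register; each call
# returns an "Error: Memory system not initialized..." string instead of raising.
agent_tools = tools(memory_client=None, conversation_id="default")

# MemoryTools can also be exercised directly, e.g. in tests:
mt = MemoryTools(memory_client=None, conversation_id="test")
print(asyncio.run(mt.add_memory("User likes tea", category="preferences")))
```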
@@ -223,6 +216,8 @@ async def _handle_conversation_turn( *, stop_event: InteractiveStopEvent, conversation_history: list[ConversationEntry], + memory_client: MemoryClient | None, + conversation_id: str, provider_cfg: config.ProviderSelection, general_cfg: config.General, history_cfg: config.History, @@ -314,7 +309,7 @@ async def _handle_conversation_turn( openai_cfg=openai_llm_cfg, gemini_cfg=gemini_llm_cfg, logger=LOGGER, - tools=tools(), + tools=tools(memory_client, conversation_id), quiet=True, # Suppress internal output since we're showing our own timer live=live, ) @@ -439,6 +434,9 @@ async def _async_main( history_cfg.last_n_messages, ) + # Generate conversation ID for memory scoping + conversation_id = _get_conversation_id(history_cfg) + with ( maybe_live(not general_cfg.quiet) as live, signal_handling_context(LOGGER, general_cfg.quiet) as stop_event, @@ -447,6 +445,8 @@ async def _async_main( await _handle_conversation_turn( stop_event=stop_event, conversation_history=conversation_history, + memory_client=memory_client, + conversation_id=conversation_id, provider_cfg=provider_cfg, general_cfg=general_cfg, history_cfg=history_cfg, @@ -471,7 +471,7 @@ async def _async_main( finally: # Clean up memory client if memory_client is not None: - await cleanup_memory() + await memory_client.stop() @app.command("chat") diff --git a/tests/agents/test_interactive.py b/tests/agents/test_interactive.py index ea9de9474..e51648ddc 100644 --- a/tests/agents/test_interactive.py +++ b/tests/agents/test_interactive.py @@ -267,6 +267,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None: with ( patch("agent_cli.agents.chat.setup_devices", return_value=(1, "mock_input", 1)), + patch("agent_cli.agents.chat._try_init_memory", return_value=None), patch("agent_cli.agents.chat.asr.create_transcriber") as mock_create_transcriber, patch( "agent_cli.agents.chat.get_llm_response", diff --git a/tests/agents/test_interactive_extra.py b/tests/agents/test_interactive_extra.py index 6f3c2d9dc..db10a6019 100644 --- a/tests/agents/test_interactive_extra.py +++ b/tests/agents/test_interactive_extra.py @@ -68,6 +68,8 @@ async def test_handle_conversation_turn_no_llm_response(): await _handle_conversation_turn( stop_event=stop_event, conversation_history=conversation_history, + memory_client=None, + conversation_id="test", provider_cfg=provider_cfg, general_cfg=general_cfg, history_cfg=history_cfg, @@ -138,6 +140,8 @@ async def test_handle_conversation_turn_no_instruction(): await _handle_conversation_turn( stop_event=stop_event, conversation_history=conversation_history, + memory_client=None, + conversation_id="test", provider_cfg=provider_cfg, general_cfg=general_cfg, history_cfg=history_cfg, From f40fa09ab208de3626f44cf326c40338a2e39c8e Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 00:41:55 -0800 Subject: [PATCH 08/20] refactor(chat): remove list_memory_categories tool The tool was misleading - it counted entries by internal role (memory, user, assistant, summary) rather than user-facing categories (personal, preferences, facts, etc.). 
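To illustrate the mismatch (a sketch, not code from the repository; the entry dicts mirror the shape read by `list_all_memories`, and the `[category]` prefixes come from `_format_memory_content`):

```python
entries = [
    {"role": "memory", "content": "[personal] Name is Bas"},
    {"role": "memory", "content": "[preferences] Prefers dark mode"},
    {"role": "summary", "content": "Summary of earlier turns"},
]

# What the removed tool reported: counts keyed by internal storage role.
roles: dict[str, int] = {}
for entry in entries:
    role = entry.get("role", "memory")
    roles[role] = roles.get(role, 0) + 1

print(roles)  # {'memory': 2, 'summary': 1}
# Users asking "what categories do you have?" expect personal/preferences/...,
# which live inside the content strings, not in the role field.
```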
--- agent_cli/_tools.py | 40 ---------------------------------------- agent_cli/agents/chat.py | 1 - docs/commands/chat.md | 2 -- 3 files changed, 43 deletions(-) diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py index 295738a6e..180fa3f7d 100644 --- a/agent_cli/_tools.py +++ b/agent_cli/_tools.py @@ -189,45 +189,6 @@ def list_all_memories(self, limit: int = 10) -> str: except Exception as e: return f"Error listing memories: {e}" - def list_memory_categories(self) -> str: - """List all memory categories and their counts to see what has been remembered. - - Use this tool: - - When the user asks "what categories do you have?" - - To get a quick overview of memory organization - - When the user wants to know what types of information are stored - - This provides a summary view before using list_all_memories for details. - - Returns: - Summary of memory types with counts - - """ - if error := self._check(): - return error - - try: - entries = self._client.list_all( # type: ignore[union-attr] - conversation_id=self._conversation_id, - include_summary=False, - ) - - if not entries: - return "No memories found." - - roles: dict[str, int] = {} - for entry in entries: - role = entry.get("role", "memory") - roles[role] = roles.get(role, 0) + 1 - - results = ["Memory Types:"] - for role, count in sorted(roles.items()): - results.append(f"- {role}: {count} entries") - - return "\n".join(results) - except Exception as e: - return f"Error listing categories: {e}" - def create_memory_tools( memory_client: MemoryClient | None, @@ -250,7 +211,6 @@ def create_memory_tools( Tool(mt.add_memory), Tool(mt.search_memory), Tool(mt.list_all_memories), - Tool(mt.list_memory_categories), ] diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 02e553da4..6ecdd22de 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -144,7 +144,6 @@ class ConversationEntry(TypedDict): - add_memory: Add important information to long-term memory for future recall. - search_memory: Search your long-term memory for relevant information. - list_all_memories: Show all stored memories with their details. -- list_memory_categories: See what types of information you've remembered. - duckduckgo_search: Search the web for current information. Memory Guidelines: diff --git a/docs/commands/chat.md b/docs/commands/chat.md index 719d35325..935cde8b1 100644 --- a/docs/commands/chat.md +++ b/docs/commands/chat.md @@ -214,9 +214,7 @@ The chat agent has access to tools that let it interact with your system: - **duckduckgo_search**: Search the web via DuckDuckGo - **add_memory**: Store information for future conversations (uses [vector memory](../architecture/memory.md)) - **search_memory**: Search stored memories with semantic search -- **update_memory**: Update existing memories - **list_all_memories**: List all stored memories -- **list_memory_categories**: Show memory category summary ## Example Conversation From 05edfdd418e22be8bdf747aff300bd1632ce5392 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 01:10:37 -0800 Subject: [PATCH 09/20] feat(chat): add --memory-mode option for memory control Add memory mode selection with three options: - off: Memory system disabled - tools: LLM decides via add_memory/search_memory tools (default) - auto: Automatic fact extraction after each conversation turn The modes are mutually exclusive to avoid duplicate memory storage. In "auto" mode, facts are automatically extracted from both user and assistant messages without requiring explicit tool calls. 
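A quick usage sketch of the new flag (mode values as defined by `--memory-mode` in this patch; all other options left at their defaults):

```bash
# Default: the LLM decides when to call add_memory/search_memory
agent-cli chat --memory-mode tools

# Automatic fact extraction after every turn, no tool calls required
agent-cli chat --memory-mode auto

# Disable the long-term memory system entirely
agent-cli chat --memory-mode off
```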
Resolves #184 as part of #183 --- agent_cli/agents/chat.py | 90 ++++++++++++++++++++------ agent_cli/config.py | 9 +++ agent_cli/memory/client.py | 26 ++++++++ agent_cli/opts.py | 6 ++ tests/agents/test_interactive_extra.py | 2 + 5 files changed, 114 insertions(+), 19 deletions(-) diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 6ecdd22de..28f81a3f0 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -122,6 +122,30 @@ def _try_init_memory( return memory_client +def _maybe_init_memory( + memory_cfg: config.Memory, + history_cfg: config.History, + openai_llm_cfg: config.OpenAILLM, + quiet: bool, +) -> MemoryClient | None: + """Initialize memory if mode is not 'off', handling errors gracefully.""" + if memory_cfg.mode == "off": + return None + try: + return _try_init_memory(memory_cfg, history_cfg, openai_llm_cfg, quiet) + except ImportError: + if not quiet: + console.print( + "[yellow]Memory system not available. " + "Install with: pip install 'agent-cli[memory]'[/yellow]", + ) + except Exception as e: + if not quiet: + console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]") + LOGGER.warning("Failed to initialize memory: %s", e) + return None + + # --- Conversation History --- @@ -211,12 +235,36 @@ def _format_conversation_for_llm(history: list[ConversationEntry]) -> str: return "\n".join(formatted_lines) +async def _maybe_extract_memories( + memory_cfg: config.Memory, + memory_client: MemoryClient | None, + instruction: str, + response_text: str, + conversation_id: str, + quiet: bool, +) -> None: + """Extract memories in auto mode, silently skip otherwise.""" + if memory_cfg.mode != "auto" or memory_client is None: + return + try: + await memory_client.extract_from_turn( + user_message=instruction, + assistant_message=response_text, + conversation_id=conversation_id, + ) + if not quiet: + console.print("[dim]💾 Memory extraction complete[/dim]") + except Exception as e: + LOGGER.warning("Failed to extract memories: %s", e) + + async def _handle_conversation_turn( *, stop_event: InteractiveStopEvent, conversation_history: list[ConversationEntry], memory_client: MemoryClient | None, conversation_id: str, + memory_cfg: config.Memory, provider_cfg: config.ProviderSelection, general_cfg: config.General, history_cfg: config.History, @@ -299,6 +347,8 @@ async def _handle_conversation_turn( quiet=general_cfg.quiet, stop_event=stop_event, ): + # Only include memory tools in "tools" mode + tool_memory_client = memory_client if memory_cfg.mode == "tools" else None response_text = await get_llm_response( system_prompt=SYSTEM_PROMPT, agent_instructions=AGENT_INSTRUCTIONS, @@ -308,7 +358,7 @@ async def _handle_conversation_turn( openai_cfg=openai_llm_cfg, gemini_cfg=gemini_llm_cfg, logger=LOGGER, - tools=tools(memory_client, conversation_id), + tools=tools(tool_memory_client, conversation_id), quiet=True, # Suppress internal output since we're showing our own timer live=live, ) @@ -336,6 +386,16 @@ async def _handle_conversation_turn( }, ) + # 5b. Auto-extract memories in "auto" mode + await _maybe_extract_memories( + memory_cfg, + memory_client, + instruction, + response_text, + conversation_id, + general_cfg.quiet, + ) + # 6. 
Save history if history_cfg.history_dir: history_path = Path(history_cfg.history_dir).expanduser() @@ -401,24 +461,13 @@ async def _async_main( if audio_out_cfg.enable_tts: audio_out_cfg.output_device_index = tts_output_device_index - # Initialize memory system - try: - memory_client = _try_init_memory( - memory_cfg, - history_cfg, - openai_llm_cfg, - general_cfg.quiet, - ) - except ImportError: - if not general_cfg.quiet: - console.print( - "[yellow]Memory system not available. " - "Install with: pip install 'agent-cli[memory]'[/yellow]", - ) - except Exception as e: - if not general_cfg.quiet: - console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]") - LOGGER.warning("Failed to initialize memory: %s", e) + # Initialize memory system (if not disabled) + memory_client = _maybe_init_memory( + memory_cfg, + history_cfg, + openai_llm_cfg, + general_cfg.quiet, + ) # Load conversation history conversation_history = [] @@ -446,6 +495,7 @@ async def _async_main( conversation_history=conversation_history, memory_client=memory_client, conversation_id=conversation_id, + memory_cfg=memory_cfg, provider_cfg=provider_cfg, general_cfg=general_cfg, history_cfg=history_cfg, @@ -534,6 +584,7 @@ def chat( rich_help_panel="History Options", ), # --- Memory Options --- + memory_mode: str = opts.MEMORY_MODE, memory_path: Path | None = opts.MEMORY_PATH, memory_embedding_model: str = opts.MEMORY_EMBEDDING_MODEL, memory_top_k: int = opts.MEMORY_TOP_K, @@ -641,6 +692,7 @@ def chat( last_n_messages=last_n_messages, ) memory_cfg = config.Memory( + mode=memory_mode, # type: ignore[arg-type] memory_path=memory_path, embedding_model=memory_embedding_model, top_k=memory_top_k, diff --git a/agent_cli/config.py b/agent_cli/config.py index 0bb48ebcb..db17115d8 100644 --- a/agent_cli/config.py +++ b/agent_cli/config.py @@ -227,13 +227,22 @@ def _expand_user_path(cls, v: str | None) -> Path | None: # --- Panel: Memory Options --- +MemoryMode = Literal["off", "tools", "auto"] + + class Memory(BaseModel): """Configuration for the vector-backed memory system. The memory system uses ChromaDB with vector embeddings for semantic search, recency-aware scoring, and automatic fact reconciliation. + + Modes: + - off: Memory disabled + - tools: LLM decides via add_memory/search_memory tools (default) + - auto: Automatic extraction and injection each turn """ + mode: MemoryMode = "tools" memory_path: Path | None = None embedding_model: str = "text-embedding-3-small" top_k: int = 5 diff --git a/agent_cli/memory/client.py b/agent_cli/memory/client.py index 3ca4762f6..6e8d62671 100644 --- a/agent_cli/memory/client.py +++ b/agent_cli/memory/client.py @@ -137,6 +137,32 @@ async def add( ) evict_if_needed(self.collection, self.memory_path, conversation_id, self.max_entries) + async def extract_from_turn( + self, + user_message: str, + assistant_message: str, + conversation_id: str = "default", + model: str = DEFAULT_OPENAI_MODEL, + ) -> None: + """Extract and store facts from a conversation turn. + + This is used for automatic memory extraction mode, where facts are + extracted from both user and assistant messages after each turn. 
+ """ + await extract_and_store_facts_and_summaries( + collection=self.collection, + memory_root=self.memory_path, + conversation_id=conversation_id, + user_message=user_message, + assistant_message=assistant_message, + openai_base_url=self.openai_base_url, + api_key=self.chat_api_key, + model=model, + enable_git_versioning=self.enable_git_versioning, + enable_summarization=self.enable_summarization, + ) + evict_if_needed(self.collection, self.memory_path, conversation_id, self.max_entries) + async def search( self, query: str, diff --git a/agent_cli/opts.py b/agent_cli/opts.py index d2643ea7b..272ccf59c 100644 --- a/agent_cli/opts.py +++ b/agent_cli/opts.py @@ -382,6 +382,12 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) - ) # --- Memory Options --- +MEMORY_MODE: str = typer.Option( + "tools", + "--memory-mode", + help="Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction).", + rich_help_panel="Memory Options", +) MEMORY_PATH: Path | None = typer.Option( None, "--memory-path", diff --git a/tests/agents/test_interactive_extra.py b/tests/agents/test_interactive_extra.py index db10a6019..dcb020877 100644 --- a/tests/agents/test_interactive_extra.py +++ b/tests/agents/test_interactive_extra.py @@ -70,6 +70,7 @@ async def test_handle_conversation_turn_no_llm_response(): conversation_history=conversation_history, memory_client=None, conversation_id="test", + memory_cfg=config.Memory(), provider_cfg=provider_cfg, general_cfg=general_cfg, history_cfg=history_cfg, @@ -142,6 +143,7 @@ async def test_handle_conversation_turn_no_instruction(): conversation_history=conversation_history, memory_client=None, conversation_id="test", + memory_cfg=config.Memory(), provider_cfg=provider_cfg, general_cfg=general_cfg, history_cfg=history_cfg, From 868cb7e38e5e7e6eae8e71176c114324fa14e441 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Jan 2026 09:11:26 +0000 Subject: [PATCH 10/20] Update auto-generated docs --- README.md | 4 ++++ docs/commands/chat.md | 1 + 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index 615f95168..2d4a228e5 100644 --- a/README.md +++ b/README.md @@ -1530,6 +1530,10 @@ uv tool install "agent-cli[vad]" │ [default: 50] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Memory Options ─────────────────────────────────────────────────────────────╮ +│ --memory-mode TEXT Memory mode: 'off' (disabled), │ +│ 'tools' (LLM decides via tools), │ +│ 'auto' (automatic extraction). │ +│ [default: tools] │ │ --memory-path PATH Path for memory database storage. │ │ Default: │ │ ~/.config/agent-cli/memory/vector_… │ diff --git a/docs/commands/chat.md b/docs/commands/chat.md index 935cde8b1..b01245958 100644 --- a/docs/commands/chat.md +++ b/docs/commands/chat.md @@ -169,6 +169,7 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history | Option | Default | Description | |--------|---------|-------------| +| `--memory-mode` | `tools` | Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction). | | `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db | | `--memory-embedding-model` | `text-embedding-3-small` | Embedding model for semantic memory search. | | `--memory-top-k` | `5` | Number of memories to retrieve per search. 
| From d1ff04a93c1b488d03961644d624a8f767d8f156 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 01:11:39 -0800 Subject: [PATCH 11/20] refactor: address review comments - move hashlib import to top, simplify MemoryClient import --- agent_cli/agents/chat.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 28f81a3f0..2b1810b93 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -13,6 +13,7 @@ from __future__ import annotations import asyncio +import hashlib import json import logging import os @@ -61,8 +62,6 @@ def _get_conversation_id(history_cfg: config.History) -> str: Uses a hash of the history directory path to ensure consistency across sessions. """ - import hashlib # noqa: PLC0415 - if history_cfg.history_dir: return hashlib.md5( str(Path(history_cfg.history_dir).resolve()).encode(), @@ -81,7 +80,7 @@ def _try_init_memory( Returns the MemoryClient if successful, None otherwise. """ - from agent_cli.memory.client import MemoryClient as MemoryClientImpl # noqa: PLC0415 + from agent_cli.memory.client import MemoryClient # noqa: PLC0415 # Determine memory path memory_path = memory_cfg.memory_path @@ -97,7 +96,7 @@ def _try_init_memory( if not quiet: console.print("[dim]Initializing memory system...[/dim]") - memory_client = MemoryClientImpl( + memory_client = MemoryClient( memory_path=memory_path, openai_base_url=openai_base_url, embedding_model=memory_cfg.embedding_model, From 73a07f3be7bd78aa8b7804d09da9f8384bebb25b Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 01:13:44 -0800 Subject: [PATCH 12/20] docs: regenerate auto-generated CLI help sections --- README.md | 1836 +++++++++++++++++++++++++---------------------------- 1 file changed, 858 insertions(+), 978 deletions(-) diff --git a/README.md b/README.md index 2d4a228e5..b5a1d56a3 100644 --- a/README.md +++ b/README.md @@ -391,14 +391,14 @@ agent-cli config edit Manage agent-cli configuration files. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ───────────────────────────────────────────────────────────────────╮ -│ init Create a new config file with all options commented out. │ -│ edit Open the config file in your default editor. │ -│ show Display the config file location and contents. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮ +│ init Create a new config file with all options commented out. │ +│ edit Open the config file in your default editor. │ +│ show Display the config file location and contents. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -457,53 +457,49 @@ the `[defaults]` section of your configuration file. Correct text from clipboard using a local or remote LLM. -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ text [TEXT] The text to correct. If not provided, reads from │ -│ clipboard. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ text [TEXT] The text to correct. If not provided, reads from clipboard. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. 
│ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -548,120 +544,102 @@ the `[defaults]` section of your configuration file. Wyoming ASR Client for streaming microphone audio to a transcription server. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --extra-instructions TEXT Additional instructions for the │ -│ LLM to process the transcription. │ -│ --llm --no-llm Use an LLM to process the │ -│ transcript. │ -│ [default: no-llm] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Recovery ─────────────────────────────────────────────────────────────╮ -│ --from-file PATH Transcribe audio from a │ -│ saved WAV file instead │ -│ of recording. │ -│ --last-recording INTEGER Transcribe a saved │ -│ recording. Use 1 for │ -│ most recent, 2 for │ -│ second-to-last, etc. Use │ -│ 0 to disable (default). │ -│ [default: 0] │ -│ --save-recording --no-save-recording Save the audio recording │ -│ to disk for recovery. │ -│ [default: │ -│ save-recording] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. 
│ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write │ -│ logs to. │ -│ --quiet -q Suppress console output │ -│ from rich. 
│ -│ --config TEXT Path to a TOML │ -│ configuration file. │ -│ --print-args Print the command line │ -│ arguments, including │ -│ variables taken from the │ -│ configuration file. │ -│ --transcription-log PATH Path to log transcription │ -│ results with timestamps, │ -│ hostname, model, and raw │ -│ output. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --extra-instructions TEXT Additional instructions for the LLM to │ +│ process the transcription. │ +│ --llm --no-llm Use an LLM to process the transcript. │ +│ [default: no-llm] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮ +│ --from-file PATH Transcribe audio from a saved WAV │ +│ file instead of recording. │ +│ --last-recording INTEGER Transcribe a saved recording. Use │ +│ 1 for most recent, 2 for │ +│ second-to-last, etc. Use 0 to │ +│ disable (default). │ +│ [default: 0] │ +│ --save-recording --no-save-recording Save the audio recording to disk │ +│ for recovery. │ +│ [default: save-recording] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). 
│ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, │ +│ including variables taken from the │ +│ configuration file. │ +│ --transcription-log PATH Path to log transcription results │ +│ with timestamps, hostname, model, and │ +│ raw output. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -718,9 +696,8 @@ uv tool install "agent-cli[vad]" Run a continuous transcription daemon with voice activity detection. - This command runs indefinitely, capturing audio from your microphone, - detecting speech segments using Silero VAD, transcribing them, and logging - results with timestamps. + This command runs indefinitely, capturing audio from your microphone, detecting speech + segments using Silero VAD, transcribing them, and logging results with timestamps. 
Examples: # Basic daemon agent-cli transcribe-daemon @@ -732,121 +709,105 @@ uv tool install "agent-cli[vad]" agent-cli transcribe-daemon --llm --role notes # Custom log file and audio directory - agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir - ~/audio - - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --role -r TEXT Role name for logging │ -│ (e.g., 'meeting', │ -│ 'notes', 'user'). │ -│ [default: user] │ -│ --silence-threshold -s FLOAT Seconds of silence to end │ -│ a speech segment. │ -│ [default: 1.0] │ -│ --min-segment -m FLOAT Minimum speech duration │ -│ in seconds to trigger a │ -│ segment. │ -│ [default: 0.25] │ -│ --vad-threshold FLOAT VAD speech detection │ -│ threshold (0.0-1.0). │ -│ Higher = more aggressive │ -│ filtering. │ -│ [default: 0.3] │ -│ --save-audio --no-save-audio Save audio segments as │ -│ MP3 files. │ -│ [default: save-audio] │ -│ --audio-dir PATH Directory for MP3 files. │ -│ Default: │ -│ ~/.config/agent-cli/audio │ -│ --transcription-log -t PATH JSON Lines log file path. │ -│ Default: │ -│ ~/.config/agent-cli/tran… │ -│ --clipboard --no-clipboard Copy each transcription │ -│ to clipboard. │ -│ [default: no-clipboard] │ -│ --help -h Show this message and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. 
│ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio + + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --role -r TEXT Role name for logging (e.g., │ +│ 'meeting', 'notes', 'user'). │ +│ [default: user] │ +│ --silence-threshold -s FLOAT Seconds of silence to end a speech │ +│ segment. │ +│ [default: 1.0] │ +│ --min-segment -m FLOAT Minimum speech duration in seconds │ +│ to trigger a segment. │ +│ [default: 0.25] │ +│ --vad-threshold FLOAT VAD speech detection threshold │ +│ (0.0-1.0). Higher = more aggressive │ +│ filtering. │ +│ [default: 0.3] │ +│ --save-audio --no-save-audio Save audio segments as MP3 files. │ +│ [default: save-audio] │ +│ --audio-dir PATH Directory for MP3 files. Default: │ +│ ~/.config/agent-cli/audio │ +│ --transcription-log -t PATH JSON Lines log file path. Default: │ +│ ~/.config/agent-cli/transcriptions… │ +│ --clipboard --no-clipboard Copy each transcription to │ +│ clipboard. │ +│ [default: no-clipboard] │ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). 
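Beyond the bundled examples, the segmentation flags shown above are the main tuning knobs for the daemon; a hedged sketch for a noisy room (the threshold values are illustrative only, not recommendations from the project):

```bash
# Demand higher VAD confidence, wait longer before closing a segment,
# and ignore very short blips.
agent-cli transcribe-daemon \
    --vad-threshold 0.5 \
    --silence-threshold 1.5 \
    --min-segment 0.5
```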
│ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --llm --no-llm Use an LLM to process the transcript. 
│ +│ [default: no-llm] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -890,89 +851,82 @@ uv tool install "agent-cli[vad]" Convert text to speech using Wyoming or OpenAI-compatible TTS server. -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --output-device-index INTEGER Index of the audio output device to │ -│ use for TTS. │ -│ --output-device-name TEXT Output device name keywords for │ -│ partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half │ -│ speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. 
│ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --output-device-index INTEGER Index of the audio output device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │ +│ twice as fast, 0.5 = half speed). 
│ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --list-devices List available audio input and output devices and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. 
If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1013,8 +967,7 @@ uv tool install "agent-cli[vad]" Usage: agent-cli voice-edit [OPTIONS] - Interact with clipboard text via a voice command using local or remote - services. + Interact with clipboard text via a voice command using local or remote services. Usage: @@ -1025,139 +978,124 @@ uv tool install "agent-cli[vad]" • List output devices: agent-cli voice-edit --list-output-devices • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). 
│ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. 
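Because `--toggle` starts the recorder when idle and stops it when running, it pairs naturally with a desktop hotkey; a minimal sketch (the keybinding itself is configured outside agent-cli):

```bash
# Bind this to a global hotkey: first press starts listening,
# second press stops recording and processes the result.
agent-cli voice-edit --toggle --quiet
```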
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV │ -│ file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. 
│ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. 
If the process is not running, it will be started. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1203,148 +1141,133 @@ uv tool install "agent-cli[vad]" Wake word-based voice assistant using local or remote services. -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ─────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ -│ 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ -│ 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ -│ 'kokoro', 'gemini'). │ -│ [default: wyoming] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Wake Word ──────────────────────────────────────────────────────────────────╮ -│ --wake-server-ip TEXT Wyoming wake word server IP address. │ -│ [default: localhost] │ -│ --wake-server-port INTEGER Wyoming wake word server port. │ -│ [default: 10400] │ -│ --wake-word TEXT Name of wake word to detect (e.g., │ -│ 'ok_nabu', 'hey_jarvis'). │ -│ [default: ok_nabu] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial │ -│ matching. │ -│ --list-devices List available audio input and output │ -│ devices and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. 
│ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. 
│ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV │ -│ file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮ +│ --wake-server-ip TEXT Wyoming wake word server IP address. │ +│ [default: localhost] │ +│ --wake-server-port INTEGER Wyoming wake word server port. │ +│ [default: 10400] │ +│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │ +│ 'hey_jarvis'). │ +│ [default: ok_nabu] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). 
│ +│ [default: whisper-1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). 
│
+╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
+│ --tts-kokoro-model  TEXT  The Kokoro model to use for TTS.                              │
+│                           [default: kokoro]                                             │
+│ --tts-kokoro-voice  TEXT  The voice to use for Kokoro TTS.                              │
+│                           [default: af_sky]                                             │
+│ --tts-kokoro-host   TEXT  The base URL for the Kokoro API.                              │
+│                           [default: http://localhost:8880/v1]                           │
+╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
+│ --tts-gemini-model  TEXT  The Gemini model to use for TTS.                              │
+│                           [default: gemini-2.5-flash-preview-tts]                       │
+│ --tts-gemini-voice  TEXT  The voice to use for Gemini TTS (e.g., 'Kore', 'Puck',        │
+│                           'Charon', 'Fenrir').                                          │
+│                           [default: Kore]                                               │
+╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ───────────────────────────────────────────────────────────────────╮
+│ --stop    Stop any running background process.                                          │
+│ --status  Check if a background process is running.                                     │
+│ --toggle  Toggle the background process on/off. If the process is running, it           │
+│           will be stopped. If the process is not running, it will be started.           │
+╰────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ──────────────────────────────────────────────────────────────────────╮
+│ --save-file         PATH  Save TTS response audio to WAV file.                          │
+│ --clipboard    --no-clipboard      Copy result to clipboard.                            │
+│                                    [default: clipboard]                                 │
+│ --log-level    TEXT                Set logging level.                                   │
+│                                    [default: WARNING]                                   │
+│ --log-file     TEXT                Path to a file to write logs to.                     │
+│ --quiet    -q                      Suppress console output from rich.                   │
+│ --config       TEXT                Path to a TOML configuration file.                   │
+│ --print-args                       Print the command line arguments, including          │
+│                                    variables taken from the configuration file.         │
+╰────────────────────────────────────────────────────────────────────────────────────────╯
```

@@ -1397,166 +1320,148 @@ uv tool install "agent-cli[vad]"

 A chat agent that you can talk to.

-╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --help  -h    Show this message and exit.                                    │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ─────────────────────────────────────────────────────────╮
-│ --asr-provider  TEXT  The ASR provider to use ('wyoming', 'openai',          │
-│                       'gemini').                                             │
-│                       [default: wyoming]                                     │
-│ --llm-provider  TEXT  The LLM provider to use ('ollama', 'openai',           │
-│                       'gemini').                                             │
-│                       [default: ollama]                                      │
-│ --tts-provider  TEXT  The TTS provider to use ('wyoming', 'openai',          │
-│                       'kokoro', 'gemini').                                   │
-│                       [default: wyoming]                                     │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ────────────────────────────────────────────────────────────────╮
-│ --input-device-index  INTEGER  Index of the audio input device to use.       │
-│ --input-device-name   TEXT     Device name keywords for partial              │
-│                                matching.                                     │
-│ --list-devices                 List available audio input and output         │
-│                                devices and exit.                             │
-╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮
-│ --asr-wyoming-ip    TEXT     Wyoming ASR server IP address.                  │
-│                              [default: localhost]                            │
-│ --asr-wyoming-port  INTEGER  Wyoming ASR server port.
│ -│ [default: 10300] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR │ -│ (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ -│ API (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ -│ (optional). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR │ -│ (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is │ -│ gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ -│ the OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ -│ the GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ───────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for │ -│ responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output │ -│ device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords │ -│ for partial matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ -│ normal, 2.0 = twice as fast, │ -│ 0.5 = half speed). │ -│ [default: 1.0] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ -│ (e.g., 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ -│ 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ -│ TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ -│ API (e.g., http://localhost:8000/v1 for a │ -│ proxy). │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ -│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ─────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is │ -│ running, it will be stopped. If the process is not │ -│ running, it will be started. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ History Options ────────────────────────────────────────────────────────────╮ -│ --history-dir PATH Directory to store conversation history. │ -│ [default: ~/.config/agent-cli/history] │ -│ --last-n-messages INTEGER Number of messages to include in the │ -│ conversation history. Set to 0 to disable │ -│ history. │ -│ [default: 50] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Options ─────────────────────────────────────────────────────────────╮ -│ --memory-mode TEXT Memory mode: 'off' (disabled), │ -│ 'tools' (LLM decides via tools), │ -│ 'auto' (automatic extraction). │ -│ [default: tools] │ -│ --memory-path PATH Path for memory database storage. │ -│ Default: │ -│ ~/.config/agent-cli/memory/vector_… │ -│ --memory-embedding-model TEXT Embedding model for semantic memory │ -│ search. │ -│ [default: text-embedding-3-small] │ -│ --memory-top-k INTEGER Number of memories to retrieve per │ -│ search. │ -│ [default: 5] │ -│ --memory-score-threshold FLOAT Minimum relevance score threshold │ -│ for memory retrieval (0.0-1.0). │ -│ [default: 0.35] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. 
│ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ +│ 'gemini'). │ +│ [default: wyoming] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial matching. │ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ +│ (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). 
│ +│ [env var: OPENAI_BASE_URL] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output device to use │ +│ for TTS. │ +│ --output-device-name TEXT Output device name keywords for partial │ +│ matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half speed). │ +│ [default: 1.0] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ +│ 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ +│ (e.g., http://localhost:8000/v1 for a proxy). │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ +│ 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ───────────────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is running, it │ +│ will be stopped. If the process is not running, it will be started. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ History Options ──────────────────────────────────────────────────────────────────────╮ +│ --history-dir PATH Directory to store conversation history. │ +│ [default: ~/.config/agent-cli/history] │ +│ --last-n-messages INTEGER Number of messages to include in the conversation │ +│ history. Set to 0 to disable history. │ +│ [default: 50] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ───────────────────────────────────────────────────────────────────────╮ +│ --memory-mode TEXT Memory mode: 'off' (disabled), 'tools' (LLM │ +│ decides via tools), 'auto' (automatic │ +│ extraction). │ +│ [default: tools] │ +│ --memory-path PATH Path for memory database storage. Default: │ +│ ~/.config/agent-cli/memory/vector_db │ +│ --memory-embedding-model TEXT Embedding model for semantic memory search. │ +│ [default: text-embedding-3-small] │ +│ --memory-top-k INTEGER Number of memories to retrieve per search. │ +│ [default: 5] │ +│ --memory-score-threshold FLOAT Minimum relevance score threshold for memory │ +│ retrieval (0.0-1.0). │ +│ [default: 0.35] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1604,52 +1509,49 @@ uv tool install "agent-cli[vad]" Start the RAG (Retrieval-Augmented Generation) Proxy Server. This server watches a folder for documents, indexes them, and provides an - OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), - injecting relevant context from the documents. - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ RAG Configuration ──────────────────────────────────────────────────────────╮ -│ --docs-folder PATH Folder to watch for documents │ -│ [default: ./rag_docs] │ -│ --chroma-path PATH Path to ChromaDB persistence │ -│ directory │ -│ [default: ./rag_db] │ -│ --limit INTEGER Number of document chunks to │ -│ retrieve per query. │ -│ [default: 3] │ -│ --rag-tools --no-rag-tools Allow agent to fetch full │ -│ documents when snippets are │ -│ insufficient. │ -│ [default: rag-tools] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. 
│ -│ [env var: OPENAI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ───────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8000] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting + relevant context from the documents. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮ +│ --docs-folder PATH Folder to watch for documents │ +│ [default: ./rag_docs] │ +│ --chroma-path PATH Path to ChromaDB persistence directory │ +│ [default: ./rag_db] │ +│ --limit INTEGER Number of document chunks to retrieve per │ +│ query. │ +│ [default: 3] │ +│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │ +│ snippets are insufficient. │ +│ [default: rag-tools] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8000] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables taken │ +│ from the configuration file. 
│ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1705,107 +1607,91 @@ The `memory proxy` command is the core feature—a middleware server that gives Start the memory-backed chat proxy server. - This server acts as a middleware between your chat client (e.g., a web UI, - CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, - Ollama, vLLM). + This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE + plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM). Key Features: - • Simple Markdown Files: Memories are stored as human-readable Markdown - files, serving as the ultimate source of truth. - • Automatic Version Control: Built-in Git integration automatically commits - changes, providing a full history of memory evolution. - • Lightweight & Local: Minimal dependencies and runs entirely on your - machine. - • Proxy Middleware: Works transparently with any OpenAI-compatible - /chat/completions endpoint. + • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving + as the ultimate source of truth. + • Automatic Version Control: Built-in Git integration automatically commits changes, + providing a full history of memory evolution. + • Lightweight & Local: Minimal dependencies and runs entirely on your machine. + • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions + endpoint. How it works: 1 Intercepts POST /v1/chat/completions requests. - 2 Retrieves relevant memories (facts, previous conversations) from a local - vector database (ChromaDB) based on the user's query. + 2 Retrieves relevant memories (facts, previous conversations) from a local vector + database (ChromaDB) based on the user's query. 3 Injects these memories into the system prompt. 4 Forwards the augmented request to the real LLM (--openai-base-url). - 5 Extracts new facts from the conversation in the background and updates the - long-term memory store (including handling contradictions). - - Use this to give "long-term memory" to any OpenAI-compatible application. - Point your client's base URL to http://localhost:8100/v1. - -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ───────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory │ -│ store (files + derived │ -│ vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory │ -│ entries to retrieve per │ -│ query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory │ -│ entries per │ -│ conversation (excluding │ -│ summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): │ -│ higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight │ -│ (0.0-1.0). Controls │ -│ freshness vs. │ -│ relevance. Default 0.2 │ -│ (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic │ -│ relevance threshold │ -│ (0.0-1.0). Memories │ -│ below this score are │ -│ discarded to reduce │ -│ noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact │ -│ extraction and │ -│ summaries. 
│ -│ [default: │ -│ summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git │ -│ commit of memory │ -│ changes. │ -│ [default: │ -│ git-versioning] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ -│ (e.g., for llama-server: │ -│ http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ───────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8100] │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ + 5 Extracts new facts from the conversation in the background and updates the long-term + memory store (including handling contradictions). + + Use this to give "long-term memory" to any OpenAI-compatible application. Point your + client's base URL to http://localhost:8100/v1. + +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮ +│ --memory-path PATH Path to the memory store (files + │ +│ derived vector index). │ +│ [default: ./memory_db] │ +│ --default-top-k INTEGER Number of memory entries to │ +│ retrieve per query. │ +│ [default: 5] │ +│ --max-entries INTEGER Maximum stored memory entries per │ +│ conversation (excluding summary). │ +│ [default: 500] │ +│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │ +│ relevance, lower favors │ +│ diversity. │ +│ [default: 0.7] │ +│ --recency-weight FLOAT Recency score weight (0.0-1.0). │ +│ Controls freshness vs. relevance. │ +│ Default 0.2 (20% recency, 80% │ +│ semantic relevance). │ +│ [default: 0.2] │ +│ --score-threshold FLOAT Minimum semantic relevance │ +│ threshold (0.0-1.0). Memories │ +│ below this score are discarded to │ +│ reduce noise. │ +│ [default: 0.35] │ +│ --summarization --no-summarization Enable automatic fact extraction │ +│ and summaries. │ +│ [default: summarization] │ +│ --git-versioning --no-git-versioning Enable automatic git commit of │ +│ memory changes. 
│ +│ [default: git-versioning] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ +│ llama-server: http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8100] │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables taken │ +│ from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1856,11 +1742,11 @@ agent-cli memory add -c work "Project deadline is Friday" Add memories directly without LLM extraction. - This writes facts directly to the memory store, bypassing the LLM-based fact - extraction. Useful for bulk imports or seeding memories. + This writes facts directly to the memory store, bypassing the LLM-based fact extraction. + Useful for bulk imports or seeding memories. - The memory proxy file watcher (if running) will auto-index new files. - Otherwise, they'll be indexed on next memory proxy startup. + The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll + be indexed on next memory proxy startup. Examples:: @@ -1881,35 +1767,29 @@ agent-cli memory add -c work "Project deadline is Friday" agent-cli memory add -c work "Project deadline is Friday" -╭─ Arguments ──────────────────────────────────────────────────────────────────╮ -│ memories [MEMORIES]... Memories to add. Each argument becomes one │ -│ fact. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. │ -│ Use '-' for stdin. │ -│ Supports JSON array, │ -│ JSON object with │ -│ 'memories' key, or plain │ -│ text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add │ -│ memories to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory │ -│ store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: │ -│ git-versioning] │ -│ --help -h Show this message and │ -│ exit. │ -╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ────────────────────────────────────────────────────────────╮ -│ --quiet -q Suppress console output from rich. 
│ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮ +│ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ +│ --file -f PATH Read memories from file. Use '-' │ +│ for stdin. Supports JSON array, │ +│ JSON object with 'memories' key, │ +│ or plain text (one per line). │ +│ --conversation-id -c TEXT Conversation ID to add memories │ +│ to. │ +│ [default: default] │ +│ --memory-path PATH Path to the memory store. │ +│ [default: ./memory_db] │ +│ --git-versioning --no-git-versioning Commit changes to git. │ +│ [default: git-versioning] │ +│ --help -h Show this message and exit. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ──────────────────────────────────────────────────────────────────────╮ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including variables │ +│ taken from the configuration file. │ +╰────────────────────────────────────────────────────────────────────────────────────────╯ ``` From 29cab5a260881dc3cefe0dd9dff76f4ab2ef4917 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Jan 2026 09:14:22 +0000 Subject: [PATCH 13/20] Update auto-generated docs --- README.md | 1836 ++++++++++++++++++++++++++++------------------------- 1 file changed, 978 insertions(+), 858 deletions(-) diff --git a/README.md b/README.md index b5a1d56a3..2d4a228e5 100644 --- a/README.md +++ b/README.md @@ -391,14 +391,14 @@ agent-cli config edit Manage agent-cli configuration files. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮ -│ init Create a new config file with all options commented out. │ -│ edit Open the config file in your default editor. │ -│ show Display the config file location and contents. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ───────────────────────────────────────────────────────────────────╮ +│ init Create a new config file with all options commented out. │ +│ edit Open the config file in your default editor. │ +│ show Display the config file location and contents. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -457,49 +457,53 @@ the `[defaults]` section of your configuration file. Correct text from clipboard using a local or remote LLM. -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ text [TEXT] The text to correct. If not provided, reads from clipboard. 
│ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ text [TEXT] The text to correct. If not provided, reads from │ +│ clipboard. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. 
Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -544,102 +548,120 @@ the `[defaults]` section of your configuration file. Wyoming ASR Client for streaming microphone audio to a transcription server. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --extra-instructions TEXT Additional instructions for the LLM to │ -│ process the transcription. │ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮ -│ --from-file PATH Transcribe audio from a saved WAV │ -│ file instead of recording. │ -│ --last-recording INTEGER Transcribe a saved recording. Use │ -│ 1 for most recent, 2 for │ -│ second-to-last, etc. Use 0 to │ -│ disable (default). │ -│ [default: 0] │ -│ --save-recording --no-save-recording Save the audio recording to disk │ -│ for recovery. │ -│ [default: save-recording] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. 
│ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. 
│ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, │ -│ including variables taken from the │ -│ configuration file. │ -│ --transcription-log PATH Path to log transcription results │ -│ with timestamps, hostname, model, and │ -│ raw output. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --extra-instructions TEXT Additional instructions for the │ +│ LLM to process the transcription. │ +│ --llm --no-llm Use an LLM to process the │ +│ transcript. │ +│ [default: no-llm] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Recovery ─────────────────────────────────────────────────────────────╮ +│ --from-file PATH Transcribe audio from a │ +│ saved WAV file instead │ +│ of recording. │ +│ --last-recording INTEGER Transcribe a saved │ +│ recording. Use 1 for │ +│ most recent, 2 for │ +│ second-to-last, etc. Use │ +│ 0 to disable (default). │ +│ [default: 0] │ +│ --save-recording --no-save-recording Save the audio recording │ +│ to disk for recovery. │ +│ [default: │ +│ save-recording] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). 
│ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write │ +│ logs to. │ +│ --quiet -q Suppress console output │ +│ from rich. │ +│ --config TEXT Path to a TOML │ +│ configuration file. │ +│ --print-args Print the command line │ +│ arguments, including │ +│ variables taken from the │ +│ configuration file. │ +│ --transcription-log PATH Path to log transcription │ +│ results with timestamps, │ +│ hostname, model, and raw │ +│ output. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -696,8 +718,9 @@ uv tool install "agent-cli[vad]" Run a continuous transcription daemon with voice activity detection. - This command runs indefinitely, capturing audio from your microphone, detecting speech - segments using Silero VAD, transcribing them, and logging results with timestamps. + This command runs indefinitely, capturing audio from your microphone, + detecting speech segments using Silero VAD, transcribing them, and logging + results with timestamps. 
Examples: # Basic daemon agent-cli transcribe-daemon @@ -709,105 +732,121 @@ uv tool install "agent-cli[vad]" agent-cli transcribe-daemon --llm --role notes # Custom log file and audio directory - agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio - - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --role -r TEXT Role name for logging (e.g., │ -│ 'meeting', 'notes', 'user'). │ -│ [default: user] │ -│ --silence-threshold -s FLOAT Seconds of silence to end a speech │ -│ segment. │ -│ [default: 1.0] │ -│ --min-segment -m FLOAT Minimum speech duration in seconds │ -│ to trigger a segment. │ -│ [default: 0.25] │ -│ --vad-threshold FLOAT VAD speech detection threshold │ -│ (0.0-1.0). Higher = more aggressive │ -│ filtering. │ -│ [default: 0.3] │ -│ --save-audio --no-save-audio Save audio segments as MP3 files. │ -│ [default: save-audio] │ -│ --audio-dir PATH Directory for MP3 files. Default: │ -│ ~/.config/agent-cli/audio │ -│ --transcription-log -t PATH JSON Lines log file path. Default: │ -│ ~/.config/agent-cli/transcriptions… │ -│ --clipboard --no-clipboard Copy each transcription to │ -│ clipboard. │ -│ [default: no-clipboard] │ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. 
│ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --llm --no-llm Use an LLM to process the transcript. │ -│ [default: no-llm] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir + ~/audio + + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --role -r TEXT Role name for logging │ +│ (e.g., 'meeting', │ +│ 'notes', 'user'). │ +│ [default: user] │ +│ --silence-threshold -s FLOAT Seconds of silence to end │ +│ a speech segment. │ +│ [default: 1.0] │ +│ --min-segment -m FLOAT Minimum speech duration │ +│ in seconds to trigger a │ +│ segment. │ +│ [default: 0.25] │ +│ --vad-threshold FLOAT VAD speech detection │ +│ threshold (0.0-1.0). │ +│ Higher = more aggressive │ +│ filtering. │ +│ [default: 0.3] │ +│ --save-audio --no-save-audio Save audio segments as │ +│ MP3 files. │ +│ [default: save-audio] │ +│ --audio-dir PATH Directory for MP3 files. │ +│ Default: │ +│ ~/.config/agent-cli/audio │ +│ --transcription-log -t PATH JSON Lines log file path. │ +│ Default: │ +│ ~/.config/agent-cli/tran… │ +│ --clipboard --no-clipboard Copy each transcription │ +│ to clipboard. │ +│ [default: no-clipboard] │ +│ --help -h Show this message and │ +│ exit. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --llm --no-llm Use an LLM to process the transcript. 
│ +│ [default: no-llm] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -851,82 +890,89 @@ uv tool install "agent-cli[vad]" Convert text to speech using Wyoming or OpenAI-compatible TTS server. -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ -│ 'gemini'). │ -│ [default: wyoming] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --output-device-index INTEGER Index of the audio output device to use for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │ -│ twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. 
│ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --list-devices List available audio input and output devices and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ text [TEXT] Text to speak. Reads from clipboard if not provided. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --output-device-index INTEGER Index of the audio output device to │ +│ use for TTS. │ +│ --output-device-name TEXT Output device name keywords for │ +│ partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ +│ 2.0 = twice as fast, 0.5 = half │ +│ speed). 
│ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ +│ API (e.g., http://localhost:8000/v1 for a │ +│ proxy). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ +│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ +│ GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --list-devices List available audio input and output devices and │ +│ exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -967,7 +1013,8 @@ uv tool install "agent-cli[vad]" Usage: agent-cli voice-edit [OPTIONS] - Interact with clipboard text via a voice command using local or remote services. + Interact with clipboard text via a voice command using local or remote + services. Usage: @@ -978,124 +1025,139 @@ uv tool install "agent-cli[vad]" • List output devices: agent-cli voice-edit --list-output-devices • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ -│ 'gemini'). │ -│ [default: wyoming] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). 
│ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output device to use │ -│ for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. 
│ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. 
│ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for │ +│ responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output │ +│ device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords │ +│ for partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ +│ normal, 2.0 = twice as fast, │ +│ 0.5 = half speed). │ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ +│ API (e.g., http://localhost:8000/v1 for a │ +│ proxy). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ +│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV │ +│ file. │ +│ --clipboard --no-clipboard Copy result to clipboard. │ +│ [default: clipboard] │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, │ +│ including variables taken from the │ +│ configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1141,133 +1203,148 @@ uv tool install "agent-cli[vad]" Wake word-based voice assistant using local or remote services. -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮ -│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │ -│ [default: wyoming] │ -│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │ -│ [default: ollama] │ -│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │ -│ 'gemini'). │ -│ [default: wyoming] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮ -│ --wake-server-ip TEXT Wyoming wake word server IP address. │ -│ [default: localhost] │ -│ --wake-server-port INTEGER Wyoming wake word server port. │ -│ [default: 10400] │ -│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │ -│ 'hey_jarvis'). │ -│ [default: ok_nabu] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮ -│ --input-device-index INTEGER Index of the audio input device to use. │ -│ --input-device-name TEXT Device name keywords for partial matching. │ -│ --list-devices List available audio input and output devices and │ -│ exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). 
│ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output device to use │ -│ for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. 
│ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. │ -│ --clipboard --no-clipboard Copy result to clipboard. │ -│ [default: clipboard] │ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including │ -│ variables taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Wake Word ──────────────────────────────────────────────────────────────────╮ +│ --wake-server-ip TEXT Wyoming wake word server IP address. │ +│ [default: localhost] │ +│ --wake-server-port INTEGER Wyoming wake word server port. │ +│ [default: 10400] │ +│ --wake-word TEXT Name of wake word to detect (e.g., │ +│ 'ok_nabu', 'hey_jarvis'). │ +│ [default: ok_nabu] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. 
│ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for │ +│ responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output │ +│ device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords │ +│ for partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ +│ normal, 2.0 = twice as fast, │ +│ 0.5 = half speed). │ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. 
│
+│ [default: alloy] │
+│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │
+│ API (e.g., http://localhost:8000/v1 for a │
+│ proxy). │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮
+│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
+│ [default: kokoro] │
+│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
+│ [default: af_sky] │
+│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
+│ [default: http://localhost:8880/v1] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮
+│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
+│ [default: gemini-2.5-flash-preview-tts] │
+│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │
+│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │
+│ [default: Kore] │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Process Management ─────────────────────────────────────────────────────────╮
+│ --stop Stop any running background process. │
+│ --status Check if a background process is running. │
+│ --toggle Toggle the background process on/off. If the process is │
+│ running, it will be stopped. If the process is not │
+│ running, it will be started. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ General Options ────────────────────────────────────────────────────────────╮
+│ --save-file PATH Save TTS response audio to WAV │
+│ file. │
+│ --clipboard --no-clipboard Copy result to clipboard. │
+│ [default: clipboard] │
+│ --log-level TEXT Set logging level. │
+│ [default: WARNING] │
+│ --log-file TEXT Path to a file to write logs to. │
+│ --quiet -q Suppress console output from rich. │
+│ --config TEXT Path to a TOML configuration file. │
+│ --print-args Print the command line arguments, │
+│ including variables taken from the │
+│ configuration file. │
+╰──────────────────────────────────────────────────────────────────────────────╯
```


@@ -1320,148 +1397,166 @@ uv tool install "agent-cli[vad]"

A chat agent that you can talk to.

-╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
-│ --help -h Show this message and exit. │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
-│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
-│ [default: wyoming] │
-│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
-│ [default: ollama] │
-│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
-│ 'gemini'). │
-│ [default: wyoming] │
-╰────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
-│ --input-device-index INTEGER Index of the audio input device to use. │
-│ --input-device-name TEXT Device name keywords for partial matching. │
-│ --list-devices List available audio input and output devices and │
-│ exit.
│ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮ -│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ -│ [default: localhost] │ -│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ -│ [default: 10300] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮ -│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │ -│ [default: whisper-1] │ -│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │ -│ (e.g., for custom Whisper server: │ -│ http://localhost:9898). │ -│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮ -│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │ -│ [default: gemini-3-flash-preview] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │ -│ [default: gemma3:4b] │ -│ --llm-ollama-host TEXT The Ollama server host. Default is │ -│ http://localhost:11434. │ -│ [default: http://localhost:11434] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ -│ [default: gpt-5-mini] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮ -│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ -│ [default: gemini-3-flash-preview] │ -│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │ -│ GEMINI_API_KEY environment variable. │ -│ [env var: GEMINI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮ -│ --tts --no-tts Enable text-to-speech for responses. │ -│ [default: no-tts] │ -│ --output-device-index INTEGER Index of the audio output device to use │ -│ for TTS. │ -│ --output-device-name TEXT Output device name keywords for partial │ -│ matching. │ -│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │ -│ 2.0 = twice as fast, 0.5 = half speed). │ -│ [default: 1.0] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮ -│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ -│ [default: localhost] │ -│ --tts-wyoming-port INTEGER Wyoming TTS server port. 
│ -│ [default: 10200] │ -│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │ -│ 'en_US-lessac-medium'). │ -│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │ -│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮ -│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ -│ [default: tts-1] │ -│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │ -│ [default: alloy] │ -│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │ -│ (e.g., http://localhost:8000/v1 for a proxy). │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮ -│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ -│ [default: kokoro] │ -│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ -│ [default: af_sky] │ -│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ -│ [default: http://localhost:8880/v1] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮ -│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ -│ [default: gemini-2.5-flash-preview-tts] │ -│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │ -│ 'Charon', 'Fenrir'). │ -│ [default: Kore] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Process Management ───────────────────────────────────────────────────────────────────╮ -│ --stop Stop any running background process. │ -│ --status Check if a background process is running. │ -│ --toggle Toggle the background process on/off. If the process is running, it │ -│ will be stopped. If the process is not running, it will be started. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ History Options ──────────────────────────────────────────────────────────────────────╮ -│ --history-dir PATH Directory to store conversation history. │ -│ [default: ~/.config/agent-cli/history] │ -│ --last-n-messages INTEGER Number of messages to include in the conversation │ -│ history. Set to 0 to disable history. │ -│ [default: 50] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Options ───────────────────────────────────────────────────────────────────────╮ -│ --memory-mode TEXT Memory mode: 'off' (disabled), 'tools' (LLM │ -│ decides via tools), 'auto' (automatic │ -│ extraction). │ -│ [default: tools] │ -│ --memory-path PATH Path for memory database storage. Default: │ -│ ~/.config/agent-cli/memory/vector_db │ -│ --memory-embedding-model TEXT Embedding model for semantic memory search. │ -│ [default: text-embedding-3-small] │ -│ --memory-top-k INTEGER Number of memories to retrieve per search. │ -│ [default: 5] │ -│ --memory-score-threshold FLOAT Minimum relevance score threshold for memory │ -│ retrieval (0.0-1.0). │ -│ [default: 0.35] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --save-file PATH Save TTS response audio to WAV file. 
│ -│ --log-level TEXT Set logging level. │ -│ [default: WARNING] │ -│ --log-file TEXT Path to a file to write logs to. │ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider Selection ─────────────────────────────────────────────────────────╮ +│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', │ +│ 'gemini'). │ +│ [default: wyoming] │ +│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', │ +│ 'gemini'). │ +│ [default: ollama] │ +│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', │ +│ 'kokoro', 'gemini'). │ +│ [default: wyoming] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input ────────────────────────────────────────────────────────────────╮ +│ --input-device-index INTEGER Index of the audio input device to use. │ +│ --input-device-name TEXT Device name keywords for partial │ +│ matching. │ +│ --list-devices List available audio input and output │ +│ devices and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Wyoming ───────────────────────────────────────────────────────╮ +│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │ +│ [default: localhost] │ +│ --asr-wyoming-port INTEGER Wyoming ASR server port. │ +│ [default: 10300] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: OpenAI-compatible ─────────────────────────────────────────────╮ +│ --asr-openai-model TEXT The OpenAI model to use for ASR │ +│ (transcription). │ +│ [default: whisper-1] │ +│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR │ +│ API (e.g., for custom Whisper server: │ +│ http://localhost:9898). │ +│ --asr-openai-prompt TEXT Custom prompt to guide transcription │ +│ (optional). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Input: Gemini ────────────────────────────────────────────────────────╮ +│ --asr-gemini-model TEXT The Gemini model to use for ASR │ +│ (transcription). │ +│ [default: gemini-3-flash-preview] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Ollama ────────────────────────────────────────────────────────────────╮ +│ --llm-ollama-model TEXT The Ollama model to use. Default is │ +│ gemma3:4b. │ +│ [default: gemma3:4b] │ +│ --llm-ollama-host TEXT The Ollama server host. Default is │ +│ http://localhost:11434. │ +│ [default: http://localhost:11434] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │ +│ [default: gpt-5-mini] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with │ +│ the OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). 
│ +│ [env var: OPENAI_BASE_URL] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: Gemini ────────────────────────────────────────────────────────────────╮ +│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │ +│ [default: gemini-3-flash-preview] │ +│ --gemini-api-key TEXT Your Gemini API key. Can also be set with │ +│ the GEMINI_API_KEY environment variable. │ +│ [env var: GEMINI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output ───────────────────────────────────────────────────────────────╮ +│ --tts --no-tts Enable text-to-speech for │ +│ responses. │ +│ [default: no-tts] │ +│ --output-device-index INTEGER Index of the audio output │ +│ device to use for TTS. │ +│ --output-device-name TEXT Output device name keywords │ +│ for partial matching. │ +│ --tts-speed FLOAT Speech speed multiplier (1.0 = │ +│ normal, 2.0 = twice as fast, │ +│ 0.5 = half speed). │ +│ [default: 1.0] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Wyoming ──────────────────────────────────────────────────────╮ +│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │ +│ [default: localhost] │ +│ --tts-wyoming-port INTEGER Wyoming TTS server port. │ +│ [default: 10200] │ +│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS │ +│ (e.g., 'en_US-lessac-medium'). │ +│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., │ +│ 'en_US'). │ +│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: OpenAI-compatible ────────────────────────────────────────────╮ +│ --tts-openai-model TEXT The OpenAI model to use for TTS. │ +│ [default: tts-1] │ +│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible │ +│ TTS. │ +│ [default: alloy] │ +│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS │ +│ API (e.g., http://localhost:8000/v1 for a │ +│ proxy). │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Kokoro ───────────────────────────────────────────────────────╮ +│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │ +│ [default: kokoro] │ +│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │ +│ [default: af_sky] │ +│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │ +│ [default: http://localhost:8880/v1] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Audio Output: Gemini ───────────────────────────────────────────────────────╮ +│ --tts-gemini-model TEXT The Gemini model to use for TTS. │ +│ [default: gemini-2.5-flash-preview-tts] │ +│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., │ +│ 'Kore', 'Puck', 'Charon', 'Fenrir'). │ +│ [default: Kore] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Process Management ─────────────────────────────────────────────────────────╮ +│ --stop Stop any running background process. │ +│ --status Check if a background process is running. │ +│ --toggle Toggle the background process on/off. If the process is │ +│ running, it will be stopped. If the process is not │ +│ running, it will be started. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ History Options ────────────────────────────────────────────────────────────╮ +│ --history-dir PATH Directory to store conversation history. │ +│ [default: ~/.config/agent-cli/history] │ +│ --last-n-messages INTEGER Number of messages to include in the │ +│ conversation history. Set to 0 to disable │ +│ history. │ +│ [default: 50] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ─────────────────────────────────────────────────────────────╮ +│ --memory-mode TEXT Memory mode: 'off' (disabled), │ +│ 'tools' (LLM decides via tools), │ +│ 'auto' (automatic extraction). │ +│ [default: tools] │ +│ --memory-path PATH Path for memory database storage. │ +│ Default: │ +│ ~/.config/agent-cli/memory/vector_… │ +│ --memory-embedding-model TEXT Embedding model for semantic memory │ +│ search. │ +│ [default: text-embedding-3-small] │ +│ --memory-top-k INTEGER Number of memories to retrieve per │ +│ search. │ +│ [default: 5] │ +│ --memory-score-threshold FLOAT Minimum relevance score threshold │ +│ for memory retrieval (0.0-1.0). │ +│ [default: 0.35] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --save-file PATH Save TTS response audio to WAV file. │ +│ --log-level TEXT Set logging level. │ +│ [default: WARNING] │ +│ --log-file TEXT Path to a file to write logs to. │ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1509,49 +1604,52 @@ uv tool install "agent-cli[vad]" Start the RAG (Retrieval-Augmented Generation) Proxy Server. This server watches a folder for documents, indexes them, and provides an - OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting - relevant context from the documents. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮ -│ --docs-folder PATH Folder to watch for documents │ -│ [default: ./rag_docs] │ -│ --chroma-path PATH Path to ChromaDB persistence directory │ -│ [default: ./rag_db] │ -│ --limit INTEGER Number of document chunks to retrieve per │ -│ query. │ -│ [default: 3] │ -│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │ -│ snippets are insufficient. │ -│ [default: rag-tools] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. 
│ -│ [env var: OPENAI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8000] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables taken │ -│ from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), + injecting relevant context from the documents. + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ RAG Configuration ──────────────────────────────────────────────────────────╮ +│ --docs-folder PATH Folder to watch for documents │ +│ [default: ./rag_docs] │ +│ --chroma-path PATH Path to ChromaDB persistence │ +│ directory │ +│ [default: ./rag_db] │ +│ --limit INTEGER Number of document chunks to │ +│ retrieve per query. │ +│ [default: 3] │ +│ --rag-tools --no-rag-tools Allow agent to fetch full │ +│ documents when snippets are │ +│ insufficient. │ +│ [default: rag-tools] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ───────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8000] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. 
│ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1607,91 +1705,107 @@ The `memory proxy` command is the core feature—a middleware server that gives Start the memory-backed chat proxy server. - This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE - plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM). + This server acts as a middleware between your chat client (e.g., a web UI, + CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, + Ollama, vLLM). Key Features: - • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving - as the ultimate source of truth. - • Automatic Version Control: Built-in Git integration automatically commits changes, - providing a full history of memory evolution. - • Lightweight & Local: Minimal dependencies and runs entirely on your machine. - • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions - endpoint. + • Simple Markdown Files: Memories are stored as human-readable Markdown + files, serving as the ultimate source of truth. + • Automatic Version Control: Built-in Git integration automatically commits + changes, providing a full history of memory evolution. + • Lightweight & Local: Minimal dependencies and runs entirely on your + machine. + • Proxy Middleware: Works transparently with any OpenAI-compatible + /chat/completions endpoint. How it works: 1 Intercepts POST /v1/chat/completions requests. - 2 Retrieves relevant memories (facts, previous conversations) from a local vector - database (ChromaDB) based on the user's query. + 2 Retrieves relevant memories (facts, previous conversations) from a local + vector database (ChromaDB) based on the user's query. 3 Injects these memories into the system prompt. 4 Forwards the augmented request to the real LLM (--openai-base-url). - 5 Extracts new facts from the conversation in the background and updates the long-term - memory store (including handling contradictions). - - Use this to give "long-term memory" to any OpenAI-compatible application. Point your - client's base URL to http://localhost:8100/v1. - -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory store (files + │ -│ derived vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory entries to │ -│ retrieve per query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory entries per │ -│ conversation (excluding summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight (0.0-1.0). │ -│ Controls freshness vs. relevance. │ -│ Default 0.2 (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic relevance │ -│ threshold (0.0-1.0). Memories │ -│ below this score are discarded to │ -│ reduce noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact extraction │ -│ and summaries. │ -│ [default: summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git commit of │ -│ memory changes. 
│ -│ [default: git-versioning] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮ -│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │ -│ llama-server: http://localhost:8080/v1). │ -│ [env var: OPENAI_BASE_URL] │ -│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ -│ OPENAI_API_KEY environment variable. │ -│ [env var: OPENAI_API_KEY] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮ -│ --embedding-model TEXT Embedding model to use for vectorization. │ -│ [default: text-embedding-3-small] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮ -│ --host TEXT Host/IP to bind API servers to. │ -│ [default: 0.0.0.0] │ -│ --port INTEGER Port to bind to │ -│ [default: 8100] │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --log-level TEXT Set logging level. │ -│ [default: INFO] │ -│ --config TEXT Path to a TOML configuration file. │ -│ --print-args Print the command line arguments, including variables taken │ -│ from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ + 5 Extracts new facts from the conversation in the background and updates the + long-term memory store (including handling contradictions). + + Use this to give "long-term memory" to any OpenAI-compatible application. + Point your client's base URL to http://localhost:8100/v1. + +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Configuration ───────────────────────────────────────────────────────╮ +│ --memory-path PATH Path to the memory │ +│ store (files + derived │ +│ vector index). │ +│ [default: ./memory_db] │ +│ --default-top-k INTEGER Number of memory │ +│ entries to retrieve per │ +│ query. │ +│ [default: 5] │ +│ --max-entries INTEGER Maximum stored memory │ +│ entries per │ +│ conversation (excluding │ +│ summary). │ +│ [default: 500] │ +│ --mmr-lambda FLOAT MMR lambda (0-1): │ +│ higher favors │ +│ relevance, lower favors │ +│ diversity. │ +│ [default: 0.7] │ +│ --recency-weight FLOAT Recency score weight │ +│ (0.0-1.0). Controls │ +│ freshness vs. │ +│ relevance. Default 0.2 │ +│ (20% recency, 80% │ +│ semantic relevance). │ +│ [default: 0.2] │ +│ --score-threshold FLOAT Minimum semantic │ +│ relevance threshold │ +│ (0.0-1.0). Memories │ +│ below this score are │ +│ discarded to reduce │ +│ noise. │ +│ [default: 0.35] │ +│ --summarization --no-summarization Enable automatic fact │ +│ extraction and │ +│ summaries. │ +│ [default: │ +│ summarization] │ +│ --git-versioning --no-git-versioning Enable automatic git │ +│ commit of memory │ +│ changes. 
│ +│ [default: │ +│ git-versioning] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ +│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ +│ (e.g., for llama-server: │ +│ http://localhost:8080/v1). │ +│ [env var: OPENAI_BASE_URL] │ +│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │ +│ OPENAI_API_KEY environment variable. │ +│ [env var: OPENAI_API_KEY] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Server Configuration ───────────────────────────────────────────────────────╮ +│ --host TEXT Host/IP to bind API servers to. │ +│ [default: 0.0.0.0] │ +│ --port INTEGER Port to bind to │ +│ [default: 8100] │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --log-level TEXT Set logging level. │ +│ [default: INFO] │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` @@ -1742,11 +1856,11 @@ agent-cli memory add -c work "Project deadline is Friday" Add memories directly without LLM extraction. - This writes facts directly to the memory store, bypassing the LLM-based fact extraction. - Useful for bulk imports or seeding memories. + This writes facts directly to the memory store, bypassing the LLM-based fact + extraction. Useful for bulk imports or seeding memories. - The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll - be indexed on next memory proxy startup. + The memory proxy file watcher (if running) will auto-index new files. + Otherwise, they'll be indexed on next memory proxy startup. Examples:: @@ -1767,29 +1881,35 @@ agent-cli memory add -c work "Project deadline is Friday" agent-cli memory add -c work "Project deadline is Friday" -╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮ -│ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ──────────────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. Use '-' │ -│ for stdin. Supports JSON array, │ -│ JSON object with 'memories' key, │ -│ or plain text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add memories │ -│ to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: git-versioning] │ -│ --help -h Show this message and exit. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ General Options ──────────────────────────────────────────────────────────────────────╮ -│ --quiet -q Suppress console output from rich. │ -│ --config TEXT Path to a TOML configuration file. 
│ -│ --print-args Print the command line arguments, including variables │ -│ taken from the configuration file. │ -╰────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +│ memories [MEMORIES]... Memories to add. Each argument becomes one │ +│ fact. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +│ --file -f PATH Read memories from file. │ +│ Use '-' for stdin. │ +│ Supports JSON array, │ +│ JSON object with │ +│ 'memories' key, or plain │ +│ text (one per line). │ +│ --conversation-id -c TEXT Conversation ID to add │ +│ memories to. │ +│ [default: default] │ +│ --memory-path PATH Path to the memory │ +│ store. │ +│ [default: ./memory_db] │ +│ --git-versioning --no-git-versioning Commit changes to git. │ +│ [default: │ +│ git-versioning] │ +│ --help -h Show this message and │ +│ exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ General Options ────────────────────────────────────────────────────────────╮ +│ --quiet -q Suppress console output from rich. │ +│ --config TEXT Path to a TOML configuration file. │ +│ --print-args Print the command line arguments, including │ +│ variables taken from the configuration file. │ +╰──────────────────────────────────────────────────────────────────────────────╯ ``` From d6fc660753695368eae7615f446b5f1a7b9a2a0b Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 01:15:21 -0800 Subject: [PATCH 14/20] fix(chat): use configured LLM model for memory extraction Pass the user's configured openai model to extract_from_turn() instead of using the default gpt-5-mini, which may not be available on custom OpenAI-compatible endpoints. 
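
A minimal sketch of the resulting call (keyword signature as in the
diff below; `memory_client` and `openai_llm_cfg` are the names already
used at this call site in agent_cli/agents/chat.py):

    # model now follows --llm-openai-model instead of the hardcoded default
    await memory_client.extract_from_turn(
        user_message=instruction,
        assistant_message=response_text,
        conversation_id=conversation_id,
        model=openai_llm_cfg.llm_openai_model,
    )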
--- agent_cli/agents/chat.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 2b1810b93..a955369d5 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -240,6 +240,7 @@ async def _maybe_extract_memories( instruction: str, response_text: str, conversation_id: str, + model: str, quiet: bool, ) -> None: """Extract memories in auto mode, silently skip otherwise.""" @@ -250,6 +251,7 @@ async def _maybe_extract_memories( user_message=instruction, assistant_message=response_text, conversation_id=conversation_id, + model=model, ) if not quiet: console.print("[dim]💾 Memory extraction complete[/dim]") @@ -392,6 +394,7 @@ async def _handle_conversation_turn( instruction, response_text, conversation_id, + openai_llm_cfg.llm_openai_model, general_cfg.quiet, ) From 6b4875873d7c93006fb7510bfb4cdaafdd36c822 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 07:33:30 -0800 Subject: [PATCH 15/20] fix: address review issues and add missing tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix opts.with_default type hint (str → Any) for bool support - Fix FBT003 lint errors by using keyword arg default=True - Fix tests using old --no-git-versioning option name - Add comprehensive tests for MemoryTools class (30 tests) - Document memory modes (off/tools/auto) in chat.md --- agent_cli/agents/chat.py | 14 +- agent_cli/agents/memory/add.py | 16 +- agent_cli/agents/memory/proxy.py | 54 +--- agent_cli/opts.py | 39 ++- docs/commands/chat.md | 32 ++- tests/agents/test_memory_add.py | 12 +- tests/test_memory_tools.py | 418 +++++++++++++++++++++++++++++++ 7 files changed, 515 insertions(+), 70 deletions(-) create mode 100644 tests/test_memory_tools.py diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index a955369d5..39fbaf674 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -588,9 +588,14 @@ def chat( # --- Memory Options --- memory_mode: str = opts.MEMORY_MODE, memory_path: Path | None = opts.MEMORY_PATH, - memory_embedding_model: str = opts.MEMORY_EMBEDDING_MODEL, + embedding_model: str = opts.EMBEDDING_MODEL, memory_top_k: int = opts.MEMORY_TOP_K, memory_score_threshold: float = opts.MEMORY_SCORE_THRESHOLD, + memory_max_entries: int = opts.MEMORY_MAX_ENTRIES, + memory_mmr_lambda: float = opts.MEMORY_MMR_LAMBDA, + memory_recency_weight: float = opts.MEMORY_RECENCY_WEIGHT, + memory_summarization: bool = opts.MEMORY_SUMMARIZATION, + memory_git_versioning: bool = opts.MEMORY_GIT_VERSIONING, # --- General Options --- save_file: Path | None = opts.SAVE_FILE, log_level: str = opts.LOG_LEVEL, @@ -696,9 +701,14 @@ def chat( memory_cfg = config.Memory( mode=memory_mode, # type: ignore[arg-type] memory_path=memory_path, - embedding_model=memory_embedding_model, + embedding_model=embedding_model, top_k=memory_top_k, score_threshold=memory_score_threshold, + max_entries=memory_max_entries, + mmr_lambda=memory_mmr_lambda, + recency_weight=memory_recency_weight, + enable_summarization=memory_summarization, + enable_git_versioning=memory_git_versioning, ) asyncio.run( diff --git a/agent_cli/agents/memory/add.py b/agent_cli/agents/memory/add.py index 0675e9920..aea4b0301 100644 --- a/agent_cli/agents/memory/add.py +++ b/agent_cli/agents/memory/add.py @@ -6,7 +6,7 @@ import re import sys from datetime import UTC, datetime -from pathlib import Path # noqa: TC003 +from pathlib import Path from typing import TYPE_CHECKING, Any import typer @@ -127,16 +127,8 
@@ def add( "-c", help="Conversation ID to add memories to.", ), - memory_path: Path = typer.Option( # noqa: B008 - "./memory_db", - "--memory-path", - help="Path to the memory store.", - ), - git_versioning: bool = typer.Option( - True, # noqa: FBT003 - "--git-versioning/--no-git-versioning", - help="Commit changes to git.", - ), + memory_path: Path | None = opts.MEMORY_PATH, + git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True), quiet: bool = opts.QUIET, config_file: str | None = opts.CONFIG_FILE, print_args: bool = opts.PRINT_ARGS, @@ -176,6 +168,8 @@ def add( console.print("[red]No memories provided. Use arguments or --file.[/red]") raise typer.Exit(1) + if memory_path is None: + memory_path = Path("./memory_db") memory_path = memory_path.resolve() records = _write_memories(memory_path, parsed, git_versioning) diff --git a/agent_cli/agents/memory/proxy.py b/agent_cli/agents/memory/proxy.py index 73906c62d..18796eedc 100644 --- a/agent_cli/agents/memory/proxy.py +++ b/agent_cli/agents/memory/proxy.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging -from pathlib import Path # noqa: TC003 +from pathlib import Path import typer from rich.logging import RichHandler @@ -15,57 +15,23 @@ @memory_app.command("proxy") def proxy( - memory_path: Path = typer.Option( # noqa: B008 - "./memory_db", - help="Path to the memory store (files + derived vector index).", - rich_help_panel="Memory Configuration", - ), + memory_path: Path | None = opts.MEMORY_PATH, openai_base_url: str | None = opts.OPENAI_BASE_URL, embedding_model: str = opts.EMBEDDING_MODEL, openai_api_key: str | None = opts.OPENAI_API_KEY, - default_top_k: int = typer.Option( - 5, - help="Number of memory entries to retrieve per query.", - rich_help_panel="Memory Configuration", - ), + default_top_k: int = opts.MEMORY_TOP_K, host: str = opts.SERVER_HOST, port: int = typer.Option( 8100, help="Port to bind to", rich_help_panel="Server Configuration", ), - max_entries: int = typer.Option( - 500, - help="Maximum stored memory entries per conversation (excluding summary).", - rich_help_panel="Memory Configuration", - ), - mmr_lambda: float = typer.Option( - 0.7, - help="MMR lambda (0-1): higher favors relevance, lower favors diversity.", - rich_help_panel="Memory Configuration", - ), - recency_weight: float = typer.Option( - 0.2, - help="Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance).", - rich_help_panel="Memory Configuration", - ), - score_threshold: float = typer.Option( - 0.35, - help="Minimum semantic relevance threshold (0.0-1.0). 
Memories below this score are discarded to reduce noise.", - rich_help_panel="Memory Configuration", - ), - summarization: bool = typer.Option( - True, # noqa: FBT003 - "--summarization/--no-summarization", - help="Enable automatic fact extraction and summaries.", - rich_help_panel="Memory Configuration", - ), - git_versioning: bool = typer.Option( - True, # noqa: FBT003 - "--git-versioning/--no-git-versioning", - help="Enable automatic git commit of memory changes.", - rich_help_panel="Memory Configuration", - ), + max_entries: int = opts.MEMORY_MAX_ENTRIES, + mmr_lambda: float = opts.MEMORY_MMR_LAMBDA, + recency_weight: float = opts.MEMORY_RECENCY_WEIGHT, + score_threshold: float = opts.MEMORY_SCORE_THRESHOLD, + summarization: bool = opts.MEMORY_SUMMARIZATION, + git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True), log_level: str = opts.with_default(opts.LOG_LEVEL, "INFO"), config_file: str | None = opts.CONFIG_FILE, print_args: bool = opts.PRINT_ARGS, @@ -127,6 +93,8 @@ def proxy( logging.getLogger("chromadb").setLevel(logging.WARNING) logging.getLogger("uvicorn.access").setLevel(logging.WARNING) + if memory_path is None: + memory_path = Path("./memory_db") memory_path = memory_path.resolve() entries_dir, _ = ensure_store_dirs(memory_path) if openai_base_url is None: diff --git a/agent_cli/opts.py b/agent_cli/opts.py index 272ccf59c..ac149b80f 100644 --- a/agent_cli/opts.py +++ b/agent_cli/opts.py @@ -2,6 +2,7 @@ import copy from pathlib import Path +from typing import Any import typer from typer.models import OptionInfo @@ -9,7 +10,7 @@ from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL, DEFAULT_OPENAI_MODEL -def with_default(option: OptionInfo, default: str) -> OptionInfo: +def with_default(option: OptionInfo, default: Any) -> OptionInfo: """Create a copy of a typer Option with a different default value.""" opt = copy.copy(option) opt.default = default @@ -394,12 +395,6 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) - help="Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db", rich_help_panel="Memory Options", ) -MEMORY_EMBEDDING_MODEL: str = typer.Option( - DEFAULT_OPENAI_EMBEDDING_MODEL, - "--memory-embedding-model", - help="Embedding model for semantic memory search.", - rich_help_panel="Memory Options", -) MEMORY_TOP_K: int = typer.Option( 5, "--memory-top-k", @@ -412,6 +407,36 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) - help="Minimum relevance score threshold for memory retrieval (0.0-1.0).", rich_help_panel="Memory Options", ) +MEMORY_MAX_ENTRIES: int = typer.Option( + 500, + "--memory-max-entries", + help="Maximum stored memory entries per conversation (excluding summary).", + rich_help_panel="Memory Options", +) +MEMORY_MMR_LAMBDA: float = typer.Option( + 0.7, + "--memory-mmr-lambda", + help="MMR lambda (0-1): higher favors relevance, lower favors diversity.", + rich_help_panel="Memory Options", +) +MEMORY_RECENCY_WEIGHT: float = typer.Option( + 0.2, + "--memory-recency-weight", + help="Recency score weight (0.0-1.0). Controls freshness vs. 
relevance.", + rich_help_panel="Memory Options", +) +MEMORY_SUMMARIZATION: bool = typer.Option( + True, # noqa: FBT003 + "--memory-summarization/--no-memory-summarization", + help="Enable automatic fact extraction and summaries.", + rich_help_panel="Memory Options", +) +MEMORY_GIT_VERSIONING: bool = typer.Option( + False, # noqa: FBT003 + "--memory-git-versioning/--no-memory-git-versioning", + help="Enable automatic git commit of memory changes.", + rich_help_panel="Memory Options", +) # --- Server Options --- diff --git a/docs/commands/chat.md b/docs/commands/chat.md index b01245958..f83b4a7ff 100644 --- a/docs/commands/chat.md +++ b/docs/commands/chat.md @@ -171,9 +171,19 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history |--------|---------|-------------| | `--memory-mode` | `tools` | Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction). | | `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db | -| `--memory-embedding-model` | `text-embedding-3-small` | Embedding model for semantic memory search. | | `--memory-top-k` | `5` | Number of memories to retrieve per search. | | `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). | +| `--memory-max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). | +| `--memory-mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. | +| `--memory-recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. | +| `--memory-summarization/--no-memory-summarization` | `true` | Enable automatic fact extraction and summaries. | +| `--memory-git-versioning/--no-memory-git-versioning` | `false` | Enable automatic git commit of memory changes. | + +### LLM Configuration + +| Option | Default | Description | +|--------|---------|-------------| +| `--embedding-model` | `text-embedding-3-small` | Embedding model to use for vectorization. | ### General Options @@ -200,6 +210,26 @@ The memory system uses a **vector-backed architecture** with semantic search. Th - **Diversity selection (MMR)**: Avoids redundant memories in context - **Automatic reconciliation**: Contradicting facts are updated, not duplicated +### Memory Modes + +Use `--memory-mode` to control how memory works: + +| Mode | Description | +|------|-------------| +| `off` | Memory system disabled | +| `tools` (default) | LLM decides when to store/retrieve via tools. LLM asks permission before storing. | +| `auto` | Automatic extraction after each conversation turn (no LLM tools exposed). | + +Example: + +```bash +# Automatic memory extraction (no prompting, just remembers) +agent-cli chat --memory-mode auto + +# Disable memory entirely +agent-cli chat --memory-mode off +``` + > [!NOTE] > The memory system requires the `[memory]` extra: `pip install "agent-cli[memory]"`. > If not installed, memory tools will not be available. 
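+
+The retrieval options compose with the modes above. A sketch of a
+stricter setup (all flags defined in `agent_cli/opts.py` in this patch;
+the values are illustrative, not recommendations):
+
+```bash
+agent-cli chat --memory-mode auto \
+  --memory-top-k 3 \
+  --memory-score-threshold 0.5 \
+  --memory-git-versioning
+```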
diff --git a/tests/agents/test_memory_add.py b/tests/agents/test_memory_add.py index d81e300c9..52c0f3064 100644 --- a/tests/agents/test_memory_add.py +++ b/tests/agents/test_memory_add.py @@ -154,7 +154,7 @@ def test_memory_add_single_memory(tmp_path: Path) -> None: "User likes Python", "--memory-path", str(memory_path), - "--no-git-versioning", + "--no-memory-git-versioning", ], ) assert result.exit_code == 0 @@ -181,7 +181,7 @@ def test_memory_add_multiple_memories(tmp_path: Path) -> None: "Fact three", "--memory-path", str(memory_path), - "--no-git-versioning", + "--no-memory-git-versioning", ], ) assert result.exit_code == 0 @@ -203,7 +203,7 @@ def test_memory_add_from_file(tmp_path: Path) -> None: str(input_file), "--memory-path", str(memory_path), - "--no-git-versioning", + "--no-memory-git-versioning", ], ) assert result.exit_code == 0 @@ -225,7 +225,7 @@ def test_memory_add_with_conversation_id(tmp_path: Path) -> None: "work", "--memory-path", str(memory_path), - "--no-git-versioning", + "--no-memory-git-versioning", ], ) assert result.exit_code == 0 @@ -247,7 +247,7 @@ def test_memory_add_no_memories_error(tmp_path: Path) -> None: "add", "--memory-path", str(memory_path), - "--no-git-versioning", + "--no-memory-git-versioning", ], ) assert result.exit_code == 1 @@ -265,7 +265,7 @@ def test_memory_add_quiet_mode(tmp_path: Path) -> None: "Silent fact", "--memory-path", str(memory_path), - "--no-git-versioning", + "--no-memory-git-versioning", "--quiet", ], ) diff --git a/tests/test_memory_tools.py b/tests/test_memory_tools.py new file mode 100644 index 000000000..d63a0666c --- /dev/null +++ b/tests/test_memory_tools.py @@ -0,0 +1,418 @@ +"""Tests for the memory tools in _tools.py.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from agent_cli._tools import ( + MemoryTools, + _format_memory_content, + create_memory_tools, + tools, +) +from agent_cli.agents.chat import ( + _get_conversation_id, + _maybe_extract_memories, + _maybe_init_memory, +) +from agent_cli.config import History, Memory, OpenAILLM + +# --- Tests for _format_memory_content --- + + +def test_format_memory_content_basic() -> None: + """Test basic memory content formatting.""" + result = _format_memory_content("User likes Python", "preferences", "") + assert result == "[preferences] User likes Python" + + +def test_format_memory_content_with_tags() -> None: + """Test memory content formatting with tags.""" + result = _format_memory_content("User likes Python", "preferences", "programming, languages") + assert result == "[preferences] User likes Python (tags: programming, languages)" + + +def test_format_memory_content_empty_category() -> None: + """Test memory content formatting with empty category.""" + result = _format_memory_content("Some content", "", "") + assert result == "[] Some content" + + +# --- Tests for MemoryTools._check --- + + +def test_memory_tools_check_with_no_client() -> None: + """Test that _check returns error when client is None.""" + mt = MemoryTools(None, "test_conversation") + error = mt._check() + assert error is not None + assert "Memory system not initialized" in error + assert "pip install 'agent-cli[memory]'" in error + + +def test_memory_tools_check_with_client() -> None: + """Test that _check returns None when client exists.""" + mock_client = MagicMock() + mt = MemoryTools(mock_client, "test_conversation") + error = mt._check() + assert error is None + + +# --- Tests for MemoryTools.add_memory 
--- + + +@pytest.mark.asyncio +async def test_add_memory_without_client() -> None: + """Test add_memory returns error when no client.""" + mt = MemoryTools(None, "test") + result = await mt.add_memory("content", "category", "tags") + assert "Error: Memory system not initialized" in result + + +@pytest.mark.asyncio +async def test_add_memory_success() -> None: + """Test successful memory addition.""" + mock_client = MagicMock() + mock_client.add = AsyncMock() + + mt = MemoryTools(mock_client, "test_conversation") + result = await mt.add_memory("User likes coffee", "preferences", "food") + + assert result == "Memory added successfully." + mock_client.add.assert_called_once_with( + "[preferences] User likes coffee (tags: food)", + conversation_id="test_conversation", + ) + + +@pytest.mark.asyncio +async def test_add_memory_exception() -> None: + """Test add_memory handles exceptions.""" + mock_client = MagicMock() + mock_client.add = AsyncMock(side_effect=RuntimeError("Database error")) + + mt = MemoryTools(mock_client, "test") + result = await mt.add_memory("content", "category", "tags") + + assert "Error adding memory" in result + assert "Database error" in result + + +# --- Tests for MemoryTools.search_memory --- + + +@pytest.mark.asyncio +async def test_search_memory_without_client() -> None: + """Test search_memory returns error when no client.""" + mt = MemoryTools(None, "test") + result = await mt.search_memory("query") + assert "Error: Memory system not initialized" in result + + +@pytest.mark.asyncio +async def test_search_memory_no_results() -> None: + """Test search_memory with no matching results.""" + mock_retrieval = MagicMock() + mock_retrieval.entries = [] + + mock_client = MagicMock() + mock_client.search = AsyncMock(return_value=mock_retrieval) + + mt = MemoryTools(mock_client, "test") + result = await mt.search_memory("nonexistent") + + assert "No memories found matching 'nonexistent'" in result + + +@pytest.mark.asyncio +async def test_search_memory_with_results() -> None: + """Test search_memory returns formatted results.""" + # Create mock entries + entry1 = MagicMock() + entry1.content = "User likes Python" + entry1.score = 0.95 + + entry2 = MagicMock() + entry2.content = "User prefers dark mode" + entry2.score = 0.87 + + mock_retrieval = MagicMock() + mock_retrieval.entries = [entry1, entry2] + + mock_client = MagicMock() + mock_client.search = AsyncMock(return_value=mock_retrieval) + + mt = MemoryTools(mock_client, "test") + result = await mt.search_memory("preferences") + + assert "User likes Python" in result + assert "User prefers dark mode" in result + assert "relevance: 0.95" in result + assert "relevance: 0.87" in result + + +@pytest.mark.asyncio +async def test_search_memory_with_category() -> None: + """Test search_memory includes category in query.""" + mock_retrieval = MagicMock() + mock_retrieval.entries = [] + + mock_client = MagicMock() + mock_client.search = AsyncMock(return_value=mock_retrieval) + + mt = MemoryTools(mock_client, "test_conv") + await mt.search_memory("coffee", category="preferences") + + # Verify category is prepended to the query + mock_client.search.assert_called_once_with( + "preferences coffee", + conversation_id="test_conv", + ) + + +@pytest.mark.asyncio +async def test_search_memory_exception() -> None: + """Test search_memory handles exceptions.""" + mock_client = MagicMock() + mock_client.search = AsyncMock(side_effect=RuntimeError("Search failed")) + + mt = MemoryTools(mock_client, "test") + result = await 
mt.search_memory("query") + + assert "Error searching memory" in result + assert "Search failed" in result + + +# --- Tests for MemoryTools.list_all_memories --- + + +def test_list_all_memories_without_client() -> None: + """Test list_all_memories returns error when no client.""" + mt = MemoryTools(None, "test") + result = mt.list_all_memories() + assert "Error: Memory system not initialized" in result + + +def test_list_all_memories_empty() -> None: + """Test list_all_memories with no stored memories.""" + mock_client = MagicMock() + mock_client.list_all = MagicMock(return_value=[]) + + mt = MemoryTools(mock_client, "test") + result = mt.list_all_memories() + + assert result == "No memories stored yet." + + +def test_list_all_memories_with_entries() -> None: + """Test list_all_memories returns formatted list.""" + entries = [ + {"content": "User likes Python", "role": "memory", "created_at": "2024-01-01T10:00:00"}, + { + "content": "User lives in Amsterdam", + "role": "memory", + "created_at": "2024-01-02T12:00:00", + }, + ] + mock_client = MagicMock() + mock_client.list_all = MagicMock(return_value=entries) + + mt = MemoryTools(mock_client, "test") + result = mt.list_all_memories() + + assert "Showing 2 of 2 total memories" in result + assert "User likes Python" in result + assert "User lives in Amsterdam" in result + assert "[memory]" in result + + +def test_list_all_memories_with_limit() -> None: + """Test list_all_memories respects limit parameter.""" + entries = [ + {"content": f"Memory {i}", "role": "memory", "created_at": "2024-01-01"} for i in range(5) + ] + mock_client = MagicMock() + mock_client.list_all = MagicMock(return_value=entries) + + mt = MemoryTools(mock_client, "test") + result = mt.list_all_memories(limit=3) + + assert "Showing 3 of 5 total memories" in result + assert "... 
and 2 more memories" in result + + +def test_list_all_memories_exception() -> None: + """Test list_all_memories handles exceptions.""" + mock_client = MagicMock() + mock_client.list_all = MagicMock(side_effect=RuntimeError("List failed")) + + mt = MemoryTools(mock_client, "test") + result = mt.list_all_memories() + + assert "Error listing memories" in result + assert "List failed" in result + + +# --- Tests for create_memory_tools --- + + +def test_create_memory_tools_returns_list() -> None: + """Test create_memory_tools returns a list of Tool objects.""" + mock_client = MagicMock() + result = create_memory_tools(mock_client, "test") + + assert isinstance(result, list) + assert len(result) == 3 # add_memory, search_memory, list_all_memories + + +def test_create_memory_tools_with_none_client() -> None: + """Test create_memory_tools works with None client.""" + result = create_memory_tools(None, "test") + + assert isinstance(result, list) + assert len(result) == 3 + + +# --- Tests for tools function --- + + +def test_tools_returns_all_expected_tools() -> None: + """Test tools function returns all expected tools.""" + result = tools(None, "test") + + assert isinstance(result, list) + # Should have: read_file, execute_code, 3 memory tools, duckduckgo_search + assert len(result) == 6 + + +def test_tools_with_memory_client() -> None: + """Test tools function works with a memory client.""" + mock_client = MagicMock() + result = tools(mock_client, "conversation_123") + + assert isinstance(result, list) + assert len(result) == 6 + + +# --- Tests for chat.py integration functions --- + + +def test_get_conversation_id_with_history_dir() -> None: + """Test _get_conversation_id generates stable ID from history dir.""" + history_cfg = History(history_dir=Path("/home/user/.chat-history")) + result = _get_conversation_id(history_cfg) + + # Should be a 12-character hex string + assert len(result) == 12 + assert all(c in "0123456789abcdef" for c in result) + + +def test_get_conversation_id_without_history_dir() -> None: + """Test _get_conversation_id returns 'default' when no history dir.""" + history_cfg = History(history_dir=None) + result = _get_conversation_id(history_cfg) + + assert result == "default" + + +def test_get_conversation_id_is_stable() -> None: + """Test _get_conversation_id produces same ID for same path.""" + history_cfg1 = History(history_dir=Path("/some/path")) + history_cfg2 = History(history_dir=Path("/some/path")) + + assert _get_conversation_id(history_cfg1) == _get_conversation_id(history_cfg2) + + +@pytest.mark.asyncio +async def test_maybe_extract_memories_off_mode() -> None: + """Test _maybe_extract_memories does nothing when mode is not 'auto'.""" + memory_cfg = Memory(mode="tools") # Not 'auto' + mock_client = MagicMock() + mock_client.extract_from_turn = AsyncMock() + + await _maybe_extract_memories( + memory_cfg=memory_cfg, + memory_client=mock_client, + instruction="test", + response_text="response", + conversation_id="test", + model="gpt-4", + quiet=True, + ) + + # Should not call extract_from_turn when mode is not 'auto' + mock_client.extract_from_turn.assert_not_called() + + +@pytest.mark.asyncio +async def test_maybe_extract_memories_auto_mode() -> None: + """Test _maybe_extract_memories extracts when mode is 'auto'.""" + memory_cfg = Memory(mode="auto") + mock_client = MagicMock() + mock_client.extract_from_turn = AsyncMock() + + await _maybe_extract_memories( + memory_cfg=memory_cfg, + memory_client=mock_client, + instruction="Hello world", + response_text="Hi 
there!", + conversation_id="conv123", + model="gpt-4", + quiet=True, + ) + + mock_client.extract_from_turn.assert_called_once_with( + user_message="Hello world", + assistant_message="Hi there!", + conversation_id="conv123", + model="gpt-4", + ) + + +@pytest.mark.asyncio +async def test_maybe_extract_memories_handles_exception() -> None: + """Test _maybe_extract_memories handles exceptions gracefully.""" + memory_cfg = Memory(mode="auto") + mock_client = MagicMock() + mock_client.extract_from_turn = AsyncMock(side_effect=RuntimeError("Extraction failed")) + + # Should not raise, just log warning + await _maybe_extract_memories( + memory_cfg=memory_cfg, + memory_client=mock_client, + instruction="test", + response_text="response", + conversation_id="test", + model="gpt-4", + quiet=True, + ) + + +@pytest.mark.asyncio +async def test_maybe_extract_memories_no_client() -> None: + """Test _maybe_extract_memories does nothing when client is None.""" + memory_cfg = Memory(mode="auto") + + # Should not raise even with None client + await _maybe_extract_memories( + memory_cfg=memory_cfg, + memory_client=None, + instruction="test", + response_text="response", + conversation_id="test", + model="gpt-4", + quiet=True, + ) + + +def test_maybe_init_memory_off_mode() -> None: + """Test _maybe_init_memory returns None when mode is 'off'.""" + memory_cfg = Memory(mode="off") + history_cfg = History() + openai_cfg = OpenAILLM(llm_openai_model="gpt-4o-mini") + + result = _maybe_init_memory(memory_cfg, history_cfg, openai_cfg, quiet=True) + assert result is None From 0b2a7497fb6873dcc931864ff11fbbf9774b00a9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 4 Jan 2026 15:34:35 +0000 Subject: [PATCH 16/20] Update auto-generated docs --- README.md | 182 +++++++++++++++++++++++----------------- docs/commands/memory.md | 27 +++--- 2 files changed, 122 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 2d4a228e5..c0676575c 100644 --- a/README.md +++ b/README.md @@ -1530,22 +1530,55 @@ uv tool install "agent-cli[vad]" │ [default: 50] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Memory Options ─────────────────────────────────────────────────────────────╮ -│ --memory-mode TEXT Memory mode: 'off' (disabled), │ -│ 'tools' (LLM decides via tools), │ -│ 'auto' (automatic extraction). │ -│ [default: tools] │ -│ --memory-path PATH Path for memory database storage. │ -│ Default: │ -│ ~/.config/agent-cli/memory/vector_… │ -│ --memory-embedding-model TEXT Embedding model for semantic memory │ -│ search. │ -│ [default: text-embedding-3-small] │ -│ --memory-top-k INTEGER Number of memories to retrieve per │ -│ search. │ -│ [default: 5] │ -│ --memory-score-threshold FLOAT Minimum relevance score threshold │ -│ for memory retrieval (0.0-1.0). │ -│ [default: 0.35] │ +│ --memory-mode TEXT Memory mode: 'off' │ +│ (disabled), 'tools' │ +│ (LLM decides via │ +│ tools), 'auto' │ +│ (automatic │ +│ extraction). │ +│ [default: tools] │ +│ --memory-path PATH Path for memory │ +│ database storage. │ +│ Default: │ +│ ~/.config/agent-cli… │ +│ --memory-top-k INTEGER Number of memories │ +│ to retrieve per │ +│ search. │ +│ [default: 5] │ +│ --memory-score-thre… FLOAT Minimum relevance │ +│ score threshold for │ +│ memory retrieval │ +│ (0.0-1.0). │ +│ [default: 0.35] │ +│ --memory-max-entries INTEGER Maximum stored │ +│ memory entries per │ +│ conversation │ +│ (excluding summary). 
│ +│ [default: 500] │ +│ --memory-mmr-lambda FLOAT MMR lambda (0-1): │ +│ higher favors │ +│ relevance, lower │ +│ favors diversity. │ +│ [default: 0.7] │ +│ --memory-recency-we… FLOAT Recency score weight │ +│ (0.0-1.0). Controls │ +│ freshness vs. │ +│ relevance. │ +│ [default: 0.2] │ +│ --memory-summarizat… --no-memory-summar… Enable automatic │ +│ fact extraction and │ +│ summaries. │ +│ [default: │ +│ memory-summarizatio… │ +│ --memory-git-versio… --no-memory-git-ve… Enable automatic git │ +│ commit of memory │ +│ changes. │ +│ [default: │ +│ no-memory-git-versi… │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ LLM Configuration ──────────────────────────────────────────────────────────╮ +│ --embedding-model TEXT Embedding model to use for vectorization. │ +│ [default: text-embedding-3-small] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ General Options ────────────────────────────────────────────────────────────╮ │ --save-file PATH Save TTS response audio to WAV file. │ @@ -1736,49 +1769,45 @@ The `memory proxy` command is the core feature—a middleware server that gives ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --help -h Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ -╭─ Memory Configuration ───────────────────────────────────────────────────────╮ -│ --memory-path PATH Path to the memory │ -│ store (files + derived │ -│ vector index). │ -│ [default: ./memory_db] │ -│ --default-top-k INTEGER Number of memory │ -│ entries to retrieve per │ -│ query. │ -│ [default: 5] │ -│ --max-entries INTEGER Maximum stored memory │ -│ entries per │ -│ conversation (excluding │ -│ summary). │ -│ [default: 500] │ -│ --mmr-lambda FLOAT MMR lambda (0-1): │ -│ higher favors │ -│ relevance, lower favors │ -│ diversity. │ -│ [default: 0.7] │ -│ --recency-weight FLOAT Recency score weight │ -│ (0.0-1.0). Controls │ -│ freshness vs. │ -│ relevance. Default 0.2 │ -│ (20% recency, 80% │ -│ semantic relevance). │ -│ [default: 0.2] │ -│ --score-threshold FLOAT Minimum semantic │ -│ relevance threshold │ -│ (0.0-1.0). Memories │ -│ below this score are │ -│ discarded to reduce │ -│ noise. │ -│ [default: 0.35] │ -│ --summarization --no-summarization Enable automatic fact │ -│ extraction and │ -│ summaries. │ -│ [default: │ -│ summarization] │ -│ --git-versioning --no-git-versioning Enable automatic git │ -│ commit of memory │ -│ changes. │ -│ [default: │ -│ git-versioning] │ +╭─ Memory Options ─────────────────────────────────────────────────────────────╮ +│ --memory-path PATH Path for memory │ +│ database storage. │ +│ Default: │ +│ ~/.config/agent-cli… │ +│ --memory-top-k INTEGER Number of memories │ +│ to retrieve per │ +│ search. │ +│ [default: 5] │ +│ --memory-max-entries INTEGER Maximum stored │ +│ memory entries per │ +│ conversation │ +│ (excluding summary). │ +│ [default: 500] │ +│ --memory-mmr-lambda FLOAT MMR lambda (0-1): │ +│ higher favors │ +│ relevance, lower │ +│ favors diversity. │ +│ [default: 0.7] │ +│ --memory-recency-we… FLOAT Recency score weight │ +│ (0.0-1.0). Controls │ +│ freshness vs. │ +│ relevance. │ +│ [default: 0.2] │ +│ --memory-score-thre… FLOAT Minimum relevance │ +│ score threshold for │ +│ memory retrieval │ +│ (0.0-1.0). │ +│ [default: 0.35] │ +│ --memory-summarizat… --no-memory-summar… Enable automatic │ +│ fact extraction and │ +│ summaries. 
│ +│ [default: │ +│ memory-summarizatio… │ +│ --memory-git-versio… --no-memory-git-ve… Enable automatic git │ +│ commit of memory │ +│ changes. │ +│ [default: │ +│ memory-git-versioni… │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │ @@ -1886,23 +1915,24 @@ agent-cli memory add -c work "Project deadline is Friday" │ fact. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ -│ --file -f PATH Read memories from file. │ -│ Use '-' for stdin. │ -│ Supports JSON array, │ -│ JSON object with │ -│ 'memories' key, or plain │ -│ text (one per line). │ -│ --conversation-id -c TEXT Conversation ID to add │ -│ memories to. │ -│ [default: default] │ -│ --memory-path PATH Path to the memory │ -│ store. │ -│ [default: ./memory_db] │ -│ --git-versioning --no-git-versioning Commit changes to git. │ -│ [default: │ -│ git-versioning] │ -│ --help -h Show this message and │ -│ exit. │ +│ --file -f PATH Read memories from file. Use '-' for stdin. │ +│ Supports JSON array, JSON object with │ +│ 'memories' key, or plain text (one per │ +│ line). │ +│ --conversation-id -c TEXT Conversation ID to add memories to. │ +│ [default: default] │ +│ --help -h Show this message and exit. │ +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Memory Options ─────────────────────────────────────────────────────────────╮ +│ --memory-path PATH Path for memory │ +│ database storage. │ +│ Default: │ +│ ~/.config/agent-cli/… │ +│ --memory-git-version… --no-memory-git-ver… Enable automatic git │ +│ commit of memory │ +│ changes. │ +│ [default: │ +│ memory-git-versionin… │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ General Options ────────────────────────────────────────────────────────────╮ │ --quiet -q Suppress console output from rich. │ diff --git a/docs/commands/memory.md b/docs/commands/memory.md index 11428a7e1..6277f3c7a 100644 --- a/docs/commands/memory.md +++ b/docs/commands/memory.md @@ -69,18 +69,18 @@ agent-cli chat --openai-base-url http://localhost:8100/v1 --llm-provider openai -### Memory Configuration +### Memory Options | Option | Default | Description | |--------|---------|-------------| -| `--memory-path` | `./memory_db` | Path to the memory store (files + derived vector index). | -| `--default-top-k` | `5` | Number of memory entries to retrieve per query. | -| `--max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). | -| `--mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. | -| `--recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance). | -| `--score-threshold` | `0.35` | Minimum semantic relevance threshold (0.0-1.0). Memories below this score are discarded to reduce noise. | -| `--summarization/--no-summarization` | `true` | Enable automatic fact extraction and summaries. | -| `--git-versioning/--no-git-versioning` | `true` | Enable automatic git commit of memory changes. | +| `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db | +| `--memory-top-k` | `5` | Number of memories to retrieve per search. 
| +| `--memory-max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). | +| `--memory-mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. | +| `--memory-recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. | +| `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). | +| `--memory-summarization/--no-memory-summarization` | `true` | Enable automatic fact extraction and summaries. | +| `--memory-git-versioning/--no-memory-git-versioning` | `true` | Enable automatic git commit of memory changes. | ### LLM: OpenAI-compatible @@ -162,8 +162,13 @@ agent-cli memory add -c work "Project deadline is Friday" |--------|---------|-------------| | `--file` | - | Read memories from file. Use '-' for stdin. Supports JSON array, JSON object with 'memories' key, or plain text (one per line). | | `--conversation-id` | `default` | Conversation ID to add memories to. | -| `--memory-path` | `./memory_db` | Path to the memory store. | -| `--git-versioning/--no-git-versioning` | `true` | Commit changes to git. | + +### Memory Options + +| Option | Default | Description | +|--------|---------|-------------| +| `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db | +| `--memory-git-versioning/--no-memory-git-versioning` | `true` | Enable automatic git commit of memory changes. | ### General Options From d104dcb40443020dea6a6e07e0021ef911f6c7c8 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 07:35:09 -0800 Subject: [PATCH 17/20] fix(tests): extend timeout for memory integration tests on Windows --- tests/memory/test_memory_integration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/memory/test_memory_integration.py b/tests/memory/test_memory_integration.py index 28c7d48a9..e3dfcaf44 100644 --- a/tests/memory/test_memory_integration.py +++ b/tests/memory/test_memory_integration.py @@ -15,6 +15,9 @@ from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL from agent_cli.memory import api as memory_api +# Extend timeout for these tests - Windows SSL initialization is slow +pytestmark = pytest.mark.timeout(30) + class _DummyReranker: def predict(self, pairs: list[tuple[str, str]]) -> list[float]: From 718cb70dba8c9defbe730c50776854328c1a8868 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 10:20:25 -0800 Subject: [PATCH 18/20] fix(chat): run memory extraction in background (don't block conversation) --- agent_cli/agents/chat.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 39fbaf674..95784c56f 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -387,16 +387,19 @@ async def _handle_conversation_turn( }, ) - # 5b. Auto-extract memories in "auto" mode - await _maybe_extract_memories( - memory_cfg, - memory_client, - instruction, - response_text, - conversation_id, - openai_llm_cfg.llm_openai_model, - general_cfg.quiet, - ) + # 5b. Auto-extract memories in "auto" mode (run in background, don't block) + if memory_cfg.mode == "auto" and memory_client is not None: + asyncio.create_task( # noqa: RUF006 + _maybe_extract_memories( + memory_cfg, + memory_client, + instruction, + response_text, + conversation_id, + openai_llm_cfg.llm_openai_model, + general_cfg.quiet, + ), + ) # 6. 
Save history if history_cfg.history_dir: From 17de4da1e9ed0e83dcf453c9d67729fb40f12daf Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Sun, 4 Jan 2026 10:30:57 -0800 Subject: [PATCH 19/20] fix(chat): enable memory search in auto mode In "auto" mode, the LLM now has read-only access to memory tools (search_memory, list_all_memories) while extraction still happens automatically. Previously, auto mode disabled all memory access for the LLM, meaning stored facts couldn't be searched. Also added read_only parameter to create_memory_tools() and memory_read_only parameter to tools() function with tests. --- agent_cli/_tools.py | 21 +++++++++++++++++---- agent_cli/agents/chat.py | 10 +++++++--- tests/test_memory_tools.py | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py index 180fa3f7d..ad94fab48 100644 --- a/agent_cli/_tools.py +++ b/agent_cli/_tools.py @@ -193,12 +193,16 @@ def list_all_memories(self, limit: int = 10) -> str: def create_memory_tools( memory_client: MemoryClient | None, conversation_id: str = "default", + *, + read_only: bool = False, ) -> list: """Create memory tools bound to a specific client and conversation. Args: memory_client: The MemoryClient instance, or None if not available. conversation_id: The conversation ID for scoping memories. + read_only: If True, only include search/list tools (not add_memory). + Use this for "auto" mode where extraction happens automatically. Returns: List of pydantic_ai Tool objects for memory operations. @@ -207,19 +211,28 @@ def create_memory_tools( from pydantic_ai.tools import Tool # noqa: PLC0415 mt = MemoryTools(memory_client, conversation_id) - return [ - Tool(mt.add_memory), + tools_list = [ Tool(mt.search_memory), Tool(mt.list_all_memories), ] + if not read_only: + tools_list.insert(0, Tool(mt.add_memory)) + return tools_list -def tools(memory_client: MemoryClient | None = None, conversation_id: str = "default") -> list: +def tools( + memory_client: MemoryClient | None = None, + conversation_id: str = "default", + *, + memory_read_only: bool = False, +) -> list: """Return a list of all tools for the chat agent. Args: memory_client: The MemoryClient instance, or None if not available. conversation_id: The conversation ID for scoping memories. + memory_read_only: If True, only include search/list memory tools (not add). + Use this for "auto" mode where extraction happens automatically. 
""" from pydantic_ai.common_tools.duckduckgo import duckduckgo_search_tool # noqa: PLC0415 @@ -228,6 +241,6 @@ def tools(memory_client: MemoryClient | None = None, conversation_id: str = "def return [ Tool(read_file), Tool(execute_code), - *create_memory_tools(memory_client, conversation_id), + *create_memory_tools(memory_client, conversation_id, read_only=memory_read_only), duckduckgo_search_tool(), ] diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py index 95784c56f..8fba37c81 100644 --- a/agent_cli/agents/chat.py +++ b/agent_cli/agents/chat.py @@ -348,8 +348,12 @@ async def _handle_conversation_turn( quiet=general_cfg.quiet, stop_event=stop_event, ): - # Only include memory tools in "tools" mode - tool_memory_client = memory_client if memory_cfg.mode == "tools" else None + # Memory tools access: + # - "off": no memory tools + # - "tools": full access (add, search, list) + # - "auto": read-only access (search, list) - extraction happens automatically + tool_memory_client = memory_client if memory_cfg.mode != "off" else None + memory_read_only = memory_cfg.mode == "auto" response_text = await get_llm_response( system_prompt=SYSTEM_PROMPT, agent_instructions=AGENT_INSTRUCTIONS, @@ -359,7 +363,7 @@ async def _handle_conversation_turn( openai_cfg=openai_llm_cfg, gemini_cfg=gemini_llm_cfg, logger=LOGGER, - tools=tools(tool_memory_client, conversation_id), + tools=tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only), quiet=True, # Suppress internal output since we're showing our own timer live=live, ) diff --git a/tests/test_memory_tools.py b/tests/test_memory_tools.py index d63a0666c..cb523bec2 100644 --- a/tests/test_memory_tools.py +++ b/tests/test_memory_tools.py @@ -276,6 +276,24 @@ def test_create_memory_tools_with_none_client() -> None: assert len(result) == 3 +def test_create_memory_tools_read_only() -> None: + """Test create_memory_tools with read_only=True excludes add_memory.""" + mock_client = MagicMock() + result = create_memory_tools(mock_client, "test", read_only=True) + + assert isinstance(result, list) + assert len(result) == 2 # Only search_memory and list_all_memories + + +def test_create_memory_tools_read_only_false() -> None: + """Test create_memory_tools with read_only=False includes add_memory.""" + mock_client = MagicMock() + result = create_memory_tools(mock_client, "test", read_only=False) + + assert isinstance(result, list) + assert len(result) == 3 # add_memory, search_memory, list_all_memories + + # --- Tests for tools function --- @@ -297,6 +315,26 @@ def test_tools_with_memory_client() -> None: assert len(result) == 6 +def test_tools_memory_read_only() -> None: + """Test tools function with memory_read_only=True has fewer memory tools.""" + mock_client = MagicMock() + result = tools(mock_client, "test", memory_read_only=True) + + assert isinstance(result, list) + # Should have: read_file, execute_code, 2 memory tools (no add_memory), duckduckgo_search + assert len(result) == 5 + + +def test_tools_memory_read_only_false() -> None: + """Test tools function with memory_read_only=False includes all memory tools.""" + mock_client = MagicMock() + result = tools(mock_client, "test", memory_read_only=False) + + assert isinstance(result, list) + # Should have: read_file, execute_code, 3 memory tools, duckduckgo_search + assert len(result) == 6 + + # --- Tests for chat.py integration functions --- From bf31697e84d40e3adeb18e6b46d926527683ba95 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Mon, 5 Jan 2026 01:00:48 -0800 Subject: 
[PATCH 20/20] feat(chat): auto-inject memories in auto mode

In "auto" mode, relevant memories are now automatically retrieved and
injected into the system prompt before each LLM call. This mirrors the
memory-proxy behavior but only in auto mode.

Memory mode behavior:
- off: No memory at all
- tools: LLM has full control via tools (no auto-injection)
- auto: Auto-inject + read-only tools + auto-extract after turn
---
 agent_cli/agents/chat.py   |  40 ++++++++++++-
 tests/test_memory_tools.py | 111 +++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py
index 8fba37c81..9a7b55e1d 100644
--- a/agent_cli/agents/chat.py
+++ b/agent_cli/agents/chat.py
@@ -259,6 +259,35 @@ async def _maybe_extract_memories(
         LOGGER.warning("Failed to extract memories: %s", e)
 
 
+async def _maybe_retrieve_memories(
+    memory_cfg: config.Memory,
+    memory_client: MemoryClient | None,
+    instruction: str,
+    conversation_id: str,
+) -> str:
+    """Retrieve relevant memories in auto mode for prompt injection.
+
+    Returns formatted memory context string, or empty string if not applicable.
+    """
+    if memory_cfg.mode != "auto" or memory_client is None:
+        return ""
+    try:
+        retrieval = await memory_client.search(
+            query=instruction,
+            conversation_id=conversation_id,
+            top_k=memory_cfg.top_k,
+        )
+        if not retrieval.entries:
+            return ""
+        lines = ["\n<memories>"]
+        lines.extend(f"- {entry.content}" for entry in retrieval.entries)
+        lines.append("</memories>")
+        return "\n".join(lines)
+    except Exception as e:
+        LOGGER.warning("Failed to retrieve memories: %s", e)
+        return ""
+
+
 async def _handle_conversation_turn(
     *,
     stop_event: InteractiveStopEvent,
@@ -331,6 +360,15 @@ async def _handle_conversation_turn(
         instruction=instruction,
     )
 
+    # 3b. Auto-retrieve and inject memories in "auto" mode
+    memory_context = await _maybe_retrieve_memories(
+        memory_cfg,
+        memory_client,
+        instruction,
+        conversation_id,
+    )
+    system_prompt = SYSTEM_PROMPT + memory_context
+
     # 4. Get LLM response with timing
     start_time = time.monotonic()
 
@@ -355,7 +393,7 @@ async def _handle_conversation_turn(
         tool_memory_client = memory_client if memory_cfg.mode != "off" else None
         memory_read_only = memory_cfg.mode == "auto"
         response_text = await get_llm_response(
-            system_prompt=SYSTEM_PROMPT,
+            system_prompt=system_prompt,
             agent_instructions=AGENT_INSTRUCTIONS,
             user_input=user_message_with_context,
             provider_cfg=provider_cfg,
diff --git a/tests/test_memory_tools.py b/tests/test_memory_tools.py
index cb523bec2..dabe3ae73 100644
--- a/tests/test_memory_tools.py
+++ b/tests/test_memory_tools.py
@@ -17,6 +17,7 @@
     _get_conversation_id,
     _maybe_extract_memories,
     _maybe_init_memory,
+    _maybe_retrieve_memories,
 )
 from agent_cli.config import History, Memory, OpenAILLM
 
@@ -454,3 +455,113 @@ def test_maybe_init_memory_off_mode() -> None:
     result = _maybe_init_memory(memory_cfg, history_cfg, openai_cfg, quiet=True)
 
     assert result is None
+
+
+# --- Tests for _maybe_retrieve_memories ---
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_off_mode() -> None:
+    """Test _maybe_retrieve_memories returns empty string when mode is not 'auto'."""
+    memory_cfg = Memory(mode="tools")  # Not 'auto'
+    mock_client = MagicMock()
+    mock_client.search = AsyncMock()
+
+    result = await _maybe_retrieve_memories(
+        memory_cfg=memory_cfg,
+        memory_client=mock_client,
+        instruction="test",
+        conversation_id="test",
+    )
+
+    assert result == ""
+    mock_client.search.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_auto_mode_with_results() -> None:
+    """Test _maybe_retrieve_memories returns formatted context in auto mode."""
+    memory_cfg = Memory(mode="auto", top_k=3)
+
+    # Create mock entries
+    entry1 = MagicMock()
+    entry1.content = "User likes pizza"
+    entry2 = MagicMock()
+    entry2.content = "User prefers Italian food"
+
+    mock_retrieval = MagicMock()
+    mock_retrieval.entries = [entry1, entry2]
+
+    mock_client = MagicMock()
+    mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+    result = await _maybe_retrieve_memories(
+        memory_cfg=memory_cfg,
+        memory_client=mock_client,
+        instruction="What food do I like?",
+        conversation_id="conv123",
+    )
+
+    assert "<memories>" in result
+    assert "</memories>" in result
+    assert "User likes pizza" in result
+    assert "User prefers Italian food" in result
+    mock_client.search.assert_called_once_with(
+        query="What food do I like?",
+        conversation_id="conv123",
+        top_k=3,
+    )
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_auto_mode_no_results() -> None:
+    """Test _maybe_retrieve_memories returns empty string when no memories found."""
+    memory_cfg = Memory(mode="auto")
+
+    mock_retrieval = MagicMock()
+    mock_retrieval.entries = []
+
+    mock_client = MagicMock()
+    mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+    result = await _maybe_retrieve_memories(
+        memory_cfg=memory_cfg,
+        memory_client=mock_client,
+        instruction="test",
+        conversation_id="test",
+    )
+
+    assert result == ""
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_no_client() -> None:
+    """Test _maybe_retrieve_memories returns empty string when client is None."""
+    memory_cfg = Memory(mode="auto")
+
+    result = await _maybe_retrieve_memories(
+        memory_cfg=memory_cfg,
+        memory_client=None,
+        instruction="test",
+        conversation_id="test",
+    )
+
+    assert result == ""
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_handles_exception() -> None:
+    """Test _maybe_retrieve_memories handles exceptions gracefully."""
+    memory_cfg = Memory(mode="auto")
+    mock_client = MagicMock()
+    mock_client.search = AsyncMock(side_effect=RuntimeError("Search failed"))
+
+    # Should not raise, just return empty string
+    result = await _maybe_retrieve_memories(
+        memory_cfg=memory_cfg,
+        memory_client=mock_client,
+        instruction="test",
+        conversation_id="test",
+    )
+
+    assert result == ""
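

Reviewer note: a minimal sketch of how patches 18-20 compose in a single chat
turn: auto-retrieval and prompt injection (patch 20), mode-dependent tool
gating (patch 19), and non-blocking fact extraction (patch 18). This is
illustrative only, not part of the series: StubMemoryClient, handle_turn, the
fake LLM response, and the model name are stand-ins rather than agent-cli
APIs, and the <memories> delimiter mirrors the formatting used by
_maybe_retrieve_memories above.

import asyncio

SYSTEM_PROMPT = "You are a helpful assistant."  # stand-in for the real prompt


class StubMemoryClient:
    """Minimal stand-in for agent_cli.memory.client.MemoryClient."""

    class _Entry:
        def __init__(self, content: str) -> None:
            self.content = content

    class _Retrieval:
        def __init__(self, entries: list) -> None:
            self.entries = entries

    async def search(self, *, query: str, conversation_id: str, top_k: int):
        # Pretend the vector store matched one stored fact.
        return self._Retrieval([self._Entry("User likes pizza")])

    async def extract_from_turn(
        self, *, user_message: str, assistant_message: str,
        conversation_id: str, model: str,
    ) -> None:
        print(f"(background) extracting facts from: {user_message!r}")


async def handle_turn(mode: str, client, instruction: str,
                      conversation_id: str = "default") -> str:
    # Patch 20: in "auto" mode, retrieved memories are injected into the prompt.
    memory_context = ""
    if mode == "auto" and client is not None:
        retrieval = await client.search(
            query=instruction, conversation_id=conversation_id, top_k=5)
        if retrieval.entries:
            body = "\n".join(f"- {e.content}" for e in retrieval.entries)
            memory_context = f"\n<memories>\n{body}\n</memories>"
    system_prompt = SYSTEM_PROMPT + memory_context

    # Patch 19: tool exposure depends on the mode.
    tool_client = client if mode != "off" else None  # "off" -> no memory tools
    read_only = mode == "auto"                       # "auto" -> search/list only
    print(f"tools: client={'yes' if tool_client else 'no'}, read_only={read_only}")

    response = f"LLM answer given prompt of {len(system_prompt)} chars"  # fake LLM

    # Patch 18: extraction is scheduled in the background so the turn returns fast.
    if mode == "auto" and client is not None:
        task = asyncio.create_task(client.extract_from_turn(
            user_message=instruction, assistant_message=response,
            conversation_id=conversation_id, model="gpt-4o-mini"))
        await task  # awaited here only so this demo exits cleanly
    return response


print(asyncio.run(handle_turn("auto", StubMemoryClient(), "What food do I like?")))

Running the sketch prints the tool-gating line and the background-extraction
message before the fake answer; the real implementation leaves the task
un-awaited (hence the noqa: RUF006 in patch 18) so the conversation is never
blocked on extraction.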