diff --git a/README.md b/README.md
index f0a5b2d5f..c0676575c 100644
--- a/README.md
+++ b/README.md
@@ -1529,6 +1529,57 @@ uv tool install "agent-cli[vad]"
│ history. │
│ [default: 50] │
╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-mode TEXT Memory mode: 'off' │
+│ (disabled), 'tools' │
+│ (LLM decides via │
+│ tools), 'auto' │
+│ (automatic │
+│ extraction). │
+│ [default: tools] │
+│ --memory-path PATH Path for memory │
+│ database storage. │
+│ Default: │
+│ ~/.config/agent-cli… │
+│ --memory-top-k INTEGER Number of memories │
+│ to retrieve per │
+│ search. │
+│ [default: 5] │
+│ --memory-score-thre… FLOAT Minimum relevance │
+│ score threshold for │
+│ memory retrieval │
+│ (0.0-1.0). │
+│ [default: 0.35] │
+│ --memory-max-entries INTEGER Maximum stored │
+│ memory entries per │
+│ conversation │
+│ (excluding summary). │
+│ [default: 500] │
+│ --memory-mmr-lambda FLOAT MMR lambda (0-1): │
+│ higher favors │
+│ relevance, lower │
+│ favors diversity. │
+│ [default: 0.7] │
+│ --memory-recency-we… FLOAT Recency score weight │
+│ (0.0-1.0). Controls │
+│ freshness vs. │
+│ relevance. │
+│ [default: 0.2] │
+│ --memory-summarizat… --no-memory-summar… Enable automatic │
+│ fact extraction and │
+│ summaries. │
+│ [default: │
+│ memory-summarizatio… │
+│ --memory-git-versio… --no-memory-git-ve… Enable automatic git │
+│ commit of memory │
+│ changes. │
+│ [default: │
+│ no-memory-git-versi… │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM Configuration ──────────────────────────────────────────────────────────╮
+│ --embedding-model TEXT Embedding model to use for vectorization. │
+│ [default: text-embedding-3-small] │
+╰──────────────────────────────────────────────────────────────────────────────╯
╭─ General Options ────────────────────────────────────────────────────────────╮
│ --save-file PATH Save TTS response audio to WAV file. │
│ --log-level TEXT Set logging level. │
@@ -1718,49 +1769,45 @@ The `memory proxy` command is the core feature—a middleware server that gives
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help -h Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Memory Configuration ───────────────────────────────────────────────────────╮
-│ --memory-path PATH Path to the memory │
-│ store (files + derived │
-│ vector index). │
-│ [default: ./memory_db] │
-│ --default-top-k INTEGER Number of memory │
-│ entries to retrieve per │
-│ query. │
-│ [default: 5] │
-│ --max-entries INTEGER Maximum stored memory │
-│ entries per │
-│ conversation (excluding │
-│ summary). │
-│ [default: 500] │
-│ --mmr-lambda FLOAT MMR lambda (0-1): │
-│ higher favors │
-│ relevance, lower favors │
-│ diversity. │
-│ [default: 0.7] │
-│ --recency-weight FLOAT Recency score weight │
-│ (0.0-1.0). Controls │
-│ freshness vs. │
-│ relevance. Default 0.2 │
-│ (20% recency, 80% │
-│ semantic relevance). │
-│ [default: 0.2] │
-│ --score-threshold FLOAT Minimum semantic │
-│ relevance threshold │
-│ (0.0-1.0). Memories │
-│ below this score are │
-│ discarded to reduce │
-│ noise. │
-│ [default: 0.35] │
-│ --summarization --no-summarization Enable automatic fact │
-│ extraction and │
-│ summaries. │
-│ [default: │
-│ summarization] │
-│ --git-versioning --no-git-versioning Enable automatic git │
-│ commit of memory │
-│ changes. │
-│ [default: │
-│ git-versioning] │
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-path PATH Path for memory │
+│ database storage. │
+│ Default: │
+│ ~/.config/agent-cli… │
+│ --memory-top-k INTEGER Number of memories │
+│ to retrieve per │
+│ search. │
+│ [default: 5] │
+│ --memory-max-entries INTEGER Maximum stored │
+│ memory entries per │
+│ conversation │
+│ (excluding summary). │
+│ [default: 500] │
+│ --memory-mmr-lambda FLOAT MMR lambda (0-1): │
+│ higher favors │
+│ relevance, lower │
+│ favors diversity. │
+│ [default: 0.7] │
+│ --memory-recency-we… FLOAT Recency score weight │
+│ (0.0-1.0). Controls │
+│ freshness vs. │
+│ relevance. │
+│ [default: 0.2] │
+│ --memory-score-thre… FLOAT Minimum relevance │
+│ score threshold for │
+│ memory retrieval │
+│ (0.0-1.0). │
+│ [default: 0.35] │
+│ --memory-summarizat… --no-memory-summar… Enable automatic │
+│ fact extraction and │
+│ summaries. │
+│ [default: │
+│ memory-summarizatio… │
+│ --memory-git-versio… --no-memory-git-ve… Enable automatic git │
+│ commit of memory │
+│ changes. │
+│ [default: │
+│ memory-git-versioni… │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
@@ -1868,23 +1915,24 @@ agent-cli memory add -c work "Project deadline is Friday"
│ fact. │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --file -f PATH Read memories from file. │
-│ Use '-' for stdin. │
-│ Supports JSON array, │
-│ JSON object with │
-│ 'memories' key, or plain │
-│ text (one per line). │
-│ --conversation-id -c TEXT Conversation ID to add │
-│ memories to. │
-│ [default: default] │
-│ --memory-path PATH Path to the memory │
-│ store. │
-│ [default: ./memory_db] │
-│ --git-versioning --no-git-versioning Commit changes to git. │
-│ [default: │
-│ git-versioning] │
-│ --help -h Show this message and │
-│ exit. │
+│ --file -f PATH Read memories from file. Use '-' for stdin. │
+│ Supports JSON array, JSON object with │
+│ 'memories' key, or plain text (one per │
+│ line). │
+│ --conversation-id -c TEXT Conversation ID to add memories to. │
+│ [default: default] │
+│ --help -h Show this message and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-path PATH Path for memory │
+│ database storage. │
+│ Default: │
+│ ~/.config/agent-cli/… │
+│ --memory-git-version… --no-memory-git-ver… Enable automatic git │
+│ commit of memory │
+│ changes. │
+│ [default: │
+│ memory-git-versionin… │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ General Options ────────────────────────────────────────────────────────────╮
│ --quiet -q Suppress console output from rich. │
diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py
index 1a7ce8957..ad94fab48 100644
--- a/agent_cli/_tools.py
+++ b/agent_cli/_tools.py
@@ -2,99 +2,12 @@
from __future__ import annotations
-import json
-import os
import subprocess
-from datetime import UTC, datetime
from pathlib import Path
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING
if TYPE_CHECKING:
- from collections.abc import Callable
-
-
-# Memory system helpers
-
-
-def _get_memory_file_path() -> Path:
- """Get the path to the memory file.
-
- If the environment variable ``AGENT_CLI_HISTORY_DIR`` is set (by the
- running agent), store the memory file in that directory.
- Otherwise fall back to the user's config directory.
- """
- history_dir = os.getenv("AGENT_CLI_HISTORY_DIR")
- if history_dir:
- return Path(history_dir).expanduser() / "long_term_memory.json"
-
- return Path.home() / ".config" / "agent-cli" / "memory" / "long_term_memory.json"
-
-
-def _load_memories() -> list[dict[str, Any]]:
- """Load memories from file, returning empty list if file doesn't exist."""
- memory_file = _get_memory_file_path()
- if not memory_file.exists():
- return []
-
- with memory_file.open("r") as f:
- return json.load(f)
-
-
-def _save_memories(memories: list[dict[str, Any]]) -> None:
- """Save memories to file, creating directories if needed."""
- memory_file = _get_memory_file_path()
- memory_file.parent.mkdir(parents=True, exist_ok=True)
-
- with memory_file.open("w") as f:
- json.dump(memories, f, indent=2)
-
-
-def _find_memory_by_id(memories: list[dict[str, Any]], memory_id: int) -> dict[str, Any] | None:
- """Find a memory by ID in the memories list."""
- for memory in memories:
- if memory["id"] == memory_id:
- return memory
- return None
-
-
-def _format_memory_summary(memory: dict[str, Any]) -> str:
- """Format a memory for display in search results."""
- return (
- f"ID: {memory['id']} | Category: {memory['category']} | "
- f"Content: {memory['content']} | Tags: {', '.join(memory['tags'])}"
- )
-
-
-def _format_memory_detailed(memory: dict[str, Any]) -> str:
- """Format a memory with full details for listing."""
- created = datetime.fromisoformat(memory["timestamp"]).strftime("%Y-%m-%d %H:%M")
- updated_info = ""
- if "updated_at" in memory:
- updated = datetime.fromisoformat(memory["updated_at"]).strftime("%Y-%m-%d %H:%M")
- updated_info = f" (updated: {updated})"
-
- return (
- f"ID: {memory['id']} | Category: {memory['category']}\n"
- f"Content: {memory['content']}\n"
- f"Tags: {', '.join(memory['tags']) if memory['tags'] else 'None'}\n"
- f"Created: {created}{updated_info}\n"
- )
-
-
-def _parse_tags(tags_string: str) -> list[str]:
- """Parse comma-separated tags string into a list of clean tags."""
- return [tag.strip() for tag in tags_string.split(",") if tag.strip()]
-
-
-R = TypeVar("R")
-
-
-def _memory_operation(operation_name: str, operation_func: Callable[[], str]) -> str:
- """Wrapper for memory operations with consistent error handling."""
- try:
- return operation_func()
- except Exception as e:
- return f"Error {operation_name}: {e}"
+ from agent_cli.memory.client import MemoryClient
def read_file(path: str) -> str:
@@ -133,236 +46,201 @@ def execute_code(code: str) -> str:
return f"Error: Command not found: {code.split()[0]}"
-def add_memory(content: str, category: str = "general", tags: str = "") -> str:
- """Add important information to long-term memory for future conversations.
-
- Use this when the user shares:
- - Personal information (name, job, location, family, etc.)
- - Preferences (favorite foods, work style, communication preferences, etc.)
- - Important facts they want remembered (birthdays, project details, goals, etc.)
- - Tasks or commitments they mention
-
- Always ask for permission before storing personal or sensitive information.
-
- Args:
- content: The specific information to remember (be descriptive and clear)
- category: Type of memory - use "personal", "preferences", "facts", "tasks", "projects", or "general"
- tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming")
-
- Returns:
- Confirmation message with the memory ID
-
- """
-
- def _add_memory_operation() -> str:
- memories = _load_memories()
-
- memory = {
- "id": len(memories) + 1,
- "content": content,
- "category": category,
- "tags": _parse_tags(tags),
- "timestamp": datetime.now(UTC).isoformat(),
- }
-
- memories.append(memory)
- _save_memories(memories)
-
- return f"Memory added successfully with ID {memory['id']}"
-
- return _memory_operation("adding memory", _add_memory_operation)
-
-
-def search_memory(query: str, category: str = "") -> str:
- """Search long-term memory for relevant information before answering questions.
-
- Use this tool:
- - Before answering questions about the user's preferences, personal info, or past conversations
- - When the user asks "what do you remember about..." or similar questions
- - When you need context about the user's work, projects, or goals
- - To check if you've discussed a topic before
-
- The search looks through memory content and tags for matches.
-
- Args:
- query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences")
- category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects")
-
- Returns:
- Relevant memories found, or message if none found
-
- """
-
- def _search_memory_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories found. Memory system not initialized."
-
- # Simple text-based search
- query_lower = query.lower()
- relevant_memories = []
-
- for memory in memories:
- # Check if query matches content, tags, or category
- content_match = query_lower in memory["content"].lower()
- tag_match = any(query_lower in tag.lower() for tag in memory["tags"])
- category_match = not category or memory["category"].lower() == category.lower()
-
- if (content_match or tag_match) and category_match:
- relevant_memories.append(memory)
-
- if not relevant_memories:
- return f"No memories found matching '{query}'"
+def _format_memory_content(content: str, category: str, tags: str) -> str:
+ """Format memory content with category and tags."""
+ formatted = f"[{category}] {content}"
+ if tags:
+ formatted += f" (tags: {tags})"
+ return formatted
+
+
+class MemoryTools:
+ """Memory tools bound to a specific client and conversation."""
+
+ def __init__(
+ self,
+ memory_client: MemoryClient | None,
+ conversation_id: str = "default",
+ ) -> None:
+ self._client = memory_client
+ self._conversation_id = conversation_id
+
+ def _check(self) -> str | None:
+ if self._client is None:
+ return "Error: Memory system not initialized. Install with: pip install 'agent-cli[memory]'"
+ return None
+
+ async def add_memory(
+ self,
+ content: str,
+ category: str = "general",
+ tags: str = "",
+ ) -> str:
+ """Add important information to long-term memory for future conversations.
+
+ Use this when the user shares:
+ - Personal information (name, job, location, family, etc.)
+ - Preferences (favorite foods, work style, communication preferences, etc.)
+ - Important facts they want remembered (birthdays, project details, goals, etc.)
+ - Tasks or commitments they mention
+
+ Always ask for permission before storing personal or sensitive information.
+
+ Args:
+ content: The specific information to remember (be descriptive and clear)
+ category: Type of memory - use "personal", "preferences", "facts", "tasks", "projects", or "general"
+ tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming")
+
+ Returns:
+ Confirmation message
+
+ """
+ if error := self._check():
+ return error
+
+ try:
+ formatted = _format_memory_content(content, category, tags)
+ await self._client.add(formatted, conversation_id=self._conversation_id) # type: ignore[union-attr]
+ return "Memory added successfully."
+ except Exception as e:
+ return f"Error adding memory: {e}"
+
+ async def search_memory(self, query: str, category: str = "") -> str:
+ """Search long-term memory for relevant information before answering questions.
+
+ Use this tool:
+ - Before answering questions about the user's preferences, personal info, or past conversations
+ - When the user asks "what do you remember about..." or similar questions
+ - When you need context about the user's work, projects, or goals
+ - To check if you've discussed a topic before
+
+ This performs semantic search to find conceptually related information.
+
+ Args:
+ query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences")
+ category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects")
+
+ Returns:
+ Relevant memories found, or message if none found
+
+ """
+ if error := self._check():
+ return error
+
+ search_query = f"{category} {query}" if category else query
+
+ try:
+ result = await self._client.search(search_query, conversation_id=self._conversation_id) # type: ignore[union-attr]
+ if not result.entries:
+ return f"No memories found matching '{query}'"
+
+ lines = []
+ for entry in result.entries:
+ score_info = f" (relevance: {entry.score:.2f})" if entry.score else ""
+ lines.append(f"- {entry.content}{score_info}")
+ return "\n".join(lines)
+ except Exception as e:
+ return f"Error searching memory: {e}"
+
+ def list_all_memories(self, limit: int = 10) -> str:
+ """List all memories with their details.
+
+ Use this tool:
+ - When the user asks "show me all my memories" or "list everything you remember"
+ - When they want to see what information is stored
+ - To provide a complete overview of stored information
+
+ Shows memories in reverse chronological order (newest first).
+
+ Args:
+ limit: Maximum number of memories to show (default 10, use higher numbers if user wants more)
+
+ Returns:
+ Formatted list of all memories
+
+ """
+ if error := self._check():
+ return error
+
+ try:
+ entries = self._client.list_all( # type: ignore[union-attr]
+ conversation_id=self._conversation_id,
+ include_summary=False,
+ )
- # Format results
- results = [_format_memory_summary(memory) for memory in relevant_memories[-5:]]
+ if not entries:
+ return "No memories stored yet."
- return "\n".join(results)
+ entries_to_show = entries[:limit]
- return _memory_operation("searching memory", _search_memory_operation)
+ results = [f"Showing {len(entries_to_show)} of {len(entries)} total memories:\n"]
+ for entry in entries_to_show:
+ created_at = entry.get("created_at", "unknown")
+ role = entry.get("role", "memory")
+ content = entry.get("content", "")
+ results.append(f"- [{role}] {content} (created: {created_at})")
+ if len(entries) > limit:
+ results.append(
+ f"\n... and {len(entries) - limit} more memories. Use a higher limit to see more.",
+ )
-def update_memory(memory_id: int, content: str = "", category: str = "", tags: str = "") -> str:
- """Update an existing memory by ID.
+ return "\n".join(results)
+ except Exception as e:
+ return f"Error listing memories: {e}"
- Use this tool:
- - When the user wants to correct or modify previously stored information
- - When information has changed (e.g., job change, preference updates)
- - When the user says "update my memory about..." or "change the memory where..."
- Only provide the fields that should be updated - empty fields will keep existing values.
+def create_memory_tools(
+ memory_client: MemoryClient | None,
+ conversation_id: str = "default",
+ *,
+ read_only: bool = False,
+) -> list:
+ """Create memory tools bound to a specific client and conversation.
Args:
- memory_id: The ID of the memory to update (use search_memory or list_all_memories to find IDs)
- content: New content for the memory (leave empty to keep existing)
- category: New category (leave empty to keep existing)
- tags: New comma-separated tags (leave empty to keep existing)
+ memory_client: The MemoryClient instance, or None if not available.
+ conversation_id: The conversation ID for scoping memories.
+ read_only: If True, only include search/list tools (not add_memory).
+ Use this for "auto" mode where extraction happens automatically.
Returns:
- Confirmation message or error if memory ID not found
+ List of pydantic_ai Tool objects for memory operations.
"""
+ from pydantic_ai.tools import Tool # noqa: PLC0415
- def _update_memory_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories found. Memory system not initialized."
-
- # Find memory to update
- memory_to_update = _find_memory_by_id(memories, memory_id)
- if not memory_to_update:
- return f"Memory with ID {memory_id} not found."
-
- # Update fields if provided
- if content:
- memory_to_update["content"] = content
- if category:
- memory_to_update["category"] = category
- if tags:
- memory_to_update["tags"] = _parse_tags(tags)
-
- # Add update timestamp
- memory_to_update["updated_at"] = datetime.now(UTC).isoformat()
-
- _save_memories(memories)
- return f"Memory ID {memory_id} updated successfully."
-
- return _memory_operation("updating memory", _update_memory_operation)
-
-
-def list_all_memories(limit: int = 10) -> str:
- """List all memories with their details.
+ mt = MemoryTools(memory_client, conversation_id)
+ tools_list = [
+ Tool(mt.search_memory),
+ Tool(mt.list_all_memories),
+ ]
+ if not read_only:
+ tools_list.insert(0, Tool(mt.add_memory))
+ return tools_list
- Use this tool:
- - When the user asks "show me all my memories" or "list everything you remember"
- - When they want to see specific memory IDs for updating or reference
- - To provide a complete overview of stored information
- Shows memories in reverse chronological order (newest first).
+def tools(
+ memory_client: MemoryClient | None = None,
+ conversation_id: str = "default",
+ *,
+ memory_read_only: bool = False,
+) -> list:
+ """Return a list of all tools for the chat agent.
Args:
- limit: Maximum number of memories to show (default 10, use higher numbers if user wants more)
-
- Returns:
- Formatted list of all memories with IDs, content, categories, and tags
+ memory_client: The MemoryClient instance, or None if not available.
+ conversation_id: The conversation ID for scoping memories.
+ memory_read_only: If True, only include search/list memory tools (not add).
+ Use this for "auto" mode where extraction happens automatically.
"""
-
- def _list_all_memories_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories stored yet."
-
- # Sort by ID (newest first) and limit results
- memories_to_show = sorted(memories, key=lambda x: x["id"], reverse=True)[:limit]
-
- results = [f"Showing {len(memories_to_show)} of {len(memories)} total memories:\n"]
- results.extend(_format_memory_detailed(memory) for memory in memories_to_show)
-
- if len(memories) > limit:
- results.append(
- f"... and {len(memories) - limit} more memories. Use a higher limit to see more.",
- )
-
- return "\n".join(results)
-
- return _memory_operation("listing memories", _list_all_memories_operation)
-
-
-def list_memory_categories() -> str:
- """List all memory categories and their counts to see what has been remembered.
-
- Use this tool:
- - When the user asks "what categories do you have?"
- - To get a quick overview of memory organization
- - When the user wants to know what types of information are stored
-
- This provides a summary view before using list_all_memories for details.
-
- Returns:
- Summary of memory categories with counts (e.g., "personal: 5 memories")
-
- """
-
- def _list_categories_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories found. Memory system not initialized."
-
- # Count categories
- categories: dict[str, int] = {}
- for memory in memories:
- category = memory["category"]
- categories[category] = categories.get(category, 0) + 1
-
- if not categories:
- return "No memory categories found."
-
- results = ["Memory Categories:"]
- for category, count in sorted(categories.items()):
- results.append(f"- {category}: {count} memories")
-
- return "\n".join(results)
-
- return _memory_operation("listing categories", _list_categories_operation)
-
-
-def tools() -> list:
- """Return a list of tools."""
from pydantic_ai.common_tools.duckduckgo import duckduckgo_search_tool # noqa: PLC0415
from pydantic_ai.tools import Tool # noqa: PLC0415
return [
Tool(read_file),
Tool(execute_code),
- Tool(add_memory),
- Tool(search_memory),
- Tool(update_memory),
- Tool(list_all_memories),
- Tool(list_memory_categories),
+ *create_memory_tools(memory_client, conversation_id, read_only=memory_read_only),
duckduckgo_search_tool(),
]
diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py
index 507e22c4b..9a7b55e1d 100644
--- a/agent_cli/agents/chat.py
+++ b/agent_cli/agents/chat.py
@@ -13,6 +13,7 @@
from __future__ import annotations
import asyncio
+import hashlib
import json
import logging
import os
@@ -50,9 +51,100 @@
if TYPE_CHECKING:
from rich.live import Live
+ from agent_cli.memory.client import MemoryClient
+
LOGGER = logging.getLogger(__name__)
+
+def _get_conversation_id(history_cfg: config.History) -> str:
+ """Generate a stable conversation ID from history configuration.
+
+ Uses a hash of the history directory path to ensure consistency across sessions.
+ """
+ if history_cfg.history_dir:
+ return hashlib.md5(
+ str(Path(history_cfg.history_dir).resolve()).encode(),
+ usedforsecurity=False,
+ ).hexdigest()[:12]
+ return "default"
+
+
+def _try_init_memory(
+ memory_cfg: config.Memory,
+ history_cfg: config.History,
+ openai_llm_cfg: config.OpenAILLM,
+ quiet: bool,
+) -> MemoryClient | None:
+ """Try to initialize the memory system.
+
+ Returns the MemoryClient if successful, None otherwise.
+ """
+ from agent_cli.memory.client import MemoryClient # noqa: PLC0415
+
+ # Determine memory path
+ memory_path = memory_cfg.memory_path
+ if memory_path is None:
+ if history_cfg.history_dir:
+ memory_path = Path(history_cfg.history_dir).expanduser() / "vector_memory"
+ else:
+ memory_path = Path.home() / ".config" / "agent-cli" / "memory" / "vector_db"
+
+ # Determine OpenAI base URL for embeddings
+ openai_base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1"
+
+ if not quiet:
+ console.print("[dim]Initializing memory system...[/dim]")
+
+ memory_client = MemoryClient(
+ memory_path=memory_path,
+ openai_base_url=openai_base_url,
+ embedding_model=memory_cfg.embedding_model,
+ embedding_api_key=openai_llm_cfg.openai_api_key,
+ chat_api_key=openai_llm_cfg.openai_api_key,
+ default_top_k=memory_cfg.top_k,
+ score_threshold=memory_cfg.score_threshold,
+ recency_weight=memory_cfg.recency_weight,
+ mmr_lambda=memory_cfg.mmr_lambda,
+ enable_summarization=memory_cfg.enable_summarization,
+ enable_git_versioning=memory_cfg.enable_git_versioning,
+ max_entries=memory_cfg.max_entries,
+ start_watcher=False,
+ )
+
+ # Start the memory client's file watcher
+ memory_client.start()
+
+ if not quiet:
+ console.print("[green]Memory system initialized[/green]")
+
+ return memory_client
+
+
+def _maybe_init_memory(
+ memory_cfg: config.Memory,
+ history_cfg: config.History,
+ openai_llm_cfg: config.OpenAILLM,
+ quiet: bool,
+) -> MemoryClient | None:
+ """Initialize memory if mode is not 'off', handling errors gracefully."""
+ if memory_cfg.mode == "off":
+ return None
+ try:
+ return _try_init_memory(memory_cfg, history_cfg, openai_llm_cfg, quiet)
+ except ImportError:
+ if not quiet:
+ console.print(
+ "[yellow]Memory system not available. "
+ "Install with: pip install 'agent-cli[memory]'[/yellow]",
+ )
+ except Exception as e:
+ if not quiet:
+ console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]")
+ LOGGER.warning("Failed to initialize memory: %s", e)
+ return None
+
+
# --- Conversation History ---
@@ -74,9 +166,7 @@ class ConversationEntry(TypedDict):
- execute_code: Execute a shell command.
- add_memory: Add important information to long-term memory for future recall.
- search_memory: Search your long-term memory for relevant information.
-- update_memory: Modify existing memories by ID when information changes.
-- list_all_memories: Show all stored memories with their IDs and details.
-- list_memory_categories: See what types of information you've remembered.
+- list_all_memories: Show all stored memories with their details.
- duckduckgo_search: Search the web for current information.
Memory Guidelines:
@@ -144,10 +234,67 @@ def _format_conversation_for_llm(history: list[ConversationEntry]) -> str:
return "\n".join(formatted_lines)
+async def _maybe_extract_memories(
+ memory_cfg: config.Memory,
+ memory_client: MemoryClient | None,
+ instruction: str,
+ response_text: str,
+ conversation_id: str,
+ model: str,
+ quiet: bool,
+) -> None:
+ """Extract memories in auto mode, silently skip otherwise."""
+ if memory_cfg.mode != "auto" or memory_client is None:
+ return
+ try:
+ await memory_client.extract_from_turn(
+ user_message=instruction,
+ assistant_message=response_text,
+ conversation_id=conversation_id,
+ model=model,
+ )
+ if not quiet:
+ console.print("[dim]💾 Memory extraction complete[/dim]")
+ except Exception as e:
+ LOGGER.warning("Failed to extract memories: %s", e)
+
+
+async def _maybe_retrieve_memories(
+ memory_cfg: config.Memory,
+ memory_client: MemoryClient | None,
+ instruction: str,
+ conversation_id: str,
+) -> str:
+ """Retrieve relevant memories in auto mode for prompt injection.
+
+ Returns formatted memory context string, or empty string if not applicable.
+ """
+ if memory_cfg.mode != "auto" or memory_client is None:
+ return ""
+ try:
+ retrieval = await memory_client.search(
+ query=instruction,
+ conversation_id=conversation_id,
+ top_k=memory_cfg.top_k,
+ )
+ if not retrieval.entries:
+ return ""
+ lines = ["\n"]
+ lines.extend(f"- {entry.content}" for entry in retrieval.entries)
+ lines.append("")
+ return "\n".join(lines)
+ except Exception as e:
+ LOGGER.warning("Failed to retrieve memories: %s", e)
+ return ""
+
+
async def _handle_conversation_turn(
*,
stop_event: InteractiveStopEvent,
conversation_history: list[ConversationEntry],
+ memory_client: MemoryClient | None,
+ conversation_id: str,
+ memory_cfg: config.Memory,
provider_cfg: config.ProviderSelection,
general_cfg: config.General,
history_cfg: config.History,
@@ -213,6 +360,15 @@ async def _handle_conversation_turn(
instruction=instruction,
)
+ # 3b. Auto-retrieve and inject memories in "auto" mode
+ memory_context = await _maybe_retrieve_memories(
+ memory_cfg,
+ memory_client,
+ instruction,
+ conversation_id,
+ )
+ system_prompt = SYSTEM_PROMPT + memory_context
+
# 4. Get LLM response with timing
start_time = time.monotonic()
@@ -230,8 +386,14 @@ async def _handle_conversation_turn(
quiet=general_cfg.quiet,
stop_event=stop_event,
):
+ # Memory tools access:
+ # - "off": no memory tools
+ # - "tools": full access (add, search, list)
+ # - "auto": read-only access (search, list) - extraction happens automatically
+ tool_memory_client = memory_client if memory_cfg.mode != "off" else None
+ memory_read_only = memory_cfg.mode == "auto"
response_text = await get_llm_response(
- system_prompt=SYSTEM_PROMPT,
+ system_prompt=system_prompt,
agent_instructions=AGENT_INSTRUCTIONS,
user_input=user_message_with_context,
provider_cfg=provider_cfg,
@@ -239,7 +401,7 @@ async def _handle_conversation_turn(
openai_cfg=openai_llm_cfg,
gemini_cfg=gemini_llm_cfg,
logger=LOGGER,
- tools=tools(),
+ tools=tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only),
quiet=True, # Suppress internal output since we're showing our own timer
live=live,
)
@@ -267,6 +429,20 @@ async def _handle_conversation_turn(
},
)
+ # 5b. Auto-extract memories in "auto" mode (run in background, don't block)
+ if memory_cfg.mode == "auto" and memory_client is not None:
+ asyncio.create_task( # noqa: RUF006
+ _maybe_extract_memories(
+ memory_cfg,
+ memory_client,
+ instruction,
+ response_text,
+ conversation_id,
+ openai_llm_cfg.llm_openai_model,
+ general_cfg.quiet,
+ ),
+ )
+
# 6. Save history
if history_cfg.history_dir:
history_path = Path(history_cfg.history_dir).expanduser()
@@ -318,8 +494,11 @@ async def _async_main(
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
gemini_tts_cfg: config.GeminiTTS,
+ memory_cfg: config.Memory,
) -> None:
"""Main async function, consumes parsed arguments."""
+ memory_client = None
+
try:
device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
if device_info is None:
@@ -329,6 +508,14 @@ async def _async_main(
if audio_out_cfg.enable_tts:
audio_out_cfg.output_device_index = tts_output_device_index
+ # Initialize memory system (if not disabled)
+ memory_client = _maybe_init_memory(
+ memory_cfg,
+ history_cfg,
+ openai_llm_cfg,
+ general_cfg.quiet,
+ )
+
# Load conversation history
conversation_history = []
if history_cfg.history_dir:
@@ -342,6 +529,9 @@ async def _async_main(
history_cfg.last_n_messages,
)
+ # Generate conversation ID for memory scoping
+ conversation_id = _get_conversation_id(history_cfg)
+
with (
maybe_live(not general_cfg.quiet) as live,
signal_handling_context(LOGGER, general_cfg.quiet) as stop_event,
@@ -350,6 +540,9 @@ async def _async_main(
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
+ memory_client=memory_client,
+ conversation_id=conversation_id,
+ memory_cfg=memory_cfg,
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -371,6 +564,10 @@ async def _async_main(
if not general_cfg.quiet:
console.print_exception()
raise
+ finally:
+ # Clean up memory client
+ if memory_client is not None:
+ await memory_client.stop()
@app.command("chat")
@@ -433,6 +630,17 @@ def chat(
" Set to 0 to disable history.",
rich_help_panel="History Options",
),
+ # --- Memory Options ---
+ memory_mode: str = opts.MEMORY_MODE,
+ memory_path: Path | None = opts.MEMORY_PATH,
+ embedding_model: str = opts.EMBEDDING_MODEL,
+ memory_top_k: int = opts.MEMORY_TOP_K,
+ memory_score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
+ memory_max_entries: int = opts.MEMORY_MAX_ENTRIES,
+ memory_mmr_lambda: float = opts.MEMORY_MMR_LAMBDA,
+ memory_recency_weight: float = opts.MEMORY_RECENCY_WEIGHT,
+ memory_summarization: bool = opts.MEMORY_SUMMARIZATION,
+ memory_git_versioning: bool = opts.MEMORY_GIT_VERSIONING,
# --- General Options ---
save_file: Path | None = opts.SAVE_FILE,
log_level: str = opts.LOG_LEVEL,
@@ -535,6 +743,18 @@ def chat(
history_dir=history_dir,
last_n_messages=last_n_messages,
)
+ memory_cfg = config.Memory(
+ mode=memory_mode, # type: ignore[arg-type]
+ memory_path=memory_path,
+ embedding_model=embedding_model,
+ top_k=memory_top_k,
+ score_threshold=memory_score_threshold,
+ max_entries=memory_max_entries,
+ mmr_lambda=memory_mmr_lambda,
+ recency_weight=memory_recency_weight,
+ enable_summarization=memory_summarization,
+ enable_git_versioning=memory_git_versioning,
+ )
asyncio.run(
_async_main(
@@ -553,5 +773,6 @@ def chat(
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=memory_cfg,
),
)
diff --git a/agent_cli/agents/memory/add.py b/agent_cli/agents/memory/add.py
index 0675e9920..aea4b0301 100644
--- a/agent_cli/agents/memory/add.py
+++ b/agent_cli/agents/memory/add.py
@@ -6,7 +6,7 @@
import re
import sys
from datetime import UTC, datetime
-from pathlib import Path # noqa: TC003
+from pathlib import Path
from typing import TYPE_CHECKING, Any
import typer
@@ -127,16 +127,8 @@ def add(
"-c",
help="Conversation ID to add memories to.",
),
- memory_path: Path = typer.Option( # noqa: B008
- "./memory_db",
- "--memory-path",
- help="Path to the memory store.",
- ),
- git_versioning: bool = typer.Option(
- True, # noqa: FBT003
- "--git-versioning/--no-git-versioning",
- help="Commit changes to git.",
- ),
+ memory_path: Path | None = opts.MEMORY_PATH,
+ git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True),
quiet: bool = opts.QUIET,
config_file: str | None = opts.CONFIG_FILE,
print_args: bool = opts.PRINT_ARGS,
@@ -176,6 +168,8 @@ def add(
console.print("[red]No memories provided. Use arguments or --file.[/red]")
raise typer.Exit(1)
+ if memory_path is None:
+ memory_path = Path("./memory_db")
memory_path = memory_path.resolve()
records = _write_memories(memory_path, parsed, git_versioning)
diff --git a/agent_cli/agents/memory/proxy.py b/agent_cli/agents/memory/proxy.py
index 73906c62d..18796eedc 100644
--- a/agent_cli/agents/memory/proxy.py
+++ b/agent_cli/agents/memory/proxy.py
@@ -3,7 +3,7 @@
from __future__ import annotations
import logging
-from pathlib import Path # noqa: TC003
+from pathlib import Path
import typer
from rich.logging import RichHandler
@@ -15,57 +15,23 @@
@memory_app.command("proxy")
def proxy(
- memory_path: Path = typer.Option( # noqa: B008
- "./memory_db",
- help="Path to the memory store (files + derived vector index).",
- rich_help_panel="Memory Configuration",
- ),
+ memory_path: Path | None = opts.MEMORY_PATH,
openai_base_url: str | None = opts.OPENAI_BASE_URL,
embedding_model: str = opts.EMBEDDING_MODEL,
openai_api_key: str | None = opts.OPENAI_API_KEY,
- default_top_k: int = typer.Option(
- 5,
- help="Number of memory entries to retrieve per query.",
- rich_help_panel="Memory Configuration",
- ),
+ default_top_k: int = opts.MEMORY_TOP_K,
host: str = opts.SERVER_HOST,
port: int = typer.Option(
8100,
help="Port to bind to",
rich_help_panel="Server Configuration",
),
- max_entries: int = typer.Option(
- 500,
- help="Maximum stored memory entries per conversation (excluding summary).",
- rich_help_panel="Memory Configuration",
- ),
- mmr_lambda: float = typer.Option(
- 0.7,
- help="MMR lambda (0-1): higher favors relevance, lower favors diversity.",
- rich_help_panel="Memory Configuration",
- ),
- recency_weight: float = typer.Option(
- 0.2,
- help="Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance).",
- rich_help_panel="Memory Configuration",
- ),
- score_threshold: float = typer.Option(
- 0.35,
- help="Minimum semantic relevance threshold (0.0-1.0). Memories below this score are discarded to reduce noise.",
- rich_help_panel="Memory Configuration",
- ),
- summarization: bool = typer.Option(
- True, # noqa: FBT003
- "--summarization/--no-summarization",
- help="Enable automatic fact extraction and summaries.",
- rich_help_panel="Memory Configuration",
- ),
- git_versioning: bool = typer.Option(
- True, # noqa: FBT003
- "--git-versioning/--no-git-versioning",
- help="Enable automatic git commit of memory changes.",
- rich_help_panel="Memory Configuration",
- ),
+ max_entries: int = opts.MEMORY_MAX_ENTRIES,
+ mmr_lambda: float = opts.MEMORY_MMR_LAMBDA,
+ recency_weight: float = opts.MEMORY_RECENCY_WEIGHT,
+ score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
+ summarization: bool = opts.MEMORY_SUMMARIZATION,
+ git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True),
log_level: str = opts.with_default(opts.LOG_LEVEL, "INFO"),
config_file: str | None = opts.CONFIG_FILE,
print_args: bool = opts.PRINT_ARGS,
@@ -127,6 +93,8 @@ def proxy(
logging.getLogger("chromadb").setLevel(logging.WARNING)
logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
+ if memory_path is None:
+ memory_path = Path("./memory_db")
memory_path = memory_path.resolve()
entries_dir, _ = ensure_store_dirs(memory_path)
if openai_base_url is None:
diff --git a/agent_cli/config.py b/agent_cli/config.py
index 65c078dfa..db17115d8 100644
--- a/agent_cli/config.py
+++ b/agent_cli/config.py
@@ -224,6 +224,43 @@ def _expand_user_path(cls, v: str | None) -> Path | None:
return None
+# --- Panel: Memory Options ---
+
+
+MemoryMode = Literal["off", "tools", "auto"]
+
+
+class Memory(BaseModel):
+ """Configuration for the vector-backed memory system.
+
+ The memory system uses ChromaDB with vector embeddings for semantic search,
+ recency-aware scoring, and automatic fact reconciliation.
+
+ Modes:
+ - off: Memory disabled
+ - tools: LLM decides via add_memory/search_memory tools (default)
+ - auto: Automatic extraction and injection each turn
+ """
+
+ mode: MemoryMode = "tools"
+ memory_path: Path | None = None
+ embedding_model: str = "text-embedding-3-small"
+ top_k: int = 5
+ score_threshold: float = 0.35
+ recency_weight: float = 0.2
+ mmr_lambda: float = 0.7
+ enable_summarization: bool = True
+ enable_git_versioning: bool = False
+ max_entries: int = 500
+
+ @field_validator("memory_path", mode="before")
+ @classmethod
+ def _expand_user_path(cls, v: str | None) -> Path | None:
+ if v:
+ return Path(v).expanduser()
+ return None
+
+
def _config_path(config_path_str: str | None = None) -> Path | None:
"""Return a usable config path, expanding user directories."""
if config_path_str:
diff --git a/agent_cli/memory/client.py b/agent_cli/memory/client.py
index 3ca4762f6..6e8d62671 100644
--- a/agent_cli/memory/client.py
+++ b/agent_cli/memory/client.py
@@ -137,6 +137,32 @@ async def add(
)
evict_if_needed(self.collection, self.memory_path, conversation_id, self.max_entries)
+ async def extract_from_turn(
+ self,
+ user_message: str,
+ assistant_message: str,
+ conversation_id: str = "default",
+ model: str = DEFAULT_OPENAI_MODEL,
+ ) -> None:
+ """Extract and store facts from a conversation turn.
+
+ This is used for automatic memory extraction mode, where facts are
+ extracted from both user and assistant messages after each turn.
+ """
+ await extract_and_store_facts_and_summaries(
+ collection=self.collection,
+ memory_root=self.memory_path,
+ conversation_id=conversation_id,
+ user_message=user_message,
+ assistant_message=assistant_message,
+ openai_base_url=self.openai_base_url,
+ api_key=self.chat_api_key,
+ model=model,
+ enable_git_versioning=self.enable_git_versioning,
+ enable_summarization=self.enable_summarization,
+ )
+ evict_if_needed(self.collection, self.memory_path, conversation_id, self.max_entries)
+
async def search(
self,
query: str,
diff --git a/agent_cli/opts.py b/agent_cli/opts.py
index 1002066de..ac149b80f 100644
--- a/agent_cli/opts.py
+++ b/agent_cli/opts.py
@@ -2,6 +2,7 @@
import copy
from pathlib import Path
+from typing import Any
import typer
from typer.models import OptionInfo
@@ -9,7 +10,7 @@
from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL, DEFAULT_OPENAI_MODEL
-def with_default(option: OptionInfo, default: str) -> OptionInfo:
+def with_default(option: OptionInfo, default: Any) -> OptionInfo:
"""Create a copy of a typer Option with a different default value."""
opt = copy.copy(option)
opt.default = default
@@ -381,6 +382,63 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) -
rich_help_panel="General Options",
)
+# --- Memory Options ---
+MEMORY_MODE: str = typer.Option(
+ "tools",
+ "--memory-mode",
+ help="Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction).",
+ rich_help_panel="Memory Options",
+)
+MEMORY_PATH: Path | None = typer.Option(
+ None,
+ "--memory-path",
+ help="Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db",
+ rich_help_panel="Memory Options",
+)
+MEMORY_TOP_K: int = typer.Option(
+ 5,
+ "--memory-top-k",
+ help="Number of memories to retrieve per search.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_SCORE_THRESHOLD: float = typer.Option(
+ 0.35,
+ "--memory-score-threshold",
+ help="Minimum relevance score threshold for memory retrieval (0.0-1.0).",
+ rich_help_panel="Memory Options",
+)
+MEMORY_MAX_ENTRIES: int = typer.Option(
+ 500,
+ "--memory-max-entries",
+ help="Maximum stored memory entries per conversation (excluding summary).",
+ rich_help_panel="Memory Options",
+)
+MEMORY_MMR_LAMBDA: float = typer.Option(
+ 0.7,
+ "--memory-mmr-lambda",
+ help="MMR lambda (0-1): higher favors relevance, lower favors diversity.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_RECENCY_WEIGHT: float = typer.Option(
+ 0.2,
+ "--memory-recency-weight",
+ help="Recency score weight (0.0-1.0). Controls freshness vs. relevance.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_SUMMARIZATION: bool = typer.Option(
+ True, # noqa: FBT003
+ "--memory-summarization/--no-memory-summarization",
+ help="Enable automatic fact extraction and summaries.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_GIT_VERSIONING: bool = typer.Option(
+ False, # noqa: FBT003
+ "--memory-git-versioning/--no-memory-git-versioning",
+ help="Enable automatic git commit of memory changes.",
+ rich_help_panel="Memory Options",
+)
+
+
# --- Server Options ---
SERVER_HOST: str = typer.Option(
"0.0.0.0", # noqa: S104
diff --git a/docs/architecture/memory.md b/docs/architecture/memory.md
index f2cb3600f..6a70e50ff 100644
--- a/docs/architecture/memory.md
+++ b/docs/architecture/memory.md
@@ -39,7 +39,8 @@ A local-first system that gives LLMs persistent memory across conversations, wit
### Related
-- [memory command](../commands/memory.md) - How to run the memory proxy and add memories
+- [chat command](../commands/chat.md) - Voice-based chat agent with integrated memory
+- [memory command](../commands/memory.md) - Memory proxy server for any OpenAI-compatible app
- [Configuration](../configuration.md) - Config file keys and defaults
- [RAG System Architecture](rag.md) - Related retrieval stack for documents
- [rag-proxy command](../commands/rag-proxy.md) - Document retrieval server
diff --git a/docs/commands/chat.md b/docs/commands/chat.md
index fc3a9fbb8..f83b4a7ff 100644
--- a/docs/commands/chat.md
+++ b/docs/commands/chat.md
@@ -165,6 +165,26 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history
| `--history-dir` | `~/.config/agent-cli/history` | Directory to store conversation history. |
| `--last-n-messages` | `50` | Number of messages to include in the conversation history. Set to 0 to disable history. |
+### Memory Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--memory-mode` | `tools` | Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction). |
+| `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db |
+| `--memory-top-k` | `5` | Number of memories to retrieve per search. |
+| `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). |
+| `--memory-max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). |
+| `--memory-mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. |
+| `--memory-recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. |
+| `--memory-summarization/--no-memory-summarization` | `true` | Enable automatic fact extraction and summaries. |
+| `--memory-git-versioning/--no-memory-git-versioning` | `false` | Enable automatic git commit of memory changes. |
+
+### LLM Configuration
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--embedding-model` | `text-embedding-3-small` | Embedding model to use for vectorization. |
+
### General Options
| Option | Default | Description |
@@ -179,22 +199,53 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history
-## Available Tools
+## Memory System
-The chat agent has access to tools that let it interact with your system:
+The chat agent includes a built-in long-term memory system that allows it to remember information across conversations.
+
+The memory system uses a **vector-backed architecture** with semantic search. This provides:
+
+- **Semantic search**: Find relevant memories based on meaning, not just keywords
+- **Recency-aware scoring**: Recent memories are weighted higher
+- **Diversity selection (MMR)**: Avoids redundant memories in context
+- **Automatic reconciliation**: Contradicting facts are updated, not duplicated
+
+### Memory Modes
+
+Use `--memory-mode` to control how memory works:
+
+| Mode | Description |
+|------|-------------|
+| `off` | Memory system disabled |
+| `tools` (default) | LLM decides when to store/retrieve via tools. LLM asks permission before storing. |
+| `auto` | Automatic extraction after each conversation turn (no LLM tools exposed). |
+
+Example:
+
+```bash
+# Automatic memory extraction (no prompting, just remembers)
+agent-cli chat --memory-mode auto
+
+# Disable memory entirely
+agent-cli chat --memory-mode off
+```
> [!NOTE]
-> The memory tools below use a simple, built-in JSON storage system.
-> For the advanced, vector-backed memory system, see the [`memory`](memory.md) command.
+> The memory system requires the `[memory]` extra: `pip install "agent-cli[memory]"`.
+> If not installed, memory tools will not be available.
+
+For more details on how the memory system works, see [Memory System Architecture](../architecture/memory.md).
+
+## Available Tools
+
+The chat agent has access to tools that let it interact with your system:
- **read_file**: Read file contents
- **execute_code**: Run a single command (no shell features like pipes or redirects)
- **duckduckgo_search**: Search the web via DuckDuckGo
-- **add_memory**: Store information for future conversations
-- **search_memory**: Search stored memories
-- **update_memory**: Update existing memories
+- **add_memory**: Store information for future conversations (uses [vector memory](../architecture/memory.md))
+- **search_memory**: Search stored memories with semantic search
- **list_all_memories**: List all stored memories
-- **list_memory_categories**: Show memory category summary
## Example Conversation
diff --git a/docs/commands/memory.md b/docs/commands/memory.md
index 5ff97a459..6277f3c7a 100644
--- a/docs/commands/memory.md
+++ b/docs/commands/memory.md
@@ -69,18 +69,18 @@ agent-cli chat --openai-base-url http://localhost:8100/v1 --llm-provider openai
-### Memory Configuration
+### Memory Options
| Option | Default | Description |
|--------|---------|-------------|
-| `--memory-path` | `./memory_db` | Path to the memory store (files + derived vector index). |
-| `--default-top-k` | `5` | Number of memory entries to retrieve per query. |
-| `--max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). |
-| `--mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. |
-| `--recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance). |
-| `--score-threshold` | `0.35` | Minimum semantic relevance threshold (0.0-1.0). Memories below this score are discarded to reduce noise. |
-| `--summarization/--no-summarization` | `true` | Enable automatic fact extraction and summaries. |
-| `--git-versioning/--no-git-versioning` | `true` | Enable automatic git commit of memory changes. |
+| `--memory-path` | `./memory_db` | Path for memory database storage. |
+| `--memory-top-k` | `5` | Number of memories to retrieve per search. |
+| `--memory-max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). |
+| `--memory-mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. |
+| `--memory-recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. |
+| `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). |
+| `--memory-summarization/--no-memory-summarization` | `true` | Enable automatic fact extraction and summaries. |
+| `--memory-git-versioning/--no-memory-git-versioning` | `true` | Enable automatic git commit of memory changes. |
### LLM: OpenAI-compatible
@@ -162,8 +162,13 @@ agent-cli memory add -c work "Project deadline is Friday"
|--------|---------|-------------|
| `--file` | - | Read memories from file. Use '-' for stdin. Supports JSON array, JSON object with 'memories' key, or plain text (one per line). |
| `--conversation-id` | `default` | Conversation ID to add memories to. |
-| `--memory-path` | `./memory_db` | Path to the memory store. |
-| `--git-versioning/--no-git-versioning` | `true` | Commit changes to git. |
+
+### Memory Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--memory-path` | `./memory_db` | Path for memory database storage. |
+| `--memory-git-versioning/--no-memory-git-versioning` | `true` | Enable automatic git commit of memory changes. |
### General Options
@@ -221,6 +226,8 @@ See [Memory System Architecture](../architecture/memory.md) for the full schema
## Related
+- [chat command](chat.md) - Voice-based chat agent with integrated memory
+- [Memory System Architecture](../architecture/memory.md) - Full technical specification
- [Configuration](../configuration.md) - Config file keys for memory proxy defaults
- [rag-proxy](rag-proxy.md) - Document RAG proxy server (contrast with memory)
- [RAG System Architecture](../architecture/rag.md) - How RAG indexing and retrieval works
diff --git a/tests/agents/test_interactive.py b/tests/agents/test_interactive.py
index bc4cc7292..e51648ddc 100644
--- a/tests/agents/test_interactive.py
+++ b/tests/agents/test_interactive.py
@@ -140,6 +140,7 @@ async def test_async_main_list_devices(tmp_path: Path) -> None:
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
mock_setup_devices.assert_called_once()
@@ -209,6 +210,7 @@ async def test_async_main_list_output_devices(tmp_path: Path) -> None:
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
mock_setup_devices.assert_called_once()
@@ -265,6 +267,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None:
with (
patch("agent_cli.agents.chat.setup_devices", return_value=(1, "mock_input", 1)),
+        patch("agent_cli.agents.chat._maybe_init_memory", return_value=None),
patch("agent_cli.agents.chat.asr.create_transcriber") as mock_create_transcriber,
patch(
"agent_cli.agents.chat.get_llm_response",
@@ -302,6 +305,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None:
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
# Verify that the core functions were called
diff --git a/tests/agents/test_interactive_extra.py b/tests/agents/test_interactive_extra.py
index 6d14bafec..dcb020877 100644
--- a/tests/agents/test_interactive_extra.py
+++ b/tests/agents/test_interactive_extra.py
@@ -68,6 +68,9 @@ async def test_handle_conversation_turn_no_llm_response():
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
+ memory_client=None,
+ conversation_id="test",
+ memory_cfg=config.Memory(),
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -138,6 +141,9 @@ async def test_handle_conversation_turn_no_instruction():
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
+ memory_client=None,
+ conversation_id="test",
+ memory_cfg=config.Memory(),
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -260,5 +266,6 @@ async def test_async_main_exception_handling():
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
mock_console.print_exception.assert_called_once()
diff --git a/tests/agents/test_memory_add.py b/tests/agents/test_memory_add.py
index d81e300c9..52c0f3064 100644
--- a/tests/agents/test_memory_add.py
+++ b/tests/agents/test_memory_add.py
@@ -154,7 +154,7 @@ def test_memory_add_single_memory(tmp_path: Path) -> None:
"User likes Python",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -181,7 +181,7 @@ def test_memory_add_multiple_memories(tmp_path: Path) -> None:
"Fact three",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -203,7 +203,7 @@ def test_memory_add_from_file(tmp_path: Path) -> None:
str(input_file),
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -225,7 +225,7 @@ def test_memory_add_with_conversation_id(tmp_path: Path) -> None:
"work",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -247,7 +247,7 @@ def test_memory_add_no_memories_error(tmp_path: Path) -> None:
"add",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 1
@@ -265,7 +265,7 @@ def test_memory_add_quiet_mode(tmp_path: Path) -> None:
"Silent fact",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
"--quiet",
],
)
diff --git a/tests/memory/test_memory_integration.py b/tests/memory/test_memory_integration.py
index 28c7d48a9..e3dfcaf44 100644
--- a/tests/memory/test_memory_integration.py
+++ b/tests/memory/test_memory_integration.py
@@ -15,6 +15,9 @@
from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL
from agent_cli.memory import api as memory_api
+# Extend timeout for these tests - Windows SSL initialization is slow
+pytestmark = pytest.mark.timeout(30)
+
class _DummyReranker:
def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
diff --git a/tests/test_memory_tools.py b/tests/test_memory_tools.py
index 6c017b0a4..dabe3ae73 100644
--- a/tests/test_memory_tools.py
+++ b/tests/test_memory_tools.py
@@ -1,117 +1,567 @@
-"""Tests for the memory tools."""
+"""Tests for the memory tools in _tools.py."""
from __future__ import annotations
-import json
from pathlib import Path
-from unittest.mock import patch
-
-import pytest # noqa: TC002
-
-from agent_cli import _tools
+from unittest.mock import AsyncMock, MagicMock
+import pytest
-def test_get_memory_file_path(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- """Test the _get_memory_file_path function."""
- # Test with AGENT_CLI_HISTORY_DIR set
- history_dir = tmp_path / "history"
- monkeypatch.setenv("AGENT_CLI_HISTORY_DIR", str(history_dir))
- path = _tools._get_memory_file_path()
- assert path == history_dir / "long_term_memory.json"
-
- # Test without AGENT_CLI_HISTORY_DIR set
- monkeypatch.delenv("AGENT_CLI_HISTORY_DIR", raising=False)
- path = _tools._get_memory_file_path()
- assert path == Path.home() / ".config" / "agent-cli" / "memory" / "long_term_memory.json"
-
-
-def test_load_and_save_memories(tmp_path: Path) -> None:
- """Test the _load_memories and _save_memories functions."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Test loading from a non-existent file
- memories = _tools._load_memories()
- assert memories == []
-
- # Test saving and then loading
- memories_to_save = [{"id": 1, "content": "test"}]
- _tools._save_memories(memories_to_save)
-
- loaded_memories = _tools._load_memories()
- assert loaded_memories == memories_to_save
-
- # Verify the file content
- with memory_file.open("r") as f:
- assert json.load(f) == memories_to_save
-
-
-def test_add_and_search_memory(tmp_path: Path) -> None:
- """Test the add_memory and search_memory functions."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Test searching in an empty memory
- assert "No memories found" in _tools.search_memory("test")
-
- # Test adding a memory
- result = _tools.add_memory("test content", "test_category", "tag1, tag2")
- assert "Memory added successfully with ID 1" in result
-
- # Test searching for the new memory
- search_result = _tools.search_memory("test content")
- assert "ID: 1" in search_result
- assert "Category: test_category" in search_result
- assert "Content: test content" in search_result
- assert "Tags: tag1, tag2" in search_result
-
- # Test searching with a category filter
- search_result_cat = _tools.search_memory("test", category="test_category")
- assert "ID: 1" in search_result_cat
-
- # Test searching with a non-matching category
- search_result_no_cat = _tools.search_memory("test", category="wrong_category")
- assert "No memories found" in search_result_no_cat
-
-
-def test_update_memory(tmp_path: Path) -> None:
- """Test the update_memory function."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Add a memory to work with
- _tools.add_memory("original content", "original_category", "original_tag")
-
- # Test updating a non-existent memory
- assert "not found" in _tools.update_memory(2, content="new")
-
- # Test updating the existing memory
- update_result = _tools.update_memory(1, content="new content", category="new_category")
- assert "updated successfully" in update_result
-
- # Verify the update
- search_result = _tools.search_memory("new content")
- assert "Category: new_category" in search_result
-
-
-def test_list_all_and_categories(tmp_path: Path) -> None:
- """Test the list_all_memories and list_memory_categories functions."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Test with no memories
- assert "No memories stored" in _tools.list_all_memories()
- assert "No memories found" in _tools.list_memory_categories()
-
- # Add some memories
- _tools.add_memory("content1", "cat1", "tag1")
- _tools.add_memory("content2", "cat2", "tag2")
- _tools.add_memory("content3", "cat1", "tag3")
-
- # Test list_all_memories
- list_all_result = _tools.list_all_memories()
- assert "Showing 3 of 3 total memories" in list_all_result
- assert "ID: 1" in list_all_result
- assert "ID: 2" in list_all_result
- assert "ID: 3" in list_all_result
-
- # Test list_memory_categories
- list_cat_result = _tools.list_memory_categories()
- assert "cat1: 2 memories" in list_cat_result
- assert "cat2: 1 memories" in list_cat_result
+from agent_cli._tools import (
+ MemoryTools,
+ _format_memory_content,
+ create_memory_tools,
+ tools,
+)
+from agent_cli.agents.chat import (
+ _get_conversation_id,
+ _maybe_extract_memories,
+ _maybe_init_memory,
+ _maybe_retrieve_memories,
+)
+from agent_cli.config import History, Memory, OpenAILLM
+
+# --- Tests for _format_memory_content ---
+
+
+def test_format_memory_content_basic() -> None:
+ """Test basic memory content formatting."""
+ result = _format_memory_content("User likes Python", "preferences", "")
+ assert result == "[preferences] User likes Python"
+
+
+def test_format_memory_content_with_tags() -> None:
+ """Test memory content formatting with tags."""
+ result = _format_memory_content("User likes Python", "preferences", "programming, languages")
+ assert result == "[preferences] User likes Python (tags: programming, languages)"
+
+
+def test_format_memory_content_empty_category() -> None:
+ """Test memory content formatting with empty category."""
+ result = _format_memory_content("Some content", "", "")
+ assert result == "[] Some content"
+
+
+# --- Tests for MemoryTools._check ---
+
+
+def test_memory_tools_check_with_no_client() -> None:
+ """Test that _check returns error when client is None."""
+ mt = MemoryTools(None, "test_conversation")
+ error = mt._check()
+ assert error is not None
+ assert "Memory system not initialized" in error
+ assert "pip install 'agent-cli[memory]'" in error
+
+
+def test_memory_tools_check_with_client() -> None:
+ """Test that _check returns None when client exists."""
+ mock_client = MagicMock()
+ mt = MemoryTools(mock_client, "test_conversation")
+ error = mt._check()
+ assert error is None
+
+
+# --- Tests for MemoryTools.add_memory ---
+
+
+@pytest.mark.asyncio
+async def test_add_memory_without_client() -> None:
+ """Test add_memory returns error when no client."""
+ mt = MemoryTools(None, "test")
+ result = await mt.add_memory("content", "category", "tags")
+ assert "Error: Memory system not initialized" in result
+
+
+@pytest.mark.asyncio
+async def test_add_memory_success() -> None:
+ """Test successful memory addition."""
+ mock_client = MagicMock()
+ mock_client.add = AsyncMock()
+
+ mt = MemoryTools(mock_client, "test_conversation")
+ result = await mt.add_memory("User likes coffee", "preferences", "food")
+
+ assert result == "Memory added successfully."
+ mock_client.add.assert_called_once_with(
+ "[preferences] User likes coffee (tags: food)",
+ conversation_id="test_conversation",
+ )
+
+
+@pytest.mark.asyncio
+async def test_add_memory_exception() -> None:
+ """Test add_memory handles exceptions."""
+ mock_client = MagicMock()
+ mock_client.add = AsyncMock(side_effect=RuntimeError("Database error"))
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.add_memory("content", "category", "tags")
+
+ assert "Error adding memory" in result
+ assert "Database error" in result
+
+
+# --- Tests for MemoryTools.search_memory ---
+
+
+@pytest.mark.asyncio
+async def test_search_memory_without_client() -> None:
+ """Test search_memory returns error when no client."""
+ mt = MemoryTools(None, "test")
+ result = await mt.search_memory("query")
+ assert "Error: Memory system not initialized" in result
+
+
+@pytest.mark.asyncio
+async def test_search_memory_no_results() -> None:
+ """Test search_memory with no matching results."""
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = []
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.search_memory("nonexistent")
+
+ assert "No memories found matching 'nonexistent'" in result
+
+
+@pytest.mark.asyncio
+async def test_search_memory_with_results() -> None:
+ """Test search_memory returns formatted results."""
+ # Create mock entries
+ entry1 = MagicMock()
+ entry1.content = "User likes Python"
+ entry1.score = 0.95
+
+ entry2 = MagicMock()
+ entry2.content = "User prefers dark mode"
+ entry2.score = 0.87
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = [entry1, entry2]
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.search_memory("preferences")
+
+ assert "User likes Python" in result
+ assert "User prefers dark mode" in result
+ assert "relevance: 0.95" in result
+ assert "relevance: 0.87" in result
+
+
+@pytest.mark.asyncio
+async def test_search_memory_with_category() -> None:
+ """Test search_memory includes category in query."""
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = []
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ mt = MemoryTools(mock_client, "test_conv")
+ await mt.search_memory("coffee", category="preferences")
+
+ # Verify category is prepended to the query
+ mock_client.search.assert_called_once_with(
+ "preferences coffee",
+ conversation_id="test_conv",
+ )
+
+
+@pytest.mark.asyncio
+async def test_search_memory_exception() -> None:
+ """Test search_memory handles exceptions."""
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(side_effect=RuntimeError("Search failed"))
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.search_memory("query")
+
+ assert "Error searching memory" in result
+ assert "Search failed" in result
+
+
+# --- Tests for MemoryTools.list_all_memories ---
+
+
+def test_list_all_memories_without_client() -> None:
+ """Test list_all_memories returns error when no client."""
+ mt = MemoryTools(None, "test")
+ result = mt.list_all_memories()
+ assert "Error: Memory system not initialized" in result
+
+
+def test_list_all_memories_empty() -> None:
+ """Test list_all_memories with no stored memories."""
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(return_value=[])
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories()
+
+ assert result == "No memories stored yet."
+
+
+def test_list_all_memories_with_entries() -> None:
+ """Test list_all_memories returns formatted list."""
+ entries = [
+ {"content": "User likes Python", "role": "memory", "created_at": "2024-01-01T10:00:00"},
+ {
+ "content": "User lives in Amsterdam",
+ "role": "memory",
+ "created_at": "2024-01-02T12:00:00",
+ },
+ ]
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(return_value=entries)
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories()
+
+ assert "Showing 2 of 2 total memories" in result
+ assert "User likes Python" in result
+ assert "User lives in Amsterdam" in result
+ assert "[memory]" in result
+
+
+def test_list_all_memories_with_limit() -> None:
+ """Test list_all_memories respects limit parameter."""
+ entries = [
+ {"content": f"Memory {i}", "role": "memory", "created_at": "2024-01-01"} for i in range(5)
+ ]
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(return_value=entries)
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories(limit=3)
+
+ assert "Showing 3 of 5 total memories" in result
+ assert "... and 2 more memories" in result
+
+
+def test_list_all_memories_exception() -> None:
+ """Test list_all_memories handles exceptions."""
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(side_effect=RuntimeError("List failed"))
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories()
+
+ assert "Error listing memories" in result
+ assert "List failed" in result
+
+
+# --- Tests for create_memory_tools ---
+
+
+def test_create_memory_tools_returns_list() -> None:
+ """Test create_memory_tools returns a list of Tool objects."""
+ mock_client = MagicMock()
+ result = create_memory_tools(mock_client, "test")
+
+ assert isinstance(result, list)
+ assert len(result) == 3 # add_memory, search_memory, list_all_memories
+
+
+def test_create_memory_tools_with_none_client() -> None:
+ """Test create_memory_tools works with None client."""
+ result = create_memory_tools(None, "test")
+
+ assert isinstance(result, list)
+ assert len(result) == 3
+
+
+def test_create_memory_tools_read_only() -> None:
+ """Test create_memory_tools with read_only=True excludes add_memory."""
+ mock_client = MagicMock()
+ result = create_memory_tools(mock_client, "test", read_only=True)
+
+ assert isinstance(result, list)
+ assert len(result) == 2 # Only search_memory and list_all_memories
+
+
+def test_create_memory_tools_read_only_false() -> None:
+ """Test create_memory_tools with read_only=False includes add_memory."""
+ mock_client = MagicMock()
+ result = create_memory_tools(mock_client, "test", read_only=False)
+
+ assert isinstance(result, list)
+ assert len(result) == 3 # add_memory, search_memory, list_all_memories
+
+
+# --- Tests for tools function ---
+
+
+def test_tools_returns_all_expected_tools() -> None:
+ """Test tools function returns all expected tools."""
+ result = tools(None, "test")
+
+ assert isinstance(result, list)
+ # Should have: read_file, execute_code, 3 memory tools, duckduckgo_search
+ assert len(result) == 6
+
+
+def test_tools_with_memory_client() -> None:
+ """Test tools function works with a memory client."""
+ mock_client = MagicMock()
+ result = tools(mock_client, "conversation_123")
+
+ assert isinstance(result, list)
+ assert len(result) == 6
+
+
+def test_tools_memory_read_only() -> None:
+ """Test tools function with memory_read_only=True has fewer memory tools."""
+ mock_client = MagicMock()
+ result = tools(mock_client, "test", memory_read_only=True)
+
+ assert isinstance(result, list)
+ # Should have: read_file, execute_code, 2 memory tools (no add_memory), duckduckgo_search
+ assert len(result) == 5
+
+
+def test_tools_memory_read_only_false() -> None:
+ """Test tools function with memory_read_only=False includes all memory tools."""
+ mock_client = MagicMock()
+ result = tools(mock_client, "test", memory_read_only=False)
+
+ assert isinstance(result, list)
+ # Should have: read_file, execute_code, 3 memory tools, duckduckgo_search
+ assert len(result) == 6
+
+
+# --- Tests for chat.py integration functions ---
+
+
+def test_get_conversation_id_with_history_dir() -> None:
+ """Test _get_conversation_id generates stable ID from history dir."""
+ history_cfg = History(history_dir=Path("/home/user/.chat-history"))
+ result = _get_conversation_id(history_cfg)
+
+ # Should be a 12-character hex string
+ assert len(result) == 12
+ assert all(c in "0123456789abcdef" for c in result)
+
+
+def test_get_conversation_id_without_history_dir() -> None:
+ """Test _get_conversation_id returns 'default' when no history dir."""
+ history_cfg = History(history_dir=None)
+ result = _get_conversation_id(history_cfg)
+
+ assert result == "default"
+
+
+def test_get_conversation_id_is_stable() -> None:
+ """Test _get_conversation_id produces same ID for same path."""
+ history_cfg1 = History(history_dir=Path("/some/path"))
+ history_cfg2 = History(history_dir=Path("/some/path"))
+
+ assert _get_conversation_id(history_cfg1) == _get_conversation_id(history_cfg2)
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_off_mode() -> None:
+ """Test _maybe_extract_memories does nothing when mode is not 'auto'."""
+ memory_cfg = Memory(mode="tools") # Not 'auto'
+ mock_client = MagicMock()
+ mock_client.extract_from_turn = AsyncMock()
+
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ response_text="response",
+ conversation_id="test",
+ model="gpt-4",
+ quiet=True,
+ )
+
+ # Should not call extract_from_turn when mode is not 'auto'
+ mock_client.extract_from_turn.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_auto_mode() -> None:
+ """Test _maybe_extract_memories extracts when mode is 'auto'."""
+ memory_cfg = Memory(mode="auto")
+ mock_client = MagicMock()
+ mock_client.extract_from_turn = AsyncMock()
+
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="Hello world",
+ response_text="Hi there!",
+ conversation_id="conv123",
+ model="gpt-4",
+ quiet=True,
+ )
+
+ mock_client.extract_from_turn.assert_called_once_with(
+ user_message="Hello world",
+ assistant_message="Hi there!",
+ conversation_id="conv123",
+ model="gpt-4",
+ )
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_handles_exception() -> None:
+ """Test _maybe_extract_memories handles exceptions gracefully."""
+ memory_cfg = Memory(mode="auto")
+ mock_client = MagicMock()
+ mock_client.extract_from_turn = AsyncMock(side_effect=RuntimeError("Extraction failed"))
+
+ # Should not raise, just log warning
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ response_text="response",
+ conversation_id="test",
+ model="gpt-4",
+ quiet=True,
+ )
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_no_client() -> None:
+ """Test _maybe_extract_memories does nothing when client is None."""
+ memory_cfg = Memory(mode="auto")
+
+ # Should not raise even with None client
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=None,
+ instruction="test",
+ response_text="response",
+ conversation_id="test",
+ model="gpt-4",
+ quiet=True,
+ )
+
+
+def test_maybe_init_memory_off_mode() -> None:
+ """Test _maybe_init_memory returns None when mode is 'off'."""
+ memory_cfg = Memory(mode="off")
+ history_cfg = History()
+ openai_cfg = OpenAILLM(llm_openai_model="gpt-4o-mini")
+
+ result = _maybe_init_memory(memory_cfg, history_cfg, openai_cfg, quiet=True)
+ assert result is None
+
+
+# --- Tests for _maybe_retrieve_memories ---
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_off_mode() -> None:
+ """Test _maybe_retrieve_memories returns empty string when mode is not 'auto'."""
+ memory_cfg = Memory(mode="tools") # Not 'auto'
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock()
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""
+ mock_client.search.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_auto_mode_with_results() -> None:
+ """Test _maybe_retrieve_memories returns formatted context in auto mode."""
+ memory_cfg = Memory(mode="auto", top_k=3)
+
+ # Create mock entries
+ entry1 = MagicMock()
+ entry1.content = "User likes pizza"
+ entry2 = MagicMock()
+ entry2.content = "User prefers Italian food"
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = [entry1, entry2]
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="What food do I like?",
+ conversation_id="conv123",
+ )
+
+ assert "" in result
+ assert "" in result
+ assert "User likes pizza" in result
+ assert "User prefers Italian food" in result
+ mock_client.search.assert_called_once_with(
+ query="What food do I like?",
+ conversation_id="conv123",
+ top_k=3,
+ )
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_auto_mode_no_results() -> None:
+ """Test _maybe_retrieve_memories returns empty string when no memories found."""
+ memory_cfg = Memory(mode="auto")
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = []
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_no_client() -> None:
+ """Test _maybe_retrieve_memories returns empty string when client is None."""
+ memory_cfg = Memory(mode="auto")
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=None,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_handles_exception() -> None:
+ """Test _maybe_retrieve_memories handles exceptions gracefully."""
+ memory_cfg = Memory(mode="auto")
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(side_effect=RuntimeError("Search failed"))
+
+ # Should not raise, just return empty string
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""