diff --git a/README.md b/README.md
index f0a5b2d5f..c0676575c 100644
--- a/README.md
+++ b/README.md
@@ -1529,6 +1529,57 @@ uv tool install "agent-cli[vad]"
│ history. │
│ [default: 50] │
╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-mode TEXT Memory mode: 'off' │
+│ (disabled), 'tools' │
+│ (LLM decides via │
+│ tools), 'auto' │
+│ (automatic │
+│ extraction). │
+│ [default: tools] │
+│ --memory-path PATH Path for memory │
+│ database storage. │
+│ Default: │
+│ ~/.config/agent-cli… │
+│ --memory-top-k INTEGER Number of memories │
+│ to retrieve per │
+│ search. │
+│ [default: 5] │
+│ --memory-score-thre… FLOAT Minimum relevance │
+│ score threshold for │
+│ memory retrieval │
+│ (0.0-1.0). │
+│ [default: 0.35] │
+│ --memory-max-entries INTEGER Maximum stored │
+│ memory entries per │
+│ conversation │
+│ (excluding summary). │
+│ [default: 500] │
+│ --memory-mmr-lambda FLOAT MMR lambda (0-1): │
+│ higher favors │
+│ relevance, lower │
+│ favors diversity. │
+│ [default: 0.7] │
+│ --memory-recency-we… FLOAT Recency score weight │
+│ (0.0-1.0). Controls │
+│ freshness vs. │
+│ relevance. │
+│ [default: 0.2] │
+│ --memory-summarizat… --no-memory-summar… Enable automatic │
+│ fact extraction and │
+│ summaries. │
+│ [default: │
+│ memory-summarizatio… │
+│ --memory-git-versio… --no-memory-git-ve… Enable automatic git │
+│ commit of memory │
+│ changes. │
+│ [default: │
+│ no-memory-git-versi… │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ LLM Configuration ──────────────────────────────────────────────────────────╮
+│ --embedding-model TEXT Embedding model to use for vectorization. │
+│ [default: text-embedding-3-small] │
+╰──────────────────────────────────────────────────────────────────────────────╯
╭─ General Options ────────────────────────────────────────────────────────────╮
│ --save-file PATH Save TTS response audio to WAV file. │
│ --log-level TEXT Set logging level. │
@@ -1718,49 +1769,45 @@ The `memory proxy` command is the core feature—a middleware server that gives
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help -h Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
-╭─ Memory Configuration ───────────────────────────────────────────────────────╮
-│ --memory-path PATH Path to the memory │
-│ store (files + derived │
-│ vector index). │
-│ [default: ./memory_db] │
-│ --default-top-k INTEGER Number of memory │
-│ entries to retrieve per │
-│ query. │
-│ [default: 5] │
-│ --max-entries INTEGER Maximum stored memory │
-│ entries per │
-│ conversation (excluding │
-│ summary). │
-│ [default: 500] │
-│ --mmr-lambda FLOAT MMR lambda (0-1): │
-│ higher favors │
-│ relevance, lower favors │
-│ diversity. │
-│ [default: 0.7] │
-│ --recency-weight FLOAT Recency score weight │
-│ (0.0-1.0). Controls │
-│ freshness vs. │
-│ relevance. Default 0.2 │
-│ (20% recency, 80% │
-│ semantic relevance). │
-│ [default: 0.2] │
-│ --score-threshold FLOAT Minimum semantic │
-│ relevance threshold │
-│ (0.0-1.0). Memories │
-│ below this score are │
-│ discarded to reduce │
-│ noise. │
-│ [default: 0.35] │
-│ --summarization --no-summarization Enable automatic fact │
-│ extraction and │
-│ summaries. │
-│ [default: │
-│ summarization] │
-│ --git-versioning --no-git-versioning Enable automatic git │
-│ commit of memory │
-│ changes. │
-│ [default: │
-│ git-versioning] │
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-path PATH Path for memory │
+│ database storage. │
+│ Default: │
+│ ~/.config/agent-cli… │
+│ --memory-top-k INTEGER Number of memories │
+│ to retrieve per │
+│ search. │
+│ [default: 5] │
+│ --memory-max-entries INTEGER Maximum stored │
+│ memory entries per │
+│ conversation │
+│ (excluding summary). │
+│ [default: 500] │
+│ --memory-mmr-lambda FLOAT MMR lambda (0-1): │
+│ higher favors │
+│ relevance, lower │
+│ favors diversity. │
+│ [default: 0.7] │
+│ --memory-recency-we… FLOAT Recency score weight │
+│ (0.0-1.0). Controls │
+│ freshness vs. │
+│ relevance. │
+│ [default: 0.2] │
+│ --memory-score-thre… FLOAT Minimum relevance │
+│ score threshold for │
+│ memory retrieval │
+│ (0.0-1.0). │
+│ [default: 0.35] │
+│ --memory-summarizat… --no-memory-summar… Enable automatic │
+│ fact extraction and │
+│ summaries. │
+│ [default: │
+│ memory-summarizatio… │
+│ --memory-git-versio… --no-memory-git-ve… Enable automatic git │
+│ commit of memory │
+│ changes. │
+│ [default: │
+│ memory-git-versioni… │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ LLM: OpenAI-compatible ─────────────────────────────────────────────────────╮
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API │
@@ -1868,23 +1915,24 @@ agent-cli memory add -c work "Project deadline is Friday"
│ fact. │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --file -f PATH Read memories from file. │
-│ Use '-' for stdin. │
-│ Supports JSON array, │
-│ JSON object with │
-│ 'memories' key, or plain │
-│ text (one per line). │
-│ --conversation-id -c TEXT Conversation ID to add │
-│ memories to. │
-│ [default: default] │
-│ --memory-path PATH Path to the memory │
-│ store. │
-│ [default: ./memory_db] │
-│ --git-versioning --no-git-versioning Commit changes to git. │
-│ [default: │
-│ git-versioning] │
-│ --help -h Show this message and │
-│ exit. │
+│ --file -f PATH Read memories from file. Use '-' for stdin. │
+│ Supports JSON array, JSON object with │
+│ 'memories' key, or plain text (one per │
+│ line). │
+│ --conversation-id -c TEXT Conversation ID to add memories to. │
+│ [default: default] │
+│ --help -h Show this message and exit. │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Memory Options ─────────────────────────────────────────────────────────────╮
+│ --memory-path PATH Path for memory │
+│ database storage. │
+│ Default: │
+│ ~/.config/agent-cli/… │
+│ --memory-git-version… --no-memory-git-ver… Enable automatic git │
+│ commit of memory │
+│ changes. │
+│ [default: │
+│ memory-git-versionin… │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ General Options ────────────────────────────────────────────────────────────╮
│ --quiet -q Suppress console output from rich. │
diff --git a/agent_cli/_tools.py b/agent_cli/_tools.py
index 1a7ce8957..ad94fab48 100644
--- a/agent_cli/_tools.py
+++ b/agent_cli/_tools.py
@@ -2,99 +2,12 @@
from __future__ import annotations
-import json
-import os
import subprocess
-from datetime import UTC, datetime
from pathlib import Path
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING
if TYPE_CHECKING:
- from collections.abc import Callable
-
-
-# Memory system helpers
-
-
-def _get_memory_file_path() -> Path:
- """Get the path to the memory file.
-
- If the environment variable ``AGENT_CLI_HISTORY_DIR`` is set (by the
- running agent), store the memory file in that directory.
- Otherwise fall back to the user's config directory.
- """
- history_dir = os.getenv("AGENT_CLI_HISTORY_DIR")
- if history_dir:
- return Path(history_dir).expanduser() / "long_term_memory.json"
-
- return Path.home() / ".config" / "agent-cli" / "memory" / "long_term_memory.json"
-
-
-def _load_memories() -> list[dict[str, Any]]:
- """Load memories from file, returning empty list if file doesn't exist."""
- memory_file = _get_memory_file_path()
- if not memory_file.exists():
- return []
-
- with memory_file.open("r") as f:
- return json.load(f)
-
-
-def _save_memories(memories: list[dict[str, Any]]) -> None:
- """Save memories to file, creating directories if needed."""
- memory_file = _get_memory_file_path()
- memory_file.parent.mkdir(parents=True, exist_ok=True)
-
- with memory_file.open("w") as f:
- json.dump(memories, f, indent=2)
-
-
-def _find_memory_by_id(memories: list[dict[str, Any]], memory_id: int) -> dict[str, Any] | None:
- """Find a memory by ID in the memories list."""
- for memory in memories:
- if memory["id"] == memory_id:
- return memory
- return None
-
-
-def _format_memory_summary(memory: dict[str, Any]) -> str:
- """Format a memory for display in search results."""
- return (
- f"ID: {memory['id']} | Category: {memory['category']} | "
- f"Content: {memory['content']} | Tags: {', '.join(memory['tags'])}"
- )
-
-
-def _format_memory_detailed(memory: dict[str, Any]) -> str:
- """Format a memory with full details for listing."""
- created = datetime.fromisoformat(memory["timestamp"]).strftime("%Y-%m-%d %H:%M")
- updated_info = ""
- if "updated_at" in memory:
- updated = datetime.fromisoformat(memory["updated_at"]).strftime("%Y-%m-%d %H:%M")
- updated_info = f" (updated: {updated})"
-
- return (
- f"ID: {memory['id']} | Category: {memory['category']}\n"
- f"Content: {memory['content']}\n"
- f"Tags: {', '.join(memory['tags']) if memory['tags'] else 'None'}\n"
- f"Created: {created}{updated_info}\n"
- )
-
-
-def _parse_tags(tags_string: str) -> list[str]:
- """Parse comma-separated tags string into a list of clean tags."""
- return [tag.strip() for tag in tags_string.split(",") if tag.strip()]
-
-
-R = TypeVar("R")
-
-
-def _memory_operation(operation_name: str, operation_func: Callable[[], str]) -> str:
- """Wrapper for memory operations with consistent error handling."""
- try:
- return operation_func()
- except Exception as e:
- return f"Error {operation_name}: {e}"
+ from agent_cli.memory.client import MemoryClient
def read_file(path: str) -> str:
@@ -133,236 +46,201 @@ def execute_code(code: str) -> str:
return f"Error: Command not found: {code.split()[0]}"
-def add_memory(content: str, category: str = "general", tags: str = "") -> str:
- """Add important information to long-term memory for future conversations.
-
- Use this when the user shares:
- - Personal information (name, job, location, family, etc.)
- - Preferences (favorite foods, work style, communication preferences, etc.)
- - Important facts they want remembered (birthdays, project details, goals, etc.)
- - Tasks or commitments they mention
-
- Always ask for permission before storing personal or sensitive information.
-
- Args:
- content: The specific information to remember (be descriptive and clear)
- category: Type of memory - use "personal", "preferences", "facts", "tasks", "projects", or "general"
- tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming")
-
- Returns:
- Confirmation message with the memory ID
-
- """
-
- def _add_memory_operation() -> str:
- memories = _load_memories()
-
- memory = {
- "id": len(memories) + 1,
- "content": content,
- "category": category,
- "tags": _parse_tags(tags),
- "timestamp": datetime.now(UTC).isoformat(),
- }
-
- memories.append(memory)
- _save_memories(memories)
-
- return f"Memory added successfully with ID {memory['id']}"
-
- return _memory_operation("adding memory", _add_memory_operation)
-
-
-def search_memory(query: str, category: str = "") -> str:
- """Search long-term memory for relevant information before answering questions.
-
- Use this tool:
- - Before answering questions about the user's preferences, personal info, or past conversations
- - When the user asks "what do you remember about..." or similar questions
- - When you need context about the user's work, projects, or goals
- - To check if you've discussed a topic before
-
- The search looks through memory content and tags for matches.
-
- Args:
- query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences")
- category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects")
-
- Returns:
- Relevant memories found, or message if none found
-
- """
-
- def _search_memory_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories found. Memory system not initialized."
-
- # Simple text-based search
- query_lower = query.lower()
- relevant_memories = []
-
- for memory in memories:
- # Check if query matches content, tags, or category
- content_match = query_lower in memory["content"].lower()
- tag_match = any(query_lower in tag.lower() for tag in memory["tags"])
- category_match = not category or memory["category"].lower() == category.lower()
-
- if (content_match or tag_match) and category_match:
- relevant_memories.append(memory)
-
- if not relevant_memories:
- return f"No memories found matching '{query}'"
+def _format_memory_content(content: str, category: str, tags: str) -> str:
+ """Format memory content with category and tags."""
+ formatted = f"[{category}] {content}"
+ if tags:
+ formatted += f" (tags: {tags})"
+ return formatted
+
+
+class MemoryTools:
+ """Memory tools bound to a specific client and conversation."""
+
+ def __init__(
+ self,
+ memory_client: MemoryClient | None,
+ conversation_id: str = "default",
+ ) -> None:
+ self._client = memory_client
+ self._conversation_id = conversation_id
+
+ def _check(self) -> str | None:
+ if self._client is None:
+ return "Error: Memory system not initialized. Install with: pip install 'agent-cli[memory]'"
+ return None
+
+ async def add_memory(
+ self,
+ content: str,
+ category: str = "general",
+ tags: str = "",
+ ) -> str:
+ """Add important information to long-term memory for future conversations.
+
+ Use this when the user shares:
+ - Personal information (name, job, location, family, etc.)
+ - Preferences (favorite foods, work style, communication preferences, etc.)
+ - Important facts they want remembered (birthdays, project details, goals, etc.)
+ - Tasks or commitments they mention
+
+ Always ask for permission before storing personal or sensitive information.
+
+ Args:
+ content: The specific information to remember (be descriptive and clear)
+ category: Type of memory - use "personal", "preferences", "facts", "tasks", "projects", or "general"
+ tags: Comma-separated keywords that would help find this memory later (e.g., "work, python, programming")
+
+ Returns:
+ Confirmation message
+
+ """
+ if error := self._check():
+ return error
+
+ try:
+ formatted = _format_memory_content(content, category, tags)
+ await self._client.add(formatted, conversation_id=self._conversation_id) # type: ignore[union-attr]
+ return "Memory added successfully."
+ except Exception as e:
+ return f"Error adding memory: {e}"
+
+ async def search_memory(self, query: str, category: str = "") -> str:
+ """Search long-term memory for relevant information before answering questions.
+
+ Use this tool:
+ - Before answering questions about the user's preferences, personal info, or past conversations
+ - When the user asks "what do you remember about..." or similar questions
+ - When you need context about the user's work, projects, or goals
+ - To check if you've discussed a topic before
+
+ This performs semantic search to find conceptually related information.
+
+ Args:
+ query: Keywords to search for (e.g., "programming languages", "work schedule", "preferences")
+ category: Optional filter by category ("personal", "preferences", "facts", "tasks", "projects")
+
+ Returns:
+ Relevant memories found, or message if none found
+
+ """
+ if error := self._check():
+ return error
+
+ search_query = f"{category} {query}" if category else query
+
+ try:
+ result = await self._client.search(search_query, conversation_id=self._conversation_id) # type: ignore[union-attr]
+ if not result.entries:
+ return f"No memories found matching '{query}'"
+
+ lines = []
+ for entry in result.entries:
+ score_info = f" (relevance: {entry.score:.2f})" if entry.score else ""
+ lines.append(f"- {entry.content}{score_info}")
+ return "\n".join(lines)
+ except Exception as e:
+ return f"Error searching memory: {e}"
+
+ def list_all_memories(self, limit: int = 10) -> str:
+ """List all memories with their details.
+
+ Use this tool:
+ - When the user asks "show me all my memories" or "list everything you remember"
+ - When they want to see what information is stored
+ - To provide a complete overview of stored information
+
+ Shows memories in reverse chronological order (newest first).
+
+ Args:
+ limit: Maximum number of memories to show (default 10, use higher numbers if user wants more)
+
+ Returns:
+ Formatted list of all memories
+
+ """
+ if error := self._check():
+ return error
+
+ try:
+ entries = self._client.list_all( # type: ignore[union-attr]
+ conversation_id=self._conversation_id,
+ include_summary=False,
+ )
- # Format results
- results = [_format_memory_summary(memory) for memory in relevant_memories[-5:]]
+ if not entries:
+ return "No memories stored yet."
- return "\n".join(results)
+ entries_to_show = entries[:limit]
- return _memory_operation("searching memory", _search_memory_operation)
+ results = [f"Showing {len(entries_to_show)} of {len(entries)} total memories:\n"]
+ for entry in entries_to_show:
+ created_at = entry.get("created_at", "unknown")
+ role = entry.get("role", "memory")
+ content = entry.get("content", "")
+ results.append(f"- [{role}] {content} (created: {created_at})")
+ if len(entries) > limit:
+ results.append(
+ f"\n... and {len(entries) - limit} more memories. Use a higher limit to see more.",
+ )
-def update_memory(memory_id: int, content: str = "", category: str = "", tags: str = "") -> str:
- """Update an existing memory by ID.
+ return "\n".join(results)
+ except Exception as e:
+ return f"Error listing memories: {e}"
- Use this tool:
- - When the user wants to correct or modify previously stored information
- - When information has changed (e.g., job change, preference updates)
- - When the user says "update my memory about..." or "change the memory where..."
- Only provide the fields that should be updated - empty fields will keep existing values.
+def create_memory_tools(
+ memory_client: MemoryClient | None,
+ conversation_id: str = "default",
+ *,
+ read_only: bool = False,
+) -> list:
+ """Create memory tools bound to a specific client and conversation.
Args:
- memory_id: The ID of the memory to update (use search_memory or list_all_memories to find IDs)
- content: New content for the memory (leave empty to keep existing)
- category: New category (leave empty to keep existing)
- tags: New comma-separated tags (leave empty to keep existing)
+ memory_client: The MemoryClient instance, or None if not available.
+ conversation_id: The conversation ID for scoping memories.
+ read_only: If True, only include search/list tools (not add_memory).
+ Use this for "auto" mode where extraction happens automatically.
Returns:
- Confirmation message or error if memory ID not found
+ List of pydantic_ai Tool objects for memory operations.
"""
+ from pydantic_ai.tools import Tool # noqa: PLC0415
- def _update_memory_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories found. Memory system not initialized."
-
- # Find memory to update
- memory_to_update = _find_memory_by_id(memories, memory_id)
- if not memory_to_update:
- return f"Memory with ID {memory_id} not found."
-
- # Update fields if provided
- if content:
- memory_to_update["content"] = content
- if category:
- memory_to_update["category"] = category
- if tags:
- memory_to_update["tags"] = _parse_tags(tags)
-
- # Add update timestamp
- memory_to_update["updated_at"] = datetime.now(UTC).isoformat()
-
- _save_memories(memories)
- return f"Memory ID {memory_id} updated successfully."
-
- return _memory_operation("updating memory", _update_memory_operation)
-
-
-def list_all_memories(limit: int = 10) -> str:
- """List all memories with their details.
+ mt = MemoryTools(memory_client, conversation_id)
+ tools_list = [
+ Tool(mt.search_memory),
+ Tool(mt.list_all_memories),
+ ]
+ if not read_only:
+ tools_list.insert(0, Tool(mt.add_memory))
+ return tools_list
- Use this tool:
- - When the user asks "show me all my memories" or "list everything you remember"
- - When they want to see specific memory IDs for updating or reference
- - To provide a complete overview of stored information
- Shows memories in reverse chronological order (newest first).
+def tools(
+ memory_client: MemoryClient | None = None,
+ conversation_id: str = "default",
+ *,
+ memory_read_only: bool = False,
+) -> list:
+ """Return a list of all tools for the chat agent.
Args:
- limit: Maximum number of memories to show (default 10, use higher numbers if user wants more)
-
- Returns:
- Formatted list of all memories with IDs, content, categories, and tags
+ memory_client: The MemoryClient instance, or None if not available.
+ conversation_id: The conversation ID for scoping memories.
+ memory_read_only: If True, only include search/list memory tools (not add).
+ Use this for "auto" mode where extraction happens automatically.
"""
-
- def _list_all_memories_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories stored yet."
-
- # Sort by ID (newest first) and limit results
- memories_to_show = sorted(memories, key=lambda x: x["id"], reverse=True)[:limit]
-
- results = [f"Showing {len(memories_to_show)} of {len(memories)} total memories:\n"]
- results.extend(_format_memory_detailed(memory) for memory in memories_to_show)
-
- if len(memories) > limit:
- results.append(
- f"... and {len(memories) - limit} more memories. Use a higher limit to see more.",
- )
-
- return "\n".join(results)
-
- return _memory_operation("listing memories", _list_all_memories_operation)
-
-
-def list_memory_categories() -> str:
- """List all memory categories and their counts to see what has been remembered.
-
- Use this tool:
- - When the user asks "what categories do you have?"
- - To get a quick overview of memory organization
- - When the user wants to know what types of information are stored
-
- This provides a summary view before using list_all_memories for details.
-
- Returns:
- Summary of memory categories with counts (e.g., "personal: 5 memories")
-
- """
-
- def _list_categories_operation() -> str:
- memories = _load_memories()
-
- if not memories:
- return "No memories found. Memory system not initialized."
-
- # Count categories
- categories: dict[str, int] = {}
- for memory in memories:
- category = memory["category"]
- categories[category] = categories.get(category, 0) + 1
-
- if not categories:
- return "No memory categories found."
-
- results = ["Memory Categories:"]
- for category, count in sorted(categories.items()):
- results.append(f"- {category}: {count} memories")
-
- return "\n".join(results)
-
- return _memory_operation("listing categories", _list_categories_operation)
-
-
-def tools() -> list:
- """Return a list of tools."""
from pydantic_ai.common_tools.duckduckgo import duckduckgo_search_tool # noqa: PLC0415
from pydantic_ai.tools import Tool # noqa: PLC0415
return [
Tool(read_file),
Tool(execute_code),
- Tool(add_memory),
- Tool(search_memory),
- Tool(update_memory),
- Tool(list_all_memories),
- Tool(list_memory_categories),
+ *create_memory_tools(memory_client, conversation_id, read_only=memory_read_only),
duckduckgo_search_tool(),
]
diff --git a/agent_cli/agents/chat.py b/agent_cli/agents/chat.py
index 507e22c4b..9a7b55e1d 100644
--- a/agent_cli/agents/chat.py
+++ b/agent_cli/agents/chat.py
@@ -13,6 +13,7 @@
from __future__ import annotations
import asyncio
+import hashlib
import json
import logging
import os
@@ -50,9 +51,100 @@
if TYPE_CHECKING:
from rich.live import Live
+ from agent_cli.memory.client import MemoryClient
+
LOGGER = logging.getLogger(__name__)
+
+def _get_conversation_id(history_cfg: config.History) -> str:
+ """Generate a stable conversation ID from history configuration.
+
+ Uses a hash of the history directory path to ensure consistency across sessions.
+ """
+ if history_cfg.history_dir:
+ return hashlib.md5(
+ str(Path(history_cfg.history_dir).resolve()).encode(),
+ usedforsecurity=False,
+ ).hexdigest()[:12]
+ return "default"
+
+
+def _try_init_memory(
+ memory_cfg: config.Memory,
+ history_cfg: config.History,
+ openai_llm_cfg: config.OpenAILLM,
+ quiet: bool,
+) -> MemoryClient | None:
+ """Try to initialize the memory system.
+
+ Returns the MemoryClient if successful, None otherwise.
+ """
+ from agent_cli.memory.client import MemoryClient # noqa: PLC0415
+
+ # Determine memory path
+ memory_path = memory_cfg.memory_path
+ if memory_path is None:
+ if history_cfg.history_dir:
+ memory_path = Path(history_cfg.history_dir).expanduser() / "vector_memory"
+ else:
+ memory_path = Path.home() / ".config" / "agent-cli" / "memory" / "vector_db"
+
+ # Determine OpenAI base URL for embeddings
+ openai_base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1"
+
+ if not quiet:
+ console.print("[dim]Initializing memory system...[/dim]")
+
+ memory_client = MemoryClient(
+ memory_path=memory_path,
+ openai_base_url=openai_base_url,
+ embedding_model=memory_cfg.embedding_model,
+ embedding_api_key=openai_llm_cfg.openai_api_key,
+ chat_api_key=openai_llm_cfg.openai_api_key,
+ default_top_k=memory_cfg.top_k,
+ score_threshold=memory_cfg.score_threshold,
+ recency_weight=memory_cfg.recency_weight,
+ mmr_lambda=memory_cfg.mmr_lambda,
+ enable_summarization=memory_cfg.enable_summarization,
+ enable_git_versioning=memory_cfg.enable_git_versioning,
+ max_entries=memory_cfg.max_entries,
+ start_watcher=False,
+ )
+
+ # Start the memory client's file watcher
+ memory_client.start()
+
+ if not quiet:
+ console.print("[green]Memory system initialized[/green]")
+
+ return memory_client
+
+
+def _maybe_init_memory(
+ memory_cfg: config.Memory,
+ history_cfg: config.History,
+ openai_llm_cfg: config.OpenAILLM,
+ quiet: bool,
+) -> MemoryClient | None:
+ """Initialize memory if mode is not 'off', handling errors gracefully."""
+ if memory_cfg.mode == "off":
+ return None
+ try:
+ return _try_init_memory(memory_cfg, history_cfg, openai_llm_cfg, quiet)
+ except ImportError:
+ if not quiet:
+ console.print(
+ "[yellow]Memory system not available. "
+ "Install with: pip install 'agent-cli[memory]'[/yellow]",
+ )
+ except Exception as e:
+ if not quiet:
+ console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]")
+ LOGGER.warning("Failed to initialize memory: %s", e)
+ return None
+
+
# --- Conversation History ---
@@ -74,9 +166,7 @@ class ConversationEntry(TypedDict):
- execute_code: Execute a shell command.
- add_memory: Add important information to long-term memory for future recall.
- search_memory: Search your long-term memory for relevant information.
-- update_memory: Modify existing memories by ID when information changes.
-- list_all_memories: Show all stored memories with their IDs and details.
-- list_memory_categories: See what types of information you've remembered.
+- list_all_memories: Show all stored memories with their details.
- duckduckgo_search: Search the web for current information.
Memory Guidelines:
@@ -144,10 +234,67 @@ def _format_conversation_for_llm(history: list[ConversationEntry]) -> str:
return "\n".join(formatted_lines)
+async def _maybe_extract_memories(
+ memory_cfg: config.Memory,
+ memory_client: MemoryClient | None,
+ instruction: str,
+ response_text: str,
+ conversation_id: str,
+ model: str,
+ quiet: bool,
+) -> None:
+ """Extract memories in auto mode, silently skip otherwise."""
+ if memory_cfg.mode != "auto" or memory_client is None:
+ return
+ try:
+ await memory_client.extract_from_turn(
+ user_message=instruction,
+ assistant_message=response_text,
+ conversation_id=conversation_id,
+ model=model,
+ )
+ if not quiet:
+ console.print("[dim]💾 Memory extraction complete[/dim]")
+ except Exception as e:
+ LOGGER.warning("Failed to extract memories: %s", e)
+
+
+async def _maybe_retrieve_memories(
+ memory_cfg: config.Memory,
+ memory_client: MemoryClient | None,
+ instruction: str,
+ conversation_id: str,
+) -> str:
+ """Retrieve relevant memories in auto mode for prompt injection.
+
+ Returns formatted memory context string, or empty string if not applicable.
+ """
+ if memory_cfg.mode != "auto" or memory_client is None:
+ return ""
+ try:
+ retrieval = await memory_client.search(
+ query=instruction,
+ conversation_id=conversation_id,
+ top_k=memory_cfg.top_k,
+ )
+ if not retrieval.entries:
+ return ""
+ lines = ["\n"]
+ lines.extend(f"- {entry.content}" for entry in retrieval.entries)
+ lines.append("")
+ return "\n".join(lines)
+ except Exception as e:
+ LOGGER.warning("Failed to retrieve memories: %s", e)
+ return ""
+
+
async def _handle_conversation_turn(
*,
stop_event: InteractiveStopEvent,
conversation_history: list[ConversationEntry],
+ memory_client: MemoryClient | None,
+ conversation_id: str,
+ memory_cfg: config.Memory,
provider_cfg: config.ProviderSelection,
general_cfg: config.General,
history_cfg: config.History,
@@ -213,6 +360,15 @@ async def _handle_conversation_turn(
instruction=instruction,
)
+ # 3b. Auto-retrieve and inject memories in "auto" mode
+ memory_context = await _maybe_retrieve_memories(
+ memory_cfg,
+ memory_client,
+ instruction,
+ conversation_id,
+ )
+ system_prompt = SYSTEM_PROMPT + memory_context
+
# 4. Get LLM response with timing
start_time = time.monotonic()
@@ -230,8 +386,14 @@ async def _handle_conversation_turn(
quiet=general_cfg.quiet,
stop_event=stop_event,
):
+ # Memory tools access:
+ # - "off": no memory tools
+ # - "tools": full access (add, search, list)
+ # - "auto": read-only access (search, list) - extraction happens automatically
+ tool_memory_client = memory_client if memory_cfg.mode != "off" else None
+ memory_read_only = memory_cfg.mode == "auto"
response_text = await get_llm_response(
- system_prompt=SYSTEM_PROMPT,
+ system_prompt=system_prompt,
agent_instructions=AGENT_INSTRUCTIONS,
user_input=user_message_with_context,
provider_cfg=provider_cfg,
@@ -239,7 +401,7 @@ async def _handle_conversation_turn(
openai_cfg=openai_llm_cfg,
gemini_cfg=gemini_llm_cfg,
logger=LOGGER,
- tools=tools(),
+ tools=tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only),
quiet=True, # Suppress internal output since we're showing our own timer
live=live,
)
@@ -267,6 +429,20 @@ async def _handle_conversation_turn(
},
)
+ # 5b. Auto-extract memories in "auto" mode (run in background, don't block)
+ if memory_cfg.mode == "auto" and memory_client is not None:
+ asyncio.create_task( # noqa: RUF006
+ _maybe_extract_memories(
+ memory_cfg,
+ memory_client,
+ instruction,
+ response_text,
+ conversation_id,
+ openai_llm_cfg.llm_openai_model,
+ general_cfg.quiet,
+ ),
+ )
+
# 6. Save history
if history_cfg.history_dir:
history_path = Path(history_cfg.history_dir).expanduser()
@@ -318,8 +494,11 @@ async def _async_main(
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
gemini_tts_cfg: config.GeminiTTS,
+ memory_cfg: config.Memory,
) -> None:
"""Main async function, consumes parsed arguments."""
+ memory_client = None
+
try:
device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
if device_info is None:
@@ -329,6 +508,14 @@ async def _async_main(
if audio_out_cfg.enable_tts:
audio_out_cfg.output_device_index = tts_output_device_index
+ # Initialize memory system (if not disabled)
+ memory_client = _maybe_init_memory(
+ memory_cfg,
+ history_cfg,
+ openai_llm_cfg,
+ general_cfg.quiet,
+ )
+
# Load conversation history
conversation_history = []
if history_cfg.history_dir:
@@ -342,6 +529,9 @@ async def _async_main(
history_cfg.last_n_messages,
)
+ # Generate conversation ID for memory scoping
+ conversation_id = _get_conversation_id(history_cfg)
+
with (
maybe_live(not general_cfg.quiet) as live,
signal_handling_context(LOGGER, general_cfg.quiet) as stop_event,
@@ -350,6 +540,9 @@ async def _async_main(
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
+ memory_client=memory_client,
+ conversation_id=conversation_id,
+ memory_cfg=memory_cfg,
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -371,6 +564,10 @@ async def _async_main(
if not general_cfg.quiet:
console.print_exception()
raise
+ finally:
+ # Clean up memory client
+ if memory_client is not None:
+ await memory_client.stop()
@app.command("chat")
@@ -433,6 +630,17 @@ def chat(
" Set to 0 to disable history.",
rich_help_panel="History Options",
),
+ # --- Memory Options ---
+ memory_mode: str = opts.MEMORY_MODE,
+ memory_path: Path | None = opts.MEMORY_PATH,
+ embedding_model: str = opts.EMBEDDING_MODEL,
+ memory_top_k: int = opts.MEMORY_TOP_K,
+ memory_score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
+ memory_max_entries: int = opts.MEMORY_MAX_ENTRIES,
+ memory_mmr_lambda: float = opts.MEMORY_MMR_LAMBDA,
+ memory_recency_weight: float = opts.MEMORY_RECENCY_WEIGHT,
+ memory_summarization: bool = opts.MEMORY_SUMMARIZATION,
+ memory_git_versioning: bool = opts.MEMORY_GIT_VERSIONING,
# --- General Options ---
save_file: Path | None = opts.SAVE_FILE,
log_level: str = opts.LOG_LEVEL,
@@ -535,6 +743,18 @@ def chat(
history_dir=history_dir,
last_n_messages=last_n_messages,
)
+ memory_cfg = config.Memory(
+ mode=memory_mode, # type: ignore[arg-type]
+ memory_path=memory_path,
+ embedding_model=embedding_model,
+ top_k=memory_top_k,
+ score_threshold=memory_score_threshold,
+ max_entries=memory_max_entries,
+ mmr_lambda=memory_mmr_lambda,
+ recency_weight=memory_recency_weight,
+ enable_summarization=memory_summarization,
+ enable_git_versioning=memory_git_versioning,
+ )
asyncio.run(
_async_main(
@@ -553,5 +773,6 @@ def chat(
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=memory_cfg,
),
)
diff --git a/agent_cli/agents/memory/add.py b/agent_cli/agents/memory/add.py
index 0675e9920..aea4b0301 100644
--- a/agent_cli/agents/memory/add.py
+++ b/agent_cli/agents/memory/add.py
@@ -6,7 +6,7 @@
import re
import sys
from datetime import UTC, datetime
-from pathlib import Path # noqa: TC003
+from pathlib import Path
from typing import TYPE_CHECKING, Any
import typer
@@ -127,16 +127,8 @@ def add(
"-c",
help="Conversation ID to add memories to.",
),
- memory_path: Path = typer.Option( # noqa: B008
- "./memory_db",
- "--memory-path",
- help="Path to the memory store.",
- ),
- git_versioning: bool = typer.Option(
- True, # noqa: FBT003
- "--git-versioning/--no-git-versioning",
- help="Commit changes to git.",
- ),
+ memory_path: Path | None = opts.MEMORY_PATH,
+ git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True),
quiet: bool = opts.QUIET,
config_file: str | None = opts.CONFIG_FILE,
print_args: bool = opts.PRINT_ARGS,
@@ -176,6 +168,8 @@ def add(
console.print("[red]No memories provided. Use arguments or --file.[/red]")
raise typer.Exit(1)
+ if memory_path is None:
+ memory_path = Path("./memory_db")
memory_path = memory_path.resolve()
records = _write_memories(memory_path, parsed, git_versioning)
diff --git a/agent_cli/agents/memory/proxy.py b/agent_cli/agents/memory/proxy.py
index 73906c62d..18796eedc 100644
--- a/agent_cli/agents/memory/proxy.py
+++ b/agent_cli/agents/memory/proxy.py
@@ -3,7 +3,7 @@
from __future__ import annotations
import logging
-from pathlib import Path # noqa: TC003
+from pathlib import Path
import typer
from rich.logging import RichHandler
@@ -15,57 +15,23 @@
@memory_app.command("proxy")
def proxy(
- memory_path: Path = typer.Option( # noqa: B008
- "./memory_db",
- help="Path to the memory store (files + derived vector index).",
- rich_help_panel="Memory Configuration",
- ),
+ memory_path: Path | None = opts.MEMORY_PATH,
openai_base_url: str | None = opts.OPENAI_BASE_URL,
embedding_model: str = opts.EMBEDDING_MODEL,
openai_api_key: str | None = opts.OPENAI_API_KEY,
- default_top_k: int = typer.Option(
- 5,
- help="Number of memory entries to retrieve per query.",
- rich_help_panel="Memory Configuration",
- ),
+ default_top_k: int = opts.MEMORY_TOP_K,
host: str = opts.SERVER_HOST,
port: int = typer.Option(
8100,
help="Port to bind to",
rich_help_panel="Server Configuration",
),
- max_entries: int = typer.Option(
- 500,
- help="Maximum stored memory entries per conversation (excluding summary).",
- rich_help_panel="Memory Configuration",
- ),
- mmr_lambda: float = typer.Option(
- 0.7,
- help="MMR lambda (0-1): higher favors relevance, lower favors diversity.",
- rich_help_panel="Memory Configuration",
- ),
- recency_weight: float = typer.Option(
- 0.2,
- help="Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance).",
- rich_help_panel="Memory Configuration",
- ),
- score_threshold: float = typer.Option(
- 0.35,
- help="Minimum semantic relevance threshold (0.0-1.0). Memories below this score are discarded to reduce noise.",
- rich_help_panel="Memory Configuration",
- ),
- summarization: bool = typer.Option(
- True, # noqa: FBT003
- "--summarization/--no-summarization",
- help="Enable automatic fact extraction and summaries.",
- rich_help_panel="Memory Configuration",
- ),
- git_versioning: bool = typer.Option(
- True, # noqa: FBT003
- "--git-versioning/--no-git-versioning",
- help="Enable automatic git commit of memory changes.",
- rich_help_panel="Memory Configuration",
- ),
+ max_entries: int = opts.MEMORY_MAX_ENTRIES,
+ mmr_lambda: float = opts.MEMORY_MMR_LAMBDA,
+ recency_weight: float = opts.MEMORY_RECENCY_WEIGHT,
+ score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
+ summarization: bool = opts.MEMORY_SUMMARIZATION,
+ git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True),
log_level: str = opts.with_default(opts.LOG_LEVEL, "INFO"),
config_file: str | None = opts.CONFIG_FILE,
print_args: bool = opts.PRINT_ARGS,
@@ -127,6 +93,8 @@ def proxy(
logging.getLogger("chromadb").setLevel(logging.WARNING)
logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
+ if memory_path is None:
+ memory_path = Path("./memory_db")
memory_path = memory_path.resolve()
entries_dir, _ = ensure_store_dirs(memory_path)
if openai_base_url is None:
diff --git a/agent_cli/config.py b/agent_cli/config.py
index 65c078dfa..db17115d8 100644
--- a/agent_cli/config.py
+++ b/agent_cli/config.py
@@ -224,6 +224,43 @@ def _expand_user_path(cls, v: str | None) -> Path | None:
return None
+# --- Panel: Memory Options ---
+
+
+MemoryMode = Literal["off", "tools", "auto"]
+
+
+class Memory(BaseModel):
+ """Configuration for the vector-backed memory system.
+
+ The memory system uses ChromaDB with vector embeddings for semantic search,
+ recency-aware scoring, and automatic fact reconciliation.
+
+ Modes:
+ - off: Memory disabled
+ - tools: LLM decides via add_memory/search_memory tools (default)
+ - auto: Automatic extraction and injection each turn
+ """
+
+ mode: MemoryMode = "tools"
+ memory_path: Path | None = None
+ embedding_model: str = "text-embedding-3-small"
+ top_k: int = 5
+ score_threshold: float = 0.35
+ recency_weight: float = 0.2
+ mmr_lambda: float = 0.7
+ enable_summarization: bool = True
+ enable_git_versioning: bool = False
+ max_entries: int = 500
+
+ @field_validator("memory_path", mode="before")
+ @classmethod
+ def _expand_user_path(cls, v: str | None) -> Path | None:
+ if v:
+ return Path(v).expanduser()
+ return None
+
+
def _config_path(config_path_str: str | None = None) -> Path | None:
"""Return a usable config path, expanding user directories."""
if config_path_str:
diff --git a/agent_cli/memory/client.py b/agent_cli/memory/client.py
index 3ca4762f6..6e8d62671 100644
--- a/agent_cli/memory/client.py
+++ b/agent_cli/memory/client.py
@@ -137,6 +137,32 @@ async def add(
)
evict_if_needed(self.collection, self.memory_path, conversation_id, self.max_entries)
+ async def extract_from_turn(
+ self,
+ user_message: str,
+ assistant_message: str,
+ conversation_id: str = "default",
+ model: str = DEFAULT_OPENAI_MODEL,
+ ) -> None:
+ """Extract and store facts from a conversation turn.
+
+ This is used for automatic memory extraction mode, where facts are
+ extracted from both user and assistant messages after each turn.
+ """
+ await extract_and_store_facts_and_summaries(
+ collection=self.collection,
+ memory_root=self.memory_path,
+ conversation_id=conversation_id,
+ user_message=user_message,
+ assistant_message=assistant_message,
+ openai_base_url=self.openai_base_url,
+ api_key=self.chat_api_key,
+ model=model,
+ enable_git_versioning=self.enable_git_versioning,
+ enable_summarization=self.enable_summarization,
+ )
+ evict_if_needed(self.collection, self.memory_path, conversation_id, self.max_entries)
+
async def search(
self,
query: str,
diff --git a/agent_cli/opts.py b/agent_cli/opts.py
index 1002066de..ac149b80f 100644
--- a/agent_cli/opts.py
+++ b/agent_cli/opts.py
@@ -2,6 +2,7 @@
import copy
from pathlib import Path
+from typing import Any
import typer
from typer.models import OptionInfo
@@ -9,7 +10,7 @@
from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL, DEFAULT_OPENAI_MODEL
-def with_default(option: OptionInfo, default: str) -> OptionInfo:
+def with_default(option: OptionInfo, default: Any) -> OptionInfo:
"""Create a copy of a typer Option with a different default value."""
opt = copy.copy(option)
opt.default = default
@@ -381,6 +382,63 @@ def _conf_callback(ctx: typer.Context, param: typer.CallbackParam, value: str) -
rich_help_panel="General Options",
)
+# --- Memory Options ---
+MEMORY_MODE: str = typer.Option(
+ "tools",
+ "--memory-mode",
+ help="Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction).",
+ rich_help_panel="Memory Options",
+)
+MEMORY_PATH: Path | None = typer.Option(
+ None,
+ "--memory-path",
+ help="Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db",
+ rich_help_panel="Memory Options",
+)
+MEMORY_TOP_K: int = typer.Option(
+ 5,
+ "--memory-top-k",
+ help="Number of memories to retrieve per search.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_SCORE_THRESHOLD: float = typer.Option(
+ 0.35,
+ "--memory-score-threshold",
+ help="Minimum relevance score threshold for memory retrieval (0.0-1.0).",
+ rich_help_panel="Memory Options",
+)
+MEMORY_MAX_ENTRIES: int = typer.Option(
+ 500,
+ "--memory-max-entries",
+ help="Maximum stored memory entries per conversation (excluding summary).",
+ rich_help_panel="Memory Options",
+)
+MEMORY_MMR_LAMBDA: float = typer.Option(
+ 0.7,
+ "--memory-mmr-lambda",
+ help="MMR lambda (0-1): higher favors relevance, lower favors diversity.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_RECENCY_WEIGHT: float = typer.Option(
+ 0.2,
+ "--memory-recency-weight",
+ help="Recency score weight (0.0-1.0). Controls freshness vs. relevance.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_SUMMARIZATION: bool = typer.Option(
+ True, # noqa: FBT003
+ "--memory-summarization/--no-memory-summarization",
+ help="Enable automatic fact extraction and summaries.",
+ rich_help_panel="Memory Options",
+)
+MEMORY_GIT_VERSIONING: bool = typer.Option(
+ False, # noqa: FBT003
+ "--memory-git-versioning/--no-memory-git-versioning",
+ help="Enable automatic git commit of memory changes.",
+ rich_help_panel="Memory Options",
+)
+
+
# --- Server Options ---
SERVER_HOST: str = typer.Option(
"0.0.0.0", # noqa: S104
diff --git a/docs/architecture/memory.md b/docs/architecture/memory.md
index f2cb3600f..6a70e50ff 100644
--- a/docs/architecture/memory.md
+++ b/docs/architecture/memory.md
@@ -39,7 +39,8 @@ A local-first system that gives LLMs persistent memory across conversations, wit
### Related
-- [memory command](../commands/memory.md) - How to run the memory proxy and add memories
+- [chat command](../commands/chat.md) - Voice-based chat agent with integrated memory
+- [memory command](../commands/memory.md) - Memory proxy server for any OpenAI-compatible app
- [Configuration](../configuration.md) - Config file keys and defaults
- [RAG System Architecture](rag.md) - Related retrieval stack for documents
- [rag-proxy command](../commands/rag-proxy.md) - Document retrieval server
diff --git a/docs/commands/chat.md b/docs/commands/chat.md
index fc3a9fbb8..f83b4a7ff 100644
--- a/docs/commands/chat.md
+++ b/docs/commands/chat.md
@@ -165,6 +165,26 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history
| `--history-dir` | `~/.config/agent-cli/history` | Directory to store conversation history. |
| `--last-n-messages` | `50` | Number of messages to include in the conversation history. Set to 0 to disable history. |
+### Memory Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--memory-mode` | `tools` | Memory mode: 'off' (disabled), 'tools' (LLM decides via tools), 'auto' (automatic extraction). |
+| `--memory-path` | - | Path for memory database storage. Default: ~/.config/agent-cli/memory/vector_db |
+| `--memory-top-k` | `5` | Number of memories to retrieve per search. |
+| `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). |
+| `--memory-max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). |
+| `--memory-mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. |
+| `--memory-recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. |
+| `--memory-summarization/--no-memory-summarization` | `true` | Enable automatic fact extraction and summaries. |
+| `--memory-git-versioning/--no-memory-git-versioning` | `false` | Enable automatic git commit of memory changes. |
+
+### LLM Configuration
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--embedding-model` | `text-embedding-3-small` | Embedding model to use for vectorization. |
+
### General Options
| Option | Default | Description |
@@ -179,22 +199,53 @@ agent-cli chat --last-n-messages 100 --history-dir ~/.my-chat-history
-## Available Tools
+## Memory System
-The chat agent has access to tools that let it interact with your system:
+The chat agent includes a built-in long-term memory system that allows it to remember information across conversations.
+
+The memory system uses a **vector-backed architecture** with semantic search. This provides:
+
+- **Semantic search**: Find relevant memories based on meaning, not just keywords
+- **Recency-aware scoring**: Recent memories are weighted higher
+- **Diversity selection (MMR)**: Avoids redundant memories in context
+- **Automatic reconciliation**: Contradicting facts are updated, not duplicated
+
+### Memory Modes
+
+Use `--memory-mode` to control how memory works:
+
+| Mode | Description |
+|------|-------------|
+| `off` | Memory system disabled |
+| `tools` (default) | LLM decides when to store/retrieve via tools. LLM asks permission before storing. |
+| `auto` | Automatic extraction after each conversation turn (no LLM tools exposed). |
+
+Example:
+
+```bash
+# Automatic memory extraction (no prompting, just remembers)
+agent-cli chat --memory-mode auto
+
+# Disable memory entirely
+agent-cli chat --memory-mode off
+```
> [!NOTE]
-> The memory tools below use a simple, built-in JSON storage system.
-> For the advanced, vector-backed memory system, see the [`memory`](memory.md) command.
+> The memory system requires the `[memory]` extra: `pip install "agent-cli[memory]"`.
+> If not installed, memory tools will not be available.
+
+For more details on how the memory system works, see [Memory System Architecture](../architecture/memory.md).
+
+## Available Tools
+
+The chat agent has access to tools that let it interact with your system:
- **read_file**: Read file contents
- **execute_code**: Run a single command (no shell features like pipes or redirects)
- **duckduckgo_search**: Search the web via DuckDuckGo
-- **add_memory**: Store information for future conversations
-- **search_memory**: Search stored memories
-- **update_memory**: Update existing memories
+- **add_memory**: Store information for future conversations (uses [vector memory](../architecture/memory.md))
+- **search_memory**: Search stored memories with semantic search
- **list_all_memories**: List all stored memories
-- **list_memory_categories**: Show memory category summary
## Example Conversation
diff --git a/docs/commands/memory.md b/docs/commands/memory.md
index 5ff97a459..6277f3c7a 100644
--- a/docs/commands/memory.md
+++ b/docs/commands/memory.md
@@ -69,18 +69,18 @@ agent-cli chat --openai-base-url http://localhost:8100/v1 --llm-provider openai
-### Memory Configuration
+### Memory Options
| Option | Default | Description |
|--------|---------|-------------|
-| `--memory-path` | `./memory_db` | Path to the memory store (files + derived vector index). |
-| `--default-top-k` | `5` | Number of memory entries to retrieve per query. |
-| `--max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). |
-| `--mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. |
-| `--recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance). |
-| `--score-threshold` | `0.35` | Minimum semantic relevance threshold (0.0-1.0). Memories below this score are discarded to reduce noise. |
-| `--summarization/--no-summarization` | `true` | Enable automatic fact extraction and summaries. |
-| `--git-versioning/--no-git-versioning` | `true` | Enable automatic git commit of memory changes. |
+| `--memory-path` | `./memory_db` | Path for memory database storage. |
+| `--memory-top-k` | `5` | Number of memories to retrieve per search. |
+| `--memory-max-entries` | `500` | Maximum stored memory entries per conversation (excluding summary). |
+| `--memory-mmr-lambda` | `0.7` | MMR lambda (0-1): higher favors relevance, lower favors diversity. |
+| `--memory-recency-weight` | `0.2` | Recency score weight (0.0-1.0). Controls freshness vs. relevance. |
+| `--memory-score-threshold` | `0.35` | Minimum relevance score threshold for memory retrieval (0.0-1.0). |
+| `--memory-summarization/--no-memory-summarization` | `true` | Enable automatic fact extraction and summaries. |
+| `--memory-git-versioning/--no-memory-git-versioning` | `true` | Enable automatic git commit of memory changes. |
### LLM: OpenAI-compatible
@@ -162,8 +162,13 @@ agent-cli memory add -c work "Project deadline is Friday"
|--------|---------|-------------|
| `--file` | - | Read memories from file. Use '-' for stdin. Supports JSON array, JSON object with 'memories' key, or plain text (one per line). |
| `--conversation-id` | `default` | Conversation ID to add memories to. |
-| `--memory-path` | `./memory_db` | Path to the memory store. |
-| `--git-versioning/--no-git-versioning` | `true` | Commit changes to git. |
+
+### Memory Options
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--memory-path` | `./memory_db` | Path for memory database storage. |
+| `--memory-git-versioning/--no-memory-git-versioning` | `true` | Enable automatic git commit of memory changes. |
### General Options
@@ -221,6 +226,8 @@ See [Memory System Architecture](../architecture/memory.md) for the full schema
## Related
+- [chat command](chat.md) - Voice-based chat agent with integrated memory
+- [Memory System Architecture](../architecture/memory.md) - Full technical specification
- [Configuration](../configuration.md) - Config file keys for memory proxy defaults
- [rag-proxy](rag-proxy.md) - Document RAG proxy server (contrast with memory)
- [RAG System Architecture](../architecture/rag.md) - How RAG indexing and retrieval works
diff --git a/tests/agents/test_interactive.py b/tests/agents/test_interactive.py
index bc4cc7292..e51648ddc 100644
--- a/tests/agents/test_interactive.py
+++ b/tests/agents/test_interactive.py
@@ -140,6 +140,7 @@ async def test_async_main_list_devices(tmp_path: Path) -> None:
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
mock_setup_devices.assert_called_once()
@@ -209,6 +210,7 @@ async def test_async_main_list_output_devices(tmp_path: Path) -> None:
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
mock_setup_devices.assert_called_once()
@@ -265,6 +267,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None:
with (
patch("agent_cli.agents.chat.setup_devices", return_value=(1, "mock_input", 1)),
+        patch("agent_cli.agents.chat._maybe_init_memory", return_value=None),
patch("agent_cli.agents.chat.asr.create_transcriber") as mock_create_transcriber,
patch(
"agent_cli.agents.chat.get_llm_response",
@@ -302,6 +305,7 @@ async def test_async_main_full_loop(tmp_path: Path) -> None:
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
# Verify that the core functions were called
diff --git a/tests/agents/test_interactive_extra.py b/tests/agents/test_interactive_extra.py
index 6d14bafec..dcb020877 100644
--- a/tests/agents/test_interactive_extra.py
+++ b/tests/agents/test_interactive_extra.py
@@ -68,6 +68,9 @@ async def test_handle_conversation_turn_no_llm_response():
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
+ memory_client=None,
+ conversation_id="test",
+ memory_cfg=config.Memory(),
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -138,6 +141,9 @@ async def test_handle_conversation_turn_no_instruction():
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
+ memory_client=None,
+ conversation_id="test",
+ memory_cfg=config.Memory(),
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -260,5 +266,6 @@ async def test_async_main_exception_handling():
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
+ memory_cfg=config.Memory(),
)
mock_console.print_exception.assert_called_once()
diff --git a/tests/agents/test_memory_add.py b/tests/agents/test_memory_add.py
index d81e300c9..52c0f3064 100644
--- a/tests/agents/test_memory_add.py
+++ b/tests/agents/test_memory_add.py
@@ -154,7 +154,7 @@ def test_memory_add_single_memory(tmp_path: Path) -> None:
"User likes Python",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -181,7 +181,7 @@ def test_memory_add_multiple_memories(tmp_path: Path) -> None:
"Fact three",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -203,7 +203,7 @@ def test_memory_add_from_file(tmp_path: Path) -> None:
str(input_file),
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -225,7 +225,7 @@ def test_memory_add_with_conversation_id(tmp_path: Path) -> None:
"work",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 0
@@ -247,7 +247,7 @@ def test_memory_add_no_memories_error(tmp_path: Path) -> None:
"add",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
],
)
assert result.exit_code == 1
@@ -265,7 +265,7 @@ def test_memory_add_quiet_mode(tmp_path: Path) -> None:
"Silent fact",
"--memory-path",
str(memory_path),
- "--no-git-versioning",
+ "--no-memory-git-versioning",
"--quiet",
],
)
diff --git a/tests/memory/test_memory_integration.py b/tests/memory/test_memory_integration.py
index 28c7d48a9..e3dfcaf44 100644
--- a/tests/memory/test_memory_integration.py
+++ b/tests/memory/test_memory_integration.py
@@ -15,6 +15,9 @@
from agent_cli.constants import DEFAULT_OPENAI_EMBEDDING_MODEL
from agent_cli.memory import api as memory_api
+# Extend timeout for these tests - Windows SSL initialization is slow
+pytestmark = pytest.mark.timeout(30)
+
class _DummyReranker:
def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
diff --git a/tests/test_memory_tools.py b/tests/test_memory_tools.py
index 6c017b0a4..dabe3ae73 100644
--- a/tests/test_memory_tools.py
+++ b/tests/test_memory_tools.py
@@ -1,117 +1,567 @@
-"""Tests for the memory tools."""
+"""Tests for the memory tools in _tools.py."""
from __future__ import annotations
-import json
from pathlib import Path
-from unittest.mock import patch
-
-import pytest # noqa: TC002
-
-from agent_cli import _tools
+from unittest.mock import AsyncMock, MagicMock
+import pytest
-def test_get_memory_file_path(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- """Test the _get_memory_file_path function."""
- # Test with AGENT_CLI_HISTORY_DIR set
- history_dir = tmp_path / "history"
- monkeypatch.setenv("AGENT_CLI_HISTORY_DIR", str(history_dir))
- path = _tools._get_memory_file_path()
- assert path == history_dir / "long_term_memory.json"
-
- # Test without AGENT_CLI_HISTORY_DIR set
- monkeypatch.delenv("AGENT_CLI_HISTORY_DIR", raising=False)
- path = _tools._get_memory_file_path()
- assert path == Path.home() / ".config" / "agent-cli" / "memory" / "long_term_memory.json"
-
-
-def test_load_and_save_memories(tmp_path: Path) -> None:
- """Test the _load_memories and _save_memories functions."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Test loading from a non-existent file
- memories = _tools._load_memories()
- assert memories == []
-
- # Test saving and then loading
- memories_to_save = [{"id": 1, "content": "test"}]
- _tools._save_memories(memories_to_save)
-
- loaded_memories = _tools._load_memories()
- assert loaded_memories == memories_to_save
-
- # Verify the file content
- with memory_file.open("r") as f:
- assert json.load(f) == memories_to_save
-
-
-def test_add_and_search_memory(tmp_path: Path) -> None:
- """Test the add_memory and search_memory functions."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Test searching in an empty memory
- assert "No memories found" in _tools.search_memory("test")
-
- # Test adding a memory
- result = _tools.add_memory("test content", "test_category", "tag1, tag2")
- assert "Memory added successfully with ID 1" in result
-
- # Test searching for the new memory
- search_result = _tools.search_memory("test content")
- assert "ID: 1" in search_result
- assert "Category: test_category" in search_result
- assert "Content: test content" in search_result
- assert "Tags: tag1, tag2" in search_result
-
- # Test searching with a category filter
- search_result_cat = _tools.search_memory("test", category="test_category")
- assert "ID: 1" in search_result_cat
-
- # Test searching with a non-matching category
- search_result_no_cat = _tools.search_memory("test", category="wrong_category")
- assert "No memories found" in search_result_no_cat
-
-
-def test_update_memory(tmp_path: Path) -> None:
- """Test the update_memory function."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Add a memory to work with
- _tools.add_memory("original content", "original_category", "original_tag")
-
- # Test updating a non-existent memory
- assert "not found" in _tools.update_memory(2, content="new")
-
- # Test updating the existing memory
- update_result = _tools.update_memory(1, content="new content", category="new_category")
- assert "updated successfully" in update_result
-
- # Verify the update
- search_result = _tools.search_memory("new content")
- assert "Category: new_category" in search_result
-
-
-def test_list_all_and_categories(tmp_path: Path) -> None:
- """Test the list_all_memories and list_memory_categories functions."""
- memory_file = tmp_path / "long_term_memory.json"
- with patch("agent_cli._tools._get_memory_file_path", return_value=memory_file):
- # Test with no memories
- assert "No memories stored" in _tools.list_all_memories()
- assert "No memories found" in _tools.list_memory_categories()
-
- # Add some memories
- _tools.add_memory("content1", "cat1", "tag1")
- _tools.add_memory("content2", "cat2", "tag2")
- _tools.add_memory("content3", "cat1", "tag3")
-
- # Test list_all_memories
- list_all_result = _tools.list_all_memories()
- assert "Showing 3 of 3 total memories" in list_all_result
- assert "ID: 1" in list_all_result
- assert "ID: 2" in list_all_result
- assert "ID: 3" in list_all_result
-
- # Test list_memory_categories
- list_cat_result = _tools.list_memory_categories()
- assert "cat1: 2 memories" in list_cat_result
- assert "cat2: 1 memories" in list_cat_result
+from agent_cli._tools import (
+ MemoryTools,
+ _format_memory_content,
+ create_memory_tools,
+ tools,
+)
+from agent_cli.agents.chat import (
+ _get_conversation_id,
+ _maybe_extract_memories,
+ _maybe_init_memory,
+ _maybe_retrieve_memories,
+)
+from agent_cli.config import History, Memory, OpenAILLM
+
+# --- Tests for _format_memory_content ---
+
+
+def test_format_memory_content_basic() -> None:
+ """Test basic memory content formatting."""
+ result = _format_memory_content("User likes Python", "preferences", "")
+ assert result == "[preferences] User likes Python"
+
+
+def test_format_memory_content_with_tags() -> None:
+ """Test memory content formatting with tags."""
+ result = _format_memory_content("User likes Python", "preferences", "programming, languages")
+ assert result == "[preferences] User likes Python (tags: programming, languages)"
+
+
+def test_format_memory_content_empty_category() -> None:
+ """Test memory content formatting with empty category."""
+ result = _format_memory_content("Some content", "", "")
+ assert result == "[] Some content"
+
+
+# --- Tests for MemoryTools._check ---
+
+
+def test_memory_tools_check_with_no_client() -> None:
+ """Test that _check returns error when client is None."""
+ mt = MemoryTools(None, "test_conversation")
+ error = mt._check()
+ assert error is not None
+ assert "Memory system not initialized" in error
+ assert "pip install 'agent-cli[memory]'" in error
+
+
+def test_memory_tools_check_with_client() -> None:
+ """Test that _check returns None when client exists."""
+ mock_client = MagicMock()
+ mt = MemoryTools(mock_client, "test_conversation")
+ error = mt._check()
+ assert error is None
+
+
+# --- Tests for MemoryTools.add_memory ---
+
+
+@pytest.mark.asyncio
+async def test_add_memory_without_client() -> None:
+ """Test add_memory returns error when no client."""
+ mt = MemoryTools(None, "test")
+ result = await mt.add_memory("content", "category", "tags")
+ assert "Error: Memory system not initialized" in result
+
+
+@pytest.mark.asyncio
+async def test_add_memory_success() -> None:
+ """Test successful memory addition."""
+ mock_client = MagicMock()
+ mock_client.add = AsyncMock()
+
+ mt = MemoryTools(mock_client, "test_conversation")
+ result = await mt.add_memory("User likes coffee", "preferences", "food")
+
+ assert result == "Memory added successfully."
+ mock_client.add.assert_called_once_with(
+ "[preferences] User likes coffee (tags: food)",
+ conversation_id="test_conversation",
+ )
+
+
+@pytest.mark.asyncio
+async def test_add_memory_exception() -> None:
+ """Test add_memory handles exceptions."""
+ mock_client = MagicMock()
+ mock_client.add = AsyncMock(side_effect=RuntimeError("Database error"))
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.add_memory("content", "category", "tags")
+
+ assert "Error adding memory" in result
+ assert "Database error" in result
+
+
+# --- Tests for MemoryTools.search_memory ---
+
+
+@pytest.mark.asyncio
+async def test_search_memory_without_client() -> None:
+ """Test search_memory returns error when no client."""
+ mt = MemoryTools(None, "test")
+ result = await mt.search_memory("query")
+ assert "Error: Memory system not initialized" in result
+
+
+@pytest.mark.asyncio
+async def test_search_memory_no_results() -> None:
+ """Test search_memory with no matching results."""
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = []
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.search_memory("nonexistent")
+
+ assert "No memories found matching 'nonexistent'" in result
+
+
+@pytest.mark.asyncio
+async def test_search_memory_with_results() -> None:
+ """Test search_memory returns formatted results."""
+ # Create mock entries
+ entry1 = MagicMock()
+ entry1.content = "User likes Python"
+ entry1.score = 0.95
+
+ entry2 = MagicMock()
+ entry2.content = "User prefers dark mode"
+ entry2.score = 0.87
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = [entry1, entry2]
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.search_memory("preferences")
+
+ assert "User likes Python" in result
+ assert "User prefers dark mode" in result
+ assert "relevance: 0.95" in result
+ assert "relevance: 0.87" in result
+
+
+@pytest.mark.asyncio
+async def test_search_memory_with_category() -> None:
+ """Test search_memory includes category in query."""
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = []
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ mt = MemoryTools(mock_client, "test_conv")
+ await mt.search_memory("coffee", category="preferences")
+
+ # Verify category is prepended to the query
+ mock_client.search.assert_called_once_with(
+ "preferences coffee",
+ conversation_id="test_conv",
+ )
+
+
+@pytest.mark.asyncio
+async def test_search_memory_exception() -> None:
+ """Test search_memory handles exceptions."""
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(side_effect=RuntimeError("Search failed"))
+
+ mt = MemoryTools(mock_client, "test")
+ result = await mt.search_memory("query")
+
+ assert "Error searching memory" in result
+ assert "Search failed" in result
+
+
+# --- Tests for MemoryTools.list_all_memories ---
+
+
+def test_list_all_memories_without_client() -> None:
+ """Test list_all_memories returns error when no client."""
+ mt = MemoryTools(None, "test")
+ result = mt.list_all_memories()
+ assert "Error: Memory system not initialized" in result
+
+
+def test_list_all_memories_empty() -> None:
+ """Test list_all_memories with no stored memories."""
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(return_value=[])
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories()
+
+ assert result == "No memories stored yet."
+
+
+def test_list_all_memories_with_entries() -> None:
+ """Test list_all_memories returns formatted list."""
+ entries = [
+ {"content": "User likes Python", "role": "memory", "created_at": "2024-01-01T10:00:00"},
+ {
+ "content": "User lives in Amsterdam",
+ "role": "memory",
+ "created_at": "2024-01-02T12:00:00",
+ },
+ ]
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(return_value=entries)
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories()
+
+ assert "Showing 2 of 2 total memories" in result
+ assert "User likes Python" in result
+ assert "User lives in Amsterdam" in result
+ assert "[memory]" in result
+
+
+def test_list_all_memories_with_limit() -> None:
+ """Test list_all_memories respects limit parameter."""
+ entries = [
+ {"content": f"Memory {i}", "role": "memory", "created_at": "2024-01-01"} for i in range(5)
+ ]
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(return_value=entries)
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories(limit=3)
+
+ assert "Showing 3 of 5 total memories" in result
+ assert "... and 2 more memories" in result
+
+
+def test_list_all_memories_exception() -> None:
+ """Test list_all_memories handles exceptions."""
+ mock_client = MagicMock()
+ mock_client.list_all = MagicMock(side_effect=RuntimeError("List failed"))
+
+ mt = MemoryTools(mock_client, "test")
+ result = mt.list_all_memories()
+
+ assert "Error listing memories" in result
+ assert "List failed" in result
+
+
+# --- Tests for create_memory_tools ---
+
+
+def test_create_memory_tools_returns_list() -> None:
+ """Test create_memory_tools returns a list of Tool objects."""
+ mock_client = MagicMock()
+ result = create_memory_tools(mock_client, "test")
+
+ assert isinstance(result, list)
+ assert len(result) == 3 # add_memory, search_memory, list_all_memories
+
+
+def test_create_memory_tools_with_none_client() -> None:
+ """Test create_memory_tools works with None client."""
+ result = create_memory_tools(None, "test")
+
+ assert isinstance(result, list)
+ assert len(result) == 3
+
+
+def test_create_memory_tools_read_only() -> None:
+ """Test create_memory_tools with read_only=True excludes add_memory."""
+ mock_client = MagicMock()
+ result = create_memory_tools(mock_client, "test", read_only=True)
+
+ assert isinstance(result, list)
+ assert len(result) == 2 # Only search_memory and list_all_memories
+
+
+def test_create_memory_tools_read_only_false() -> None:
+ """Test create_memory_tools with read_only=False includes add_memory."""
+ mock_client = MagicMock()
+ result = create_memory_tools(mock_client, "test", read_only=False)
+
+ assert isinstance(result, list)
+ assert len(result) == 3 # add_memory, search_memory, list_all_memories
+
+
+# --- Tests for tools function ---
+
+
+def test_tools_returns_all_expected_tools() -> None:
+ """Test tools function returns all expected tools."""
+ result = tools(None, "test")
+
+ assert isinstance(result, list)
+ # Should have: read_file, execute_code, 3 memory tools, duckduckgo_search
+ assert len(result) == 6
+
+
+def test_tools_with_memory_client() -> None:
+ """Test tools function works with a memory client."""
+ mock_client = MagicMock()
+ result = tools(mock_client, "conversation_123")
+
+ assert isinstance(result, list)
+ assert len(result) == 6
+
+
+def test_tools_memory_read_only() -> None:
+ """Test tools function with memory_read_only=True has fewer memory tools."""
+ mock_client = MagicMock()
+ result = tools(mock_client, "test", memory_read_only=True)
+
+ assert isinstance(result, list)
+ # Should have: read_file, execute_code, 2 memory tools (no add_memory), duckduckgo_search
+ assert len(result) == 5
+
+
+def test_tools_memory_read_only_false() -> None:
+ """Test tools function with memory_read_only=False includes all memory tools."""
+ mock_client = MagicMock()
+ result = tools(mock_client, "test", memory_read_only=False)
+
+ assert isinstance(result, list)
+ # Should have: read_file, execute_code, 3 memory tools, duckduckgo_search
+ assert len(result) == 6
+
+
+# --- Tests for chat.py integration functions ---
+
+
+def test_get_conversation_id_with_history_dir() -> None:
+ """Test _get_conversation_id generates stable ID from history dir."""
+ history_cfg = History(history_dir=Path("/home/user/.chat-history"))
+ result = _get_conversation_id(history_cfg)
+
+ # Should be a 12-character hex string
+ assert len(result) == 12
+ assert all(c in "0123456789abcdef" for c in result)
+
+
+def test_get_conversation_id_without_history_dir() -> None:
+ """Test _get_conversation_id returns 'default' when no history dir."""
+ history_cfg = History(history_dir=None)
+ result = _get_conversation_id(history_cfg)
+
+ assert result == "default"
+
+
+def test_get_conversation_id_is_stable() -> None:
+ """Test _get_conversation_id produces same ID for same path."""
+ history_cfg1 = History(history_dir=Path("/some/path"))
+ history_cfg2 = History(history_dir=Path("/some/path"))
+
+ assert _get_conversation_id(history_cfg1) == _get_conversation_id(history_cfg2)
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_off_mode() -> None:
+ """Test _maybe_extract_memories does nothing when mode is not 'auto'."""
+ memory_cfg = Memory(mode="tools") # Not 'auto'
+ mock_client = MagicMock()
+ mock_client.extract_from_turn = AsyncMock()
+
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ response_text="response",
+ conversation_id="test",
+ model="gpt-4",
+ quiet=True,
+ )
+
+ # Should not call extract_from_turn when mode is not 'auto'
+ mock_client.extract_from_turn.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_auto_mode() -> None:
+ """Test _maybe_extract_memories extracts when mode is 'auto'."""
+ memory_cfg = Memory(mode="auto")
+ mock_client = MagicMock()
+ mock_client.extract_from_turn = AsyncMock()
+
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="Hello world",
+ response_text="Hi there!",
+ conversation_id="conv123",
+ model="gpt-4",
+ quiet=True,
+ )
+
+ mock_client.extract_from_turn.assert_called_once_with(
+ user_message="Hello world",
+ assistant_message="Hi there!",
+ conversation_id="conv123",
+ model="gpt-4",
+ )
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_handles_exception() -> None:
+ """Test _maybe_extract_memories handles exceptions gracefully."""
+ memory_cfg = Memory(mode="auto")
+ mock_client = MagicMock()
+ mock_client.extract_from_turn = AsyncMock(side_effect=RuntimeError("Extraction failed"))
+
+ # Should not raise, just log warning
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ response_text="response",
+ conversation_id="test",
+ model="gpt-4",
+ quiet=True,
+ )
+
+
+@pytest.mark.asyncio
+async def test_maybe_extract_memories_no_client() -> None:
+ """Test _maybe_extract_memories does nothing when client is None."""
+ memory_cfg = Memory(mode="auto")
+
+ # Should not raise even with None client
+ await _maybe_extract_memories(
+ memory_cfg=memory_cfg,
+ memory_client=None,
+ instruction="test",
+ response_text="response",
+ conversation_id="test",
+ model="gpt-4",
+ quiet=True,
+ )
+
+
+def test_maybe_init_memory_off_mode() -> None:
+ """Test _maybe_init_memory returns None when mode is 'off'."""
+ memory_cfg = Memory(mode="off")
+ history_cfg = History()
+ openai_cfg = OpenAILLM(llm_openai_model="gpt-4o-mini")
+
+ result = _maybe_init_memory(memory_cfg, history_cfg, openai_cfg, quiet=True)
+ assert result is None
+
+
+# --- Tests for _maybe_retrieve_memories ---
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_off_mode() -> None:
+ """Test _maybe_retrieve_memories returns empty string when mode is not 'auto'."""
+ memory_cfg = Memory(mode="tools") # Not 'auto'
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock()
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""
+ mock_client.search.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_auto_mode_with_results() -> None:
+ """Test _maybe_retrieve_memories returns formatted context in auto mode."""
+ memory_cfg = Memory(mode="auto", top_k=3)
+
+ # Create mock entries
+ entry1 = MagicMock()
+ entry1.content = "User likes pizza"
+ entry2 = MagicMock()
+ entry2.content = "User prefers Italian food"
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = [entry1, entry2]
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="What food do I like?",
+ conversation_id="conv123",
+ )
+
+ assert "" in result
+ assert "" in result
+ assert "User likes pizza" in result
+ assert "User prefers Italian food" in result
+ mock_client.search.assert_called_once_with(
+ query="What food do I like?",
+ conversation_id="conv123",
+ top_k=3,
+ )
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_auto_mode_no_results() -> None:
+ """Test _maybe_retrieve_memories returns empty string when no memories found."""
+ memory_cfg = Memory(mode="auto")
+
+ mock_retrieval = MagicMock()
+ mock_retrieval.entries = []
+
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(return_value=mock_retrieval)
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_no_client() -> None:
+ """Test _maybe_retrieve_memories returns empty string when client is None."""
+ memory_cfg = Memory(mode="auto")
+
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=None,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""
+
+
+@pytest.mark.asyncio
+async def test_maybe_retrieve_memories_handles_exception() -> None:
+ """Test _maybe_retrieve_memories handles exceptions gracefully."""
+ memory_cfg = Memory(mode="auto")
+ mock_client = MagicMock()
+ mock_client.search = AsyncMock(side_effect=RuntimeError("Search failed"))
+
+ # Should not raise, just return empty string
+ result = await _maybe_retrieve_memories(
+ memory_cfg=memory_cfg,
+ memory_client=mock_client,
+ instruction="test",
+ conversation_id="test",
+ )
+
+ assert result == ""