26 commits
052cbdd
feat(chat): integrate advanced vector-backed memory system
basnijholt Jan 4, 2026
6006d13
Merge 052cbdd64ebc6822806c1c0f15c489723af2132f into 853ea9de073db7d6a…
basnijholt Jan 4, 2026
06fd1ac
Update auto-generated docs
github-actions[bot] Jan 4, 2026
c432bf9
docs: update documentation for advanced memory integration
basnijholt Jan 4, 2026
da636d1
Merge c432bf965dbccaabe0efd2a8ba8524eb840834bb into 853ea9de073db7d6a…
basnijholt Jan 4, 2026
41c2f41
Update auto-generated docs
github-actions[bot] Jan 4, 2026
0557d36
refactor(chat): simplify memory system by removing dual-backend
basnijholt Jan 4, 2026
7acdf11
Merge 0557d36477366dc7eb5b5349de46739ee3fc3c75 into 853ea9de073db7d6a…
basnijholt Jan 4, 2026
731fac7
Update auto-generated docs
github-actions[bot] Jan 4, 2026
c098806
refactor(chat): remove globals and simplify memory tools
basnijholt Jan 4, 2026
f40fa09
refactor(chat): remove list_memory_categories tool
basnijholt Jan 4, 2026
05edfdd
feat(chat): add --memory-mode option for memory control
basnijholt Jan 4, 2026
e0e8d86
Merge 05edfdd418e22be8bdf747aff300bd1632ce5392 into d7905589723cde6df…
basnijholt Jan 4, 2026
868cb7e
Update auto-generated docs
github-actions[bot] Jan 4, 2026
d1ff04a
refactor: address review comments - move hashlib import to top, simpl…
basnijholt Jan 4, 2026
73a07f3
docs: regenerate auto-generated CLI help sections
basnijholt Jan 4, 2026
b0eabde
Merge 73a07f3be7bd78aa8b7804d09da9f8384bebb25b into d7905589723cde6df…
basnijholt Jan 4, 2026
29cab5a
Update auto-generated docs
github-actions[bot] Jan 4, 2026
d6fc660
fix(chat): use configured LLM model for memory extraction
basnijholt Jan 4, 2026
6b48758
fix: address review issues and add missing tests
basnijholt Jan 4, 2026
53539cb
Merge 6b4875873d7c93006fb7510bfb4cdaafdd36c822 into d7905589723cde6df…
basnijholt Jan 4, 2026
0b2a749
Update auto-generated docs
github-actions[bot] Jan 4, 2026
d104dcb
fix(tests): extend timeout for memory integration tests on Windows
basnijholt Jan 4, 2026
718cb70
fix(chat): run memory extraction in background (don't block conversat…
basnijholt Jan 4, 2026
17de4da
fix(chat): enable memory search in auto mode
basnijholt Jan 4, 2026
bf31697
feat(chat): auto-inject memories in auto mode
basnijholt Jan 5, 2026
Files changed
168 changes: 108 additions & 60 deletions README.md

Large diffs are not rendered by default.

466 changes: 172 additions & 294 deletions agent_cli/_tools.py

Large diffs are not rendered by default.
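Note: the agent_cli/_tools.py diff is collapsed above, but the new call site in chat.py below, tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only), implies a factory that gates memory tools on the mode. The following is a hypothetical sketch of that gating only; the tool names and closure structure are assumptions, not the actual implementation:

from collections.abc import Callable
from typing import Any


def tools(
    memory_client: Any | None,
    conversation_id: str,
    *,
    memory_read_only: bool = False,
) -> list[Callable[..., Any]]:
    """Sketch: assemble the tool list according to the memory mode.

    memory_client is None -> "off": no memory tools.
    memory_read_only=True -> "auto": search/list only; extraction is automatic.
    otherwise             -> "tools": full access, including add_memory.
    """

    def search_memory(query: str) -> str:
        """Search long-term memory scoped to conversation_id (placeholder body)."""
        ...

    def list_all_memories() -> str:
        """Show all stored memories with their details (placeholder body)."""
        ...

    def add_memory(content: str) -> str:
        """Write access; withheld in read-only ("auto") mode (placeholder body)."""
        ...

    base: list[Callable[..., Any]] = []  # non-memory tools would go here
    if memory_client is None:
        return base
    memory_tools: list[Callable[..., Any]] = [search_memory, list_all_memories]
    if not memory_read_only:
        memory_tools.append(add_memory)
    return base + memory_tools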

231 changes: 226 additions & 5 deletions agent_cli/agents/chat.py
@@ -13,6 +13,7 @@
from __future__ import annotations

import asyncio
import hashlib
import json
import logging
import os
@@ -50,9 +51,100 @@
if TYPE_CHECKING:
from rich.live import Live

from agent_cli.memory.client import MemoryClient


LOGGER = logging.getLogger(__name__)


def _get_conversation_id(history_cfg: config.History) -> str:
"""Generate a stable conversation ID from history configuration.

Uses a hash of the history directory path to ensure consistency across sessions.
"""
if history_cfg.history_dir:
return hashlib.md5(
str(Path(history_cfg.history_dir).resolve()).encode(),
usedforsecurity=False,
).hexdigest()[:12]
return "default"


def _try_init_memory(
memory_cfg: config.Memory,
history_cfg: config.History,
openai_llm_cfg: config.OpenAILLM,
quiet: bool,
) -> MemoryClient | None:
"""Try to initialize the memory system.

Returns the MemoryClient if successful, None otherwise.
"""
from agent_cli.memory.client import MemoryClient # noqa: PLC0415

# Determine memory path
memory_path = memory_cfg.memory_path
if memory_path is None:
if history_cfg.history_dir:
memory_path = Path(history_cfg.history_dir).expanduser() / "vector_memory"
else:
memory_path = Path.home() / ".config" / "agent-cli" / "memory" / "vector_db"

# Determine OpenAI base URL for embeddings
openai_base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1"

if not quiet:
console.print("[dim]Initializing memory system...[/dim]")

memory_client = MemoryClient(
memory_path=memory_path,
openai_base_url=openai_base_url,
embedding_model=memory_cfg.embedding_model,
embedding_api_key=openai_llm_cfg.openai_api_key,
chat_api_key=openai_llm_cfg.openai_api_key,
default_top_k=memory_cfg.top_k,
score_threshold=memory_cfg.score_threshold,
recency_weight=memory_cfg.recency_weight,
mmr_lambda=memory_cfg.mmr_lambda,
enable_summarization=memory_cfg.enable_summarization,
enable_git_versioning=memory_cfg.enable_git_versioning,
max_entries=memory_cfg.max_entries,
start_watcher=False,
)

# Start the memory client's file watcher
memory_client.start()

if not quiet:
console.print("[green]Memory system initialized[/green]")

return memory_client


def _maybe_init_memory(
memory_cfg: config.Memory,
history_cfg: config.History,
openai_llm_cfg: config.OpenAILLM,
quiet: bool,
) -> MemoryClient | None:
"""Initialize memory if mode is not 'off', handling errors gracefully."""
if memory_cfg.mode == "off":
return None
try:
return _try_init_memory(memory_cfg, history_cfg, openai_llm_cfg, quiet)
except ImportError:
if not quiet:
console.print(
"[yellow]Memory system not available. "
"Install with: pip install 'agent-cli[memory]'[/yellow]",
)
except Exception as e:
if not quiet:
console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]")
LOGGER.warning("Failed to initialize memory: %s", e)
return None


# --- Conversation History ---


@@ -74,9 +166,7 @@ class ConversationEntry(TypedDict):
- execute_code: Execute a shell command.
- add_memory: Add important information to long-term memory for future recall.
- search_memory: Search your long-term memory for relevant information.
- update_memory: Modify existing memories by ID when information changes.
- list_all_memories: Show all stored memories with their IDs and details.
- list_memory_categories: See what types of information you've remembered.
- list_all_memories: Show all stored memories with their details.
- duckduckgo_search: Search the web for current information.

Memory Guidelines:
@@ -144,10 +234,67 @@ def _format_conversation_for_llm(history: list[ConversationEntry]) -> str:
return "\n".join(formatted_lines)


async def _maybe_extract_memories(
memory_cfg: config.Memory,
memory_client: MemoryClient | None,
instruction: str,
response_text: str,
conversation_id: str,
model: str,
quiet: bool,
) -> None:
"""Extract memories in auto mode, silently skip otherwise."""
if memory_cfg.mode != "auto" or memory_client is None:
return
try:
await memory_client.extract_from_turn(
user_message=instruction,
assistant_message=response_text,
conversation_id=conversation_id,
model=model,
)
if not quiet:
console.print("[dim]💾 Memory extraction complete[/dim]")
except Exception as e:
LOGGER.warning("Failed to extract memories: %s", e)


async def _maybe_retrieve_memories(
memory_cfg: config.Memory,
memory_client: MemoryClient | None,
instruction: str,
conversation_id: str,
) -> str:
"""Retrieve relevant memories in auto mode for prompt injection.

Returns formatted memory context string, or empty string if not applicable.
"""
if memory_cfg.mode != "auto" or memory_client is None:
return ""
try:
retrieval = await memory_client.search(
query=instruction,
conversation_id=conversation_id,
top_k=memory_cfg.top_k,
)
if not retrieval.entries:
return ""
lines = ["\n<relevant-memories>"]
lines.extend(f"- {entry.content}" for entry in retrieval.entries)
lines.append("</relevant-memories>")
return "\n".join(lines)
except Exception as e:
LOGGER.warning("Failed to retrieve memories: %s", e)
return ""


async def _handle_conversation_turn(
*,
stop_event: InteractiveStopEvent,
conversation_history: list[ConversationEntry],
memory_client: MemoryClient | None,
conversation_id: str,
memory_cfg: config.Memory,
provider_cfg: config.ProviderSelection,
general_cfg: config.General,
history_cfg: config.History,
@@ -213,6 +360,15 @@ async def _handle_conversation_turn(
instruction=instruction,
)

# 3b. Auto-retrieve and inject memories in "auto" mode
memory_context = await _maybe_retrieve_memories(
memory_cfg,
memory_client,
instruction,
conversation_id,
)
system_prompt = SYSTEM_PROMPT + memory_context

# 4. Get LLM response with timing

start_time = time.monotonic()
@@ -230,16 +386,22 @@ async def _handle_conversation_turn(
quiet=general_cfg.quiet,
stop_event=stop_event,
):
# Memory tools access:
# - "off": no memory tools
# - "tools": full access (add, search, list)
# - "auto": read-only access (search, list) - extraction happens automatically
tool_memory_client = memory_client if memory_cfg.mode != "off" else None
memory_read_only = memory_cfg.mode == "auto"
response_text = await get_llm_response(
system_prompt=SYSTEM_PROMPT,
system_prompt=system_prompt,
agent_instructions=AGENT_INSTRUCTIONS,
user_input=user_message_with_context,
provider_cfg=provider_cfg,
ollama_cfg=ollama_cfg,
openai_cfg=openai_llm_cfg,
gemini_cfg=gemini_llm_cfg,
logger=LOGGER,
tools=tools(),
tools=tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only),
quiet=True, # Suppress internal output since we're showing our own timer
live=live,
)
@@ -267,6 +429,20 @@ async def _handle_conversation_turn(
},
)

# 5b. Auto-extract memories in "auto" mode (run in background, don't block)
if memory_cfg.mode == "auto" and memory_client is not None:
asyncio.create_task( # noqa: RUF006
_maybe_extract_memories(
memory_cfg,
memory_client,
instruction,
response_text,
conversation_id,
openai_llm_cfg.llm_openai_model,
general_cfg.quiet,
),
)

# 6. Save history
if history_cfg.history_dir:
history_path = Path(history_cfg.history_dir).expanduser()
@@ -318,8 +494,11 @@ async def _async_main(
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
gemini_tts_cfg: config.GeminiTTS,
memory_cfg: config.Memory,
) -> None:
"""Main async function, consumes parsed arguments."""
memory_client = None

try:
device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
if device_info is None:
@@ -329,6 +508,14 @@
if audio_out_cfg.enable_tts:
audio_out_cfg.output_device_index = tts_output_device_index

# Initialize memory system (if not disabled)
memory_client = _maybe_init_memory(
memory_cfg,
history_cfg,
openai_llm_cfg,
general_cfg.quiet,
)

# Load conversation history
conversation_history = []
if history_cfg.history_dir:
@@ -342,6 +529,9 @@
history_cfg.last_n_messages,
)

# Generate conversation ID for memory scoping
conversation_id = _get_conversation_id(history_cfg)

with (
maybe_live(not general_cfg.quiet) as live,
signal_handling_context(LOGGER, general_cfg.quiet) as stop_event,
@@ -350,6 +540,9 @@
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
memory_client=memory_client,
conversation_id=conversation_id,
memory_cfg=memory_cfg,
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -371,6 +564,10 @@
if not general_cfg.quiet:
console.print_exception()
raise
finally:
# Clean up memory client
if memory_client is not None:
await memory_client.stop()


@app.command("chat")
@@ -433,6 +630,17 @@ def chat(
" Set to 0 to disable history.",
rich_help_panel="History Options",
),
# --- Memory Options ---
memory_mode: str = opts.MEMORY_MODE,
memory_path: Path | None = opts.MEMORY_PATH,
embedding_model: str = opts.EMBEDDING_MODEL,
memory_top_k: int = opts.MEMORY_TOP_K,
memory_score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
memory_max_entries: int = opts.MEMORY_MAX_ENTRIES,
memory_mmr_lambda: float = opts.MEMORY_MMR_LAMBDA,
memory_recency_weight: float = opts.MEMORY_RECENCY_WEIGHT,
memory_summarization: bool = opts.MEMORY_SUMMARIZATION,
memory_git_versioning: bool = opts.MEMORY_GIT_VERSIONING,
# --- General Options ---
save_file: Path | None = opts.SAVE_FILE,
log_level: str = opts.LOG_LEVEL,
@@ -535,6 +743,18 @@ def chat(
history_dir=history_dir,
last_n_messages=last_n_messages,
)
memory_cfg = config.Memory(
mode=memory_mode, # type: ignore[arg-type]
memory_path=memory_path,
embedding_model=embedding_model,
top_k=memory_top_k,
score_threshold=memory_score_threshold,
max_entries=memory_max_entries,
mmr_lambda=memory_mmr_lambda,
recency_weight=memory_recency_weight,
enable_summarization=memory_summarization,
enable_git_versioning=memory_git_versioning,
)

asyncio.run(
_async_main(
Expand All @@ -553,5 +773,6 @@ def chat(
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
memory_cfg=memory_cfg,
),
)
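For orientation, the MemoryClient lifecycle that chat.py wires up above can be exercised standalone. A minimal sketch, assuming agent-cli[memory] is installed; the paths, model name, and API keys are placeholders, and which constructor arguments are required is an assumption. Only the calls that appear in this diff (the constructor, start, search, and stop, plus extract_from_turn in auto mode) are taken from the PR:

import asyncio
import hashlib
from pathlib import Path

from agent_cli.memory.client import MemoryClient


async def main() -> None:
    # Conversation ID derived the same way as _get_conversation_id above:
    # MD5 of the resolved history directory, truncated to 12 hex characters.
    history_dir = Path("~/.config/agent-cli/history").expanduser()  # placeholder path
    conversation_id = hashlib.md5(
        str(history_dir.resolve()).encode(),
        usedforsecurity=False,
    ).hexdigest()[:12]

    client = MemoryClient(
        memory_path=Path("./memory_db"),
        openai_base_url="https://api.openai.com/v1",
        embedding_model="text-embedding-3-small",  # placeholder model name
        embedding_api_key="sk-...",  # placeholder key
        chat_api_key="sk-...",  # placeholder key
        start_watcher=False,
    )
    client.start()  # start the file watcher, as _try_init_memory does
    try:
        retrieval = await client.search(
            query="user preferences",
            conversation_id=conversation_id,
            top_k=5,
        )
        for entry in retrieval.entries:
            print(entry.content)
    finally:
        await client.stop()


if __name__ == "__main__":
    asyncio.run(main())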
16 changes: 5 additions & 11 deletions agent_cli/agents/memory/add.py
@@ -6,7 +6,7 @@
import re
import sys
from datetime import UTC, datetime
from pathlib import Path # noqa: TC003
from pathlib import Path
from typing import TYPE_CHECKING, Any

import typer
Expand Down Expand Up @@ -127,16 +127,8 @@ def add(
"-c",
help="Conversation ID to add memories to.",
),
memory_path: Path = typer.Option( # noqa: B008
"./memory_db",
"--memory-path",
help="Path to the memory store.",
),
git_versioning: bool = typer.Option(
True, # noqa: FBT003
"--git-versioning/--no-git-versioning",
help="Commit changes to git.",
),
memory_path: Path | None = opts.MEMORY_PATH,
git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True),
quiet: bool = opts.QUIET,
config_file: str | None = opts.CONFIG_FILE,
print_args: bool = opts.PRINT_ARGS,
@@ -176,6 +168,8 @@ def add(
console.print("[red]No memories provided. Use arguments or --file.[/red]")
raise typer.Exit(1)

if memory_path is None:
memory_path = Path("./memory_db")
memory_path = memory_path.resolve()
records = _write_memories(memory_path, parsed, git_versioning)

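The add command now reuses the shared option definitions (opts.MEMORY_PATH, opts.MEMORY_GIT_VERSIONING), with opts.with_default overriding a shared option's default, here re-enabling git versioning by default. The helper itself is not shown in this diff; a hypothetical sketch of what it might look like, assuming Typer's OptionInfo model:

import copy
from typing import Any

import typer


def with_default(option: typer.models.OptionInfo, *, default: Any) -> Any:
    """Sketch: return a copy of a shared typer.Option with a different default."""
    new_option = copy.copy(option)
    new_option.default = default
    return new_option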