26 commits
052cbdd
feat(chat): integrate advanced vector-backed memory system
basnijholt Jan 4, 2026
6006d13
Merge 052cbdd64ebc6822806c1c0f15c489723af2132f into 853ea9de073db7d6a…
basnijholt Jan 4, 2026
06fd1ac
Update auto-generated docs
github-actions[bot] Jan 4, 2026
c432bf9
docs: update documentation for advanced memory integration
basnijholt Jan 4, 2026
da636d1
Merge c432bf965dbccaabe0efd2a8ba8524eb840834bb into 853ea9de073db7d6a…
basnijholt Jan 4, 2026
41c2f41
Update auto-generated docs
github-actions[bot] Jan 4, 2026
0557d36
refactor(chat): simplify memory system by removing dual-backend
basnijholt Jan 4, 2026
7acdf11
Merge 0557d36477366dc7eb5b5349de46739ee3fc3c75 into 853ea9de073db7d6a…
basnijholt Jan 4, 2026
731fac7
Update auto-generated docs
github-actions[bot] Jan 4, 2026
c098806
refactor(chat): remove globals and simplify memory tools
basnijholt Jan 4, 2026
f40fa09
refactor(chat): remove list_memory_categories tool
basnijholt Jan 4, 2026
05edfdd
feat(chat): add --memory-mode option for memory control
basnijholt Jan 4, 2026
e0e8d86
Merge 05edfdd418e22be8bdf747aff300bd1632ce5392 into d7905589723cde6df…
basnijholt Jan 4, 2026
868cb7e
Update auto-generated docs
github-actions[bot] Jan 4, 2026
d1ff04a
refactor: address review comments - move hashlib import to top, simpl…
basnijholt Jan 4, 2026
73a07f3
docs: regenerate auto-generated CLI help sections
basnijholt Jan 4, 2026
b0eabde
Merge 73a07f3be7bd78aa8b7804d09da9f8384bebb25b into d7905589723cde6df…
basnijholt Jan 4, 2026
29cab5a
Update auto-generated docs
github-actions[bot] Jan 4, 2026
d6fc660
fix(chat): use configured LLM model for memory extraction
basnijholt Jan 4, 2026
6b48758
fix: address review issues and add missing tests
basnijholt Jan 4, 2026
53539cb
Merge 6b4875873d7c93006fb7510bfb4cdaafdd36c822 into d7905589723cde6df…
basnijholt Jan 4, 2026
0b2a749
Update auto-generated docs
github-actions[bot] Jan 4, 2026
d104dcb
fix(tests): extend timeout for memory integration tests on Windows
basnijholt Jan 4, 2026
718cb70
fix(chat): run memory extraction in background (don't block conversat…
basnijholt Jan 4, 2026
17de4da
fix(chat): enable memory search in auto mode
basnijholt Jan 4, 2026
bf31697
feat(chat): auto-inject memories in auto mode
basnijholt Jan 5, 2026
Files changed
168 changes: 108 additions & 60 deletions README.md

Large diffs are not rendered by default.

466 changes: 172 additions & 294 deletions agent_cli/_tools.py

Large diffs are not rendered by default.
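Note: the agent_cli/_tools.py diff is collapsed above, but the new call site in chat.py below, tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only), implies a factory that gates memory tools on the mode. The following is a hypothetical sketch of that gating only; the tool names and closure structure are assumptions, not the actual implementation:

from collections.abc import Callable
from typing import Any


def tools(
    memory_client: Any | None,
    conversation_id: str,
    *,
    memory_read_only: bool = False,
) -> list[Callable[..., Any]]:
    """Sketch: assemble the tool list according to the memory mode.

    memory_client is None -> "off": no memory tools.
    memory_read_only=True -> "auto": search/list only; extraction is automatic.
    otherwise             -> "tools": full access, including add_memory.
    """

    def search_memory(query: str) -> str:
        """Search long-term memory scoped to conversation_id (placeholder body)."""
        ...

    def list_all_memories() -> str:
        """Show all stored memories with their details (placeholder body)."""
        ...

    def add_memory(content: str) -> str:
        """Write access; withheld in read-only ("auto") mode (placeholder body)."""
        ...

    base: list[Callable[..., Any]] = []  # non-memory tools would go here
    if memory_client is None:
        return base
    memory_tools: list[Callable[..., Any]] = [search_memory, list_all_memories]
    if not memory_read_only:
        memory_tools.append(add_memory)
    return base + memory_tools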

231 changes: 226 additions & 5 deletions agent_cli/agents/chat.py
@@ -13,6 +13,7 @@
from __future__ import annotations

import asyncio
import hashlib
import json
import logging
import os
@@ -50,9 +51,100 @@
if TYPE_CHECKING:
from rich.live import Live

from agent_cli.memory.client import MemoryClient


LOGGER = logging.getLogger(__name__)


def _get_conversation_id(history_cfg: config.History) -> str:
"""Generate a stable conversation ID from history configuration.

Uses a hash of the history directory path to ensure consistency across sessions.
"""
if history_cfg.history_dir:
return hashlib.md5(
str(Path(history_cfg.history_dir).resolve()).encode(),
usedforsecurity=False,
).hexdigest()[:12]
return "default"


def _try_init_memory(
memory_cfg: config.Memory,
history_cfg: config.History,
openai_llm_cfg: config.OpenAILLM,
quiet: bool,
) -> MemoryClient | None:
"""Try to initialize the memory system.

Returns the MemoryClient if successful, None otherwise.
"""
from agent_cli.memory.client import MemoryClient # noqa: PLC0415

# Determine memory path
memory_path = memory_cfg.memory_path
if memory_path is None:
if history_cfg.history_dir:
memory_path = Path(history_cfg.history_dir).expanduser() / "vector_memory"
else:
memory_path = Path.home() / ".config" / "agent-cli" / "memory" / "vector_db"

# Determine OpenAI base URL for embeddings
openai_base_url = openai_llm_cfg.openai_base_url or "https://api.openai.com/v1"

if not quiet:
console.print("[dim]Initializing memory system...[/dim]")

memory_client = MemoryClient(
memory_path=memory_path,
openai_base_url=openai_base_url,
embedding_model=memory_cfg.embedding_model,
embedding_api_key=openai_llm_cfg.openai_api_key,
chat_api_key=openai_llm_cfg.openai_api_key,
default_top_k=memory_cfg.top_k,
score_threshold=memory_cfg.score_threshold,
recency_weight=memory_cfg.recency_weight,
mmr_lambda=memory_cfg.mmr_lambda,
enable_summarization=memory_cfg.enable_summarization,
enable_git_versioning=memory_cfg.enable_git_versioning,
max_entries=memory_cfg.max_entries,
start_watcher=False,
)

# Start the memory client's file watcher
memory_client.start()

if not quiet:
console.print("[green]Memory system initialized[/green]")

return memory_client


def _maybe_init_memory(
memory_cfg: config.Memory,
history_cfg: config.History,
openai_llm_cfg: config.OpenAILLM,
quiet: bool,
) -> MemoryClient | None:
"""Initialize memory if mode is not 'off', handling errors gracefully."""
if memory_cfg.mode == "off":
return None
try:
return _try_init_memory(memory_cfg, history_cfg, openai_llm_cfg, quiet)
except ImportError:
if not quiet:
console.print(
"[yellow]Memory system not available. "
"Install with: pip install 'agent-cli[memory]'[/yellow]",
)
except Exception as e:
if not quiet:
console.print(f"[yellow]Failed to initialize memory: {e}[/yellow]")
LOGGER.warning("Failed to initialize memory: %s", e)
return None


# --- Conversation History ---


@@ -74,9 +166,7 @@ class ConversationEntry(TypedDict):
- execute_code: Execute a shell command.
- add_memory: Add important information to long-term memory for future recall.
- search_memory: Search your long-term memory for relevant information.
- update_memory: Modify existing memories by ID when information changes.
- list_all_memories: Show all stored memories with their IDs and details.
- list_memory_categories: See what types of information you've remembered.
- list_all_memories: Show all stored memories with their details.
- duckduckgo_search: Search the web for current information.

Memory Guidelines:
@@ -144,10 +234,67 @@ def _format_conversation_for_llm(history: list[ConversationEntry]) -> str:
return "\n".join(formatted_lines)


async def _maybe_extract_memories(
memory_cfg: config.Memory,
memory_client: MemoryClient | None,
instruction: str,
response_text: str,
conversation_id: str,
model: str,
quiet: bool,
) -> None:
"""Extract memories in auto mode, silently skip otherwise."""
if memory_cfg.mode != "auto" or memory_client is None:
return
try:
await memory_client.extract_from_turn(
user_message=instruction,
assistant_message=response_text,
conversation_id=conversation_id,
model=model,
)
if not quiet:
console.print("[dim]💾 Memory extraction complete[/dim]")
except Exception as e:
LOGGER.warning("Failed to extract memories: %s", e)


async def _maybe_retrieve_memories(
memory_cfg: config.Memory,
memory_client: MemoryClient | None,
instruction: str,
conversation_id: str,
) -> str:
"""Retrieve relevant memories in auto mode for prompt injection.

Returns formatted memory context string, or empty string if not applicable.
"""
if memory_cfg.mode != "auto" or memory_client is None:
return ""
try:
retrieval = await memory_client.search(
query=instruction,
conversation_id=conversation_id,
top_k=memory_cfg.top_k,
)
if not retrieval.entries:
return ""
lines = ["\n<relevant-memories>"]
lines.extend(f"- {entry.content}" for entry in retrieval.entries)
lines.append("</relevant-memories>")
return "\n".join(lines)
except Exception as e:
LOGGER.warning("Failed to retrieve memories: %s", e)
return ""


async def _handle_conversation_turn(
*,
stop_event: InteractiveStopEvent,
conversation_history: list[ConversationEntry],
memory_client: MemoryClient | None,
conversation_id: str,
memory_cfg: config.Memory,
provider_cfg: config.ProviderSelection,
general_cfg: config.General,
history_cfg: config.History,
@@ -213,6 +360,15 @@ async def _handle_conversation_turn(
instruction=instruction,
)

# 3b. Auto-retrieve and inject memories in "auto" mode
memory_context = await _maybe_retrieve_memories(
memory_cfg,
memory_client,
instruction,
conversation_id,
)
system_prompt = SYSTEM_PROMPT + memory_context

# 4. Get LLM response with timing

start_time = time.monotonic()
@@ -230,16 +386,22 @@ async def _handle_conversation_turn(
quiet=general_cfg.quiet,
stop_event=stop_event,
):
# Memory tools access:
# - "off": no memory tools
# - "tools": full access (add, search, list)
# - "auto": read-only access (search, list) - extraction happens automatically
tool_memory_client = memory_client if memory_cfg.mode != "off" else None
memory_read_only = memory_cfg.mode == "auto"
response_text = await get_llm_response(
system_prompt=SYSTEM_PROMPT,
system_prompt=system_prompt,
agent_instructions=AGENT_INSTRUCTIONS,
user_input=user_message_with_context,
provider_cfg=provider_cfg,
ollama_cfg=ollama_cfg,
openai_cfg=openai_llm_cfg,
gemini_cfg=gemini_llm_cfg,
logger=LOGGER,
tools=tools(),
tools=tools(tool_memory_client, conversation_id, memory_read_only=memory_read_only),
quiet=True, # Suppress internal output since we're showing our own timer
live=live,
)
@@ -267,6 +429,20 @@ async def _handle_conversation_turn(
},
)

# 5b. Auto-extract memories in "auto" mode (run in background, don't block)
if memory_cfg.mode == "auto" and memory_client is not None:
asyncio.create_task( # noqa: RUF006
_maybe_extract_memories(
memory_cfg,
memory_client,
instruction,
response_text,
conversation_id,
openai_llm_cfg.llm_openai_model,
general_cfg.quiet,
),
)

# 6. Save history
if history_cfg.history_dir:
history_path = Path(history_cfg.history_dir).expanduser()
@@ -318,8 +494,11 @@ async def _async_main(
openai_tts_cfg: config.OpenAITTS,
kokoro_tts_cfg: config.KokoroTTS,
gemini_tts_cfg: config.GeminiTTS,
memory_cfg: config.Memory,
) -> None:
"""Main async function, consumes parsed arguments."""
memory_client = None

try:
device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
if device_info is None:
@@ -329,6 +508,14 @@
if audio_out_cfg.enable_tts:
audio_out_cfg.output_device_index = tts_output_device_index

# Initialize memory system (if not disabled)
memory_client = _maybe_init_memory(
memory_cfg,
history_cfg,
openai_llm_cfg,
general_cfg.quiet,
)

# Load conversation history
conversation_history = []
if history_cfg.history_dir:
@@ -342,6 +529,9 @@
history_cfg.last_n_messages,
)

# Generate conversation ID for memory scoping
conversation_id = _get_conversation_id(history_cfg)

with (
maybe_live(not general_cfg.quiet) as live,
signal_handling_context(LOGGER, general_cfg.quiet) as stop_event,
@@ -350,6 +540,9 @@
await _handle_conversation_turn(
stop_event=stop_event,
conversation_history=conversation_history,
memory_client=memory_client,
conversation_id=conversation_id,
memory_cfg=memory_cfg,
provider_cfg=provider_cfg,
general_cfg=general_cfg,
history_cfg=history_cfg,
@@ -371,6 +564,10 @@
if not general_cfg.quiet:
console.print_exception()
raise
finally:
# Clean up memory client
if memory_client is not None:
await memory_client.stop()


@app.command("chat")
@@ -433,6 +630,17 @@ def chat(
" Set to 0 to disable history.",
rich_help_panel="History Options",
),
# --- Memory Options ---
memory_mode: str = opts.MEMORY_MODE,
memory_path: Path | None = opts.MEMORY_PATH,
embedding_model: str = opts.EMBEDDING_MODEL,
memory_top_k: int = opts.MEMORY_TOP_K,
memory_score_threshold: float = opts.MEMORY_SCORE_THRESHOLD,
memory_max_entries: int = opts.MEMORY_MAX_ENTRIES,
memory_mmr_lambda: float = opts.MEMORY_MMR_LAMBDA,
memory_recency_weight: float = opts.MEMORY_RECENCY_WEIGHT,
memory_summarization: bool = opts.MEMORY_SUMMARIZATION,
memory_git_versioning: bool = opts.MEMORY_GIT_VERSIONING,
# --- General Options ---
save_file: Path | None = opts.SAVE_FILE,
log_level: str = opts.LOG_LEVEL,
@@ -535,6 +743,18 @@ def chat(
history_dir=history_dir,
last_n_messages=last_n_messages,
)
memory_cfg = config.Memory(
mode=memory_mode, # type: ignore[arg-type]
memory_path=memory_path,
embedding_model=embedding_model,
top_k=memory_top_k,
score_threshold=memory_score_threshold,
max_entries=memory_max_entries,
mmr_lambda=memory_mmr_lambda,
recency_weight=memory_recency_weight,
enable_summarization=memory_summarization,
enable_git_versioning=memory_git_versioning,
)

asyncio.run(
_async_main(
Expand All @@ -553,5 +773,6 @@ def chat(
openai_tts_cfg=openai_tts_cfg,
kokoro_tts_cfg=kokoro_tts_cfg,
gemini_tts_cfg=gemini_tts_cfg,
memory_cfg=memory_cfg,
),
)
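For orientation, the MemoryClient lifecycle that chat.py wires up above can be exercised standalone. A minimal sketch, assuming agent-cli[memory] is installed; the paths, model name, and API keys are placeholders, and which constructor arguments are required is an assumption. Only the calls that appear in this diff (the constructor, start, search, and stop, plus extract_from_turn in auto mode) are taken from the PR:

import asyncio
import hashlib
from pathlib import Path

from agent_cli.memory.client import MemoryClient


async def main() -> None:
    # Conversation ID derived the same way as _get_conversation_id above:
    # MD5 of the resolved history directory, truncated to 12 hex characters.
    history_dir = Path("~/.config/agent-cli/history").expanduser()  # placeholder path
    conversation_id = hashlib.md5(
        str(history_dir.resolve()).encode(),
        usedforsecurity=False,
    ).hexdigest()[:12]

    client = MemoryClient(
        memory_path=Path("./memory_db"),
        openai_base_url="https://api.openai.com/v1",
        embedding_model="text-embedding-3-small",  # placeholder model name
        embedding_api_key="sk-...",  # placeholder key
        chat_api_key="sk-...",  # placeholder key
        start_watcher=False,
    )
    client.start()  # start the file watcher, as _try_init_memory does
    try:
        retrieval = await client.search(
            query="user preferences",
            conversation_id=conversation_id,
            top_k=5,
        )
        for entry in retrieval.entries:
            print(entry.content)
    finally:
        await client.stop()


if __name__ == "__main__":
    asyncio.run(main())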
16 changes: 5 additions & 11 deletions agent_cli/agents/memory/add.py
@@ -6,7 +6,7 @@
import re
import sys
from datetime import UTC, datetime
from pathlib import Path # noqa: TC003
from pathlib import Path
from typing import TYPE_CHECKING, Any

import typer
Expand Down Expand Up @@ -127,16 +127,8 @@ def add(
"-c",
help="Conversation ID to add memories to.",
),
memory_path: Path = typer.Option( # noqa: B008
"./memory_db",
"--memory-path",
help="Path to the memory store.",
),
git_versioning: bool = typer.Option(
True, # noqa: FBT003
"--git-versioning/--no-git-versioning",
help="Commit changes to git.",
),
memory_path: Path | None = opts.MEMORY_PATH,
git_versioning: bool = opts.with_default(opts.MEMORY_GIT_VERSIONING, default=True),
quiet: bool = opts.QUIET,
config_file: str | None = opts.CONFIG_FILE,
print_args: bool = opts.PRINT_ARGS,
@@ -176,6 +168,8 @@ def add(
console.print("[red]No memories provided. Use arguments or --file.[/red]")
raise typer.Exit(1)

if memory_path is None:
memory_path = Path("./memory_db")
memory_path = memory_path.resolve()
records = _write_memories(memory_path, parsed, git_versioning)

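The add command now reuses the shared option definitions (opts.MEMORY_PATH, opts.MEMORY_GIT_VERSIONING), with opts.with_default overriding a shared option's default, here re-enabling git versioning by default. The helper itself is not shown in this diff; a hypothetical sketch of what it might look like, assuming Typer's OptionInfo model:

import copy
from typing import Any

import typer


def with_default(option: typer.models.OptionInfo, *, default: Any) -> Any:
    """Sketch: return a copy of a shared typer.Option with a different default."""
    new_option = copy.copy(option)
    new_option.default = default
    return new_option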