From 1bbdd7cc00166883b70f3a40a6a9f22b9df84627 Mon Sep 17 00:00:00 2001 From: Catalin Lupuleti Date: Wed, 20 May 2026 17:03:36 +0100 Subject: [PATCH 1/2] feat: MCP server so agents can call session search over stdio Adds a `code-recall mcp` subcommand that serves an MCP stdio server, turning code-recall into a retrieval tool any agent (Claude Code, Codex) can call instead of grepping ~/.claude transcripts by hand. Why MCP-over-stdio rather than a remote server or a bare CLI: the CLI already has --json, so the real gap was agent-native integration. stdio is the documented transport for local tools with direct filesystem/DB access, and Claude Code's Tool Search loads MCP tools on demand so a focused tool set doesn't bloat context. Shipping it as a subcommand of the existing binary keeps it to one install. Tools exposed (FastMCP, structured output): - search_sessions(query, limit, provider, project) -> ranked hits with title, project, provider, score, why-matched, snippet, resume command. Supports file:/cmd:/branch: prefixes. - get_session_detail(session_id) -> files, commands, branch, model, prompts, resume command. The server honors the same --db / --claude-dir / --codex-dir flags as the rest of the CLI (paths captured in the tool closures), runs a quick incremental index before each query, and stays local-only. mcp is a new optional dependency: `code-recall[mcp]`, also folded into `[all]`. Uses typing_extensions.TypedDict for pydantic schema-gen on Python < 3.12. Tests: 6 new tool-layer tests (call_tool against a temp index with the reindex stubbed) plus a manual stdio round-trip against a real index. 342 pass. 
--- README.md | 18 +++ pyproject.toml | 3 +- src/code_recall/cli.py | 14 ++ src/code_recall/mcp_server.py | 239 ++++++++++++++++++++++++++++++++++ tests/test_mcp_server.py | 76 +++++++++++ 5 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 src/code_recall/mcp_server.py create mode 100644 tests/test_mcp_server.py diff --git a/README.md b/README.md index 6abe80c..6a0b827 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,24 @@ code-recall currently indexes: AI features use the matching assistant when possible: Claude Code sessions prefer `claude -p`, Codex sessions prefer `codex exec`. If the matching CLI is not installed, code-recall falls back to the other supported CLI when available and shows the assistant provider in the AI tab. +## MCP Server + +code-recall can run as an [MCP](https://modelcontextprotocol.io) server over stdio, so a coding agent can call fast, ranked session retrieval instead of blindly grepping transcript files. + +```bash +# Requires the mcp extra: pip install 'code-recall[all]' or 'code-recall[mcp]' +claude mcp add code-recall -- code-recall mcp +``` + +It exposes two tools: + +| Tool | What it does | +|------|--------------| +| `search_sessions(query, limit, provider, project)` | Hybrid ranked search over the local index. Returns matched sessions with title, project, provider, score, why-matched, a snippet, and a ready-to-run resume command. Supports `file:`, `cmd:`, `branch:` prefixes. | +| `get_session_detail(session_id)` | Full detail for one session: files touched, commands run, branch, model, first/last prompts, resume command. | + +The server reads only the local SQLite index — no transcript content leaves your machine, and every query runs a quick incremental index first so results stay fresh. 
+ ## Index Freshness code-recall keeps the index fresh in two ways: diff --git a/pyproject.toml b/pyproject.toml index ab279c5..5b62001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ code-recall = "code_recall.cli:main" [project.optional-dependencies] semantic = ["fastembed>=0.4", "sqlite-vec>=0.1"] tui = ["textual>=1.0"] -all = ["code-recall[semantic,tui]"] +mcp = ["mcp>=1.2"] +all = ["code-recall[semantic,tui,mcp]"] dev = ["pytest>=7.0", "pytest-cov>=4.0", "tomli>=2.0; python_version < '3.11'"] [project.urls] diff --git a/src/code_recall/cli.py b/src/code_recall/cli.py index 4a9f393..0de8c05 100644 --- a/src/code_recall/cli.py +++ b/src/code_recall/cli.py @@ -87,6 +87,9 @@ def _run(argv: list[str] | None = None) -> None: if command in ("index", "i"): _cmd_index(args) + elif command == "mcp": + _cmd_mcp(args) + return elif command == "info": _cmd_info(args) elif command == "gc": @@ -520,6 +523,17 @@ def _cmd_index(args: argparse.Namespace) -> None: ) +def _cmd_mcp(args: argparse.Namespace) -> None: + """Run the MCP server over stdio so agents can call session search.""" + from code_recall.mcp_server import run + + run( + db_path=args.db, + projects_dir=args.claude_dir, + codex_dir=None if args.no_codex else args.codex_dir, + ) + + def _cmd_info(args: argparse.Namespace) -> None: if not args.db.exists(): print("No index found. Run 'code-recall' to build it.") diff --git a/src/code_recall/mcp_server.py b/src/code_recall/mcp_server.py new file mode 100644 index 0000000..2df06e2 --- /dev/null +++ b/src/code_recall/mcp_server.py @@ -0,0 +1,239 @@ +"""MCP server exposing code-recall session search to coding agents. + +Runs over stdio so an agent (Claude Code, Codex, etc.) can call fast, +ranked session retrieval instead of blindly grepping transcript files. 
+ +Add to Claude Code: + claude mcp add code-recall -- code-recall mcp +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +# Pydantic (used by FastMCP for schema generation) requires +# typing_extensions.TypedDict on Python < 3.12. +from typing_extensions import TypedDict + +from code_recall.db import DB_PATH, get_connection +from code_recall.models import SearchResult +from code_recall.searcher import search +from code_recall.utils import CODEX_DIR, PROJECTS_DIR, clean_display_text + + +class SessionHit(TypedDict): + """A ranked session match returned by search_sessions.""" + + session_id: str + title: str + project: str + provider: str + branch: str | None + modified: str | None + message_count: int + score: float + why: str + snippet: str + resume_command: str + + +class SessionDetail(TypedDict): + """Full detail for a single session returned by get_session_detail.""" + + session_id: str + title: str + project: str + project_path: str + provider: str + branch: str | None + model: str | None + created: str | None + modified: str | None + message_count: int + first_prompt: str | None + last_prompt: str | None + files_modified: list[str] + commands_run: list[str] + resume_command: str + transcript_path: str + + +def _why_matched(result: SearchResult) -> str: + """A short, agent-readable reason this session matched.""" + snippets = [clean_display_text(s) for s in result.snippets if clean_display_text(s)] + if result.fts_rank is not None and result.vec_score is not None: + return "hybrid keyword + semantic match" + if result.fts_rank is not None: + return "keyword match" + if result.vec_score is not None: + return "semantic match" + if snippets: + return f"matched text: {snippets[0][:120]}" + return "relevance match" + + +def _title(result: SearchResult) -> str: + s = result.session + return ( + clean_display_text(s.summary) + or clean_display_text(s.first_prompt) + or "(untitled session)" + ) + + +def _to_hit(result: 
SearchResult) -> SessionHit: + s = result.session + snippets = [clean_display_text(x) for x in result.snippets if clean_display_text(x)] + return SessionHit( + session_id=s.session_id, + title=_title(result), + project=result.display_project, + provider=s.provider, + branch=s.git_branch or s.git_branch_detected, + modified=s.modified, + message_count=s.message_count, + score=round(result.score, 4), + why=_why_matched(result), + snippet=(snippets[0][:280] if snippets else ""), + resume_command=result.resume_command, + ) + + +def _json_list(value: str | None) -> list[str]: + if not value: + return [] + try: + parsed = json.loads(value) + except (TypeError, ValueError): + return [] + return [str(item) for item in parsed if str(item).strip()] if isinstance(parsed, list) else [] + + +def build_server( + db_path: Path = DB_PATH, + projects_dir: Path = PROJECTS_DIR, + codex_dir: Path | None = CODEX_DIR, +): + """Construct the FastMCP server. Imported lazily so the rest of the + package works without the optional `mcp` dependency installed. + + Paths are captured in the tool closures so the server honors the same + --db / --claude-dir / --codex-dir flags as the rest of the CLI.""" + from mcp.server.fastmcp import FastMCP + + mcp = FastMCP("code-recall") + + @mcp.tool() + def search_sessions( + query: str, + limit: int = 10, + provider: str | None = None, + project: str | None = None, + ) -> list[SessionHit]: + """Search past Claude Code and Codex sessions by intent. + + Use this instead of grepping transcript files: it runs a fast, + ranked hybrid search (keyword + semantic + knowledge graph) over + the local index and returns the most relevant sessions with a + ready-to-run resume command. + + Args: + query: Natural-language description of the session you want. + Supports structured prefixes: 'file:path', 'cmd:name', + 'branch:name'. + limit: Max sessions to return (default 10). + provider: Filter to 'claude' or 'codex'. None = both. 
+ project: Substring filter on the project path. + """ + _ensure_fresh_index(db_path, projects_dir, codex_dir) + results = search(query, db_path=db_path, limit=limit, project_filter=project) + if provider: + results = [r for r in results if r.session.provider == provider] + return [_to_hit(r) for r in results[:limit]] + + @mcp.tool() + def get_session_detail(session_id: str) -> SessionDetail | None: + """Get full detail for one session by its id. + + Returns the files it touched, commands it ran, branch, model, + first/last prompts, and the resume command. Use after + search_sessions to inspect a specific candidate. + """ + conn = get_connection(db_path) + try: + row = conn.execute( + "SELECT * FROM sessions WHERE session_id = ?", (session_id,) + ).fetchone() + finally: + conn.close() + if row is None: + return None + + from code_recall.models import Session + + cols = row.keys() + s = Session(**{k: row[k] for k in cols if k in Session.__dataclass_fields__}) + result = SearchResult(session=s) + return SessionDetail( + session_id=s.session_id, + title=_title(result), + project=result.display_project, + project_path=s.project_path, + provider=s.provider, + branch=s.git_branch or s.git_branch_detected, + model=s.model, + created=s.created, + modified=s.modified, + message_count=s.message_count, + first_prompt=clean_display_text(s.first_prompt), + last_prompt=clean_display_text(s.last_prompt), + files_modified=_json_list(s.files_modified), + commands_run=_json_list(s.commands_run), + resume_command=result.resume_command, + transcript_path=s.file_path, + ) + + return mcp + + +def _ensure_fresh_index( + db_path: Path = DB_PATH, + projects_dir: Path = PROJECTS_DIR, + codex_dir: Path | None = CODEX_DIR, +) -> None: + """Quick incremental index before serving a query. 
Never raises — + a locked or missing index just means we search what's there.""" + try: + from code_recall.indexer import ensure_index + + ensure_index( + projects_dir=projects_dir, + db_path=db_path, + codex_dir=codex_dir if (codex_dir and codex_dir.exists()) else None, + verbose=False, + ) + except Exception: + pass + + +def run( + db_path: Path = DB_PATH, + projects_dir: Path = PROJECTS_DIR, + codex_dir: Path | None = CODEX_DIR, +) -> None: + """Entry point for `code-recall mcp`. Serves over stdio. + + Note: stdout is reserved for the JSON-RPC protocol — all diagnostics + must go to stderr. + """ + try: + server = build_server(db_path=db_path, projects_dir=projects_dir, codex_dir=codex_dir) + except ImportError: + print( + "The 'mcp' package is required. Install with: pip install 'code-recall[mcp]'", + file=sys.stderr, + ) + raise SystemExit(1) + server.run(transport="stdio") diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py new file mode 100644 index 0000000..7f9a537 --- /dev/null +++ b/tests/test_mcp_server.py @@ -0,0 +1,76 @@ +"""Tests for the MCP server (code_recall.mcp_server). + +These exercise the FastMCP tool layer end-to-end via call_tool, against a +temporary index, with the real-projects reindex disabled. Async calls are +wrapped with asyncio.run so no pytest-asyncio dependency is needed. 
+""" + +from __future__ import annotations + +import asyncio + +import pytest + +pytest.importorskip("mcp", reason="mcp extra not installed") + +import code_recall.mcp_server as mcp_server + + +@pytest.fixture +def server(populated_db, db_path, monkeypatch): + """A FastMCP server pointed at the populated temp index, with the + pre-query incremental reindex stubbed out.""" + monkeypatch.setattr(mcp_server, "_ensure_fresh_index", lambda *a, **k: None) + return mcp_server.build_server(db_path=db_path) + + +def _call(server, name, args): + """Run a tool and return its structured result payload.""" + _content, structured = asyncio.run(server.call_tool(name, args)) + return structured["result"] + + +def test_lists_expected_tools(server): + tools = asyncio.run(server.list_tools()) + names = {t.name for t in tools} + assert names == {"search_sessions", "get_session_detail"} + + +def test_search_returns_structured_hits(server): + hits = _call(server, "search_sessions", {"query": "auth middleware"}) + assert hits, "expected at least one hit for 'auth middleware'" + top = hits[0] + assert top["session_id"] == "abc123" + for key in ( + "session_id", "title", "project", "provider", "branch", + "modified", "message_count", "score", "why", "snippet", + "resume_command", + ): + assert key in top + assert top["resume_command"] == "claude --resume abc123" + assert 0.0 <= top["score"] <= 1.0 + + +def test_search_respects_limit(server): + hits = _call(server, "search_sessions", {"query": "the", "limit": 1}) + assert len(hits) <= 1 + + +def test_search_provider_filter(server): + # All sample sessions are provider 'claude'; codex filter yields nothing. 
+ assert _call(server, "search_sessions", {"query": "router", "provider": "codex"}) == [] + assert _call(server, "search_sessions", {"query": "router", "provider": "claude"}) + + +def test_get_session_detail(server): + detail = _call(server, "get_session_detail", {"session_id": "abc123"}) + assert detail["session_id"] == "abc123" + assert detail["title"] == "Debugging auth middleware" + assert detail["branch"] == "fix/auth-bug" + assert detail["resume_command"] == "claude --resume abc123" + assert isinstance(detail["files_modified"], list) + assert isinstance(detail["commands_run"], list) + + +def test_get_session_detail_missing_returns_none(server): + assert _call(server, "get_session_detail", {"session_id": "nope"}) is None From a20bc0bc8719905d4336d221c20f6edd9e05faa3 Mon Sep 17 00:00:00 2001 From: Catalin Lupuleti Date: Wed, 20 May 2026 17:10:13 +0100 Subject: [PATCH 2/2] feat(mcp): one-line install that wires both Claude Code and Codex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `code-recall mcp install` — registers the MCP server with every supported agent CLI found on PATH, so setup for both providers is a single command: pip install 'code-recall[all]' && code-recall mcp install Under the hood it runs (idempotently — removes any stale entry first): claude mcp add --scope user code-recall -- code-recall mcp codex mcp add code-recall -- code-recall mcp Verified end-to-end against real CLIs: Claude Code reports "✓ Connected"; Codex registers as an enabled stdio server (the "Unsupported" it shows is the Auth column — stdio servers don't do OAuth, same as any other local MCP server). If an agent CLI is absent it's skipped with a note rather than failing. README now leads the MCP section with the one-liner and documents the manual commands in a details block. Tests: +2 (no-CLI fallback returns 1; both-CLIs path issues add for each). 344 pass. 
--- README.md | 21 ++++++++++-- src/code_recall/cli.py | 10 +++++- src/code_recall/mcp_server.py | 60 +++++++++++++++++++++++++++++++++++ tests/test_mcp_server.py | 28 ++++++++++++++++ 4 files changed, 115 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6a0b827..c07d5d2 100644 --- a/README.md +++ b/README.md @@ -191,13 +191,28 @@ AI features use the matching assistant when possible: Claude Code sessions prefe ## MCP Server -code-recall can run as an [MCP](https://modelcontextprotocol.io) server over stdio, so a coding agent can call fast, ranked session retrieval instead of blindly grepping transcript files. +code-recall can run as an [MCP](https://modelcontextprotocol.io) server over stdio, so your coding agent calls fast, ranked session retrieval instead of blindly grepping transcript files. + +**Install and wire it into both Claude Code and Codex in one line:** ```bash -# Requires the mcp extra: pip install 'code-recall[all]' or 'code-recall[mcp]' -claude mcp add code-recall -- code-recall mcp +pip install 'code-recall[all]' && code-recall mcp install ``` +`code-recall mcp install` registers the server with every agent CLI it finds on your PATH (`claude` and `codex`) and is idempotent — re-run it any time. Restart the agent, then just ask it to find a past session. + +
+<details> +<summary>Manual registration / what it runs under the hood</summary> + +```bash +# Claude Code (user scope — available in every project) +claude mcp add --scope user code-recall -- code-recall mcp + +# Codex +codex mcp add code-recall -- code-recall mcp +``` +</details>
+ It exposes two tools: | Tool | What it does | diff --git a/src/code_recall/cli.py b/src/code_recall/cli.py index 0de8c05..0861b5c 100644 --- a/src/code_recall/cli.py +++ b/src/code_recall/cli.py @@ -524,7 +524,15 @@ def _cmd_index(args: argparse.Namespace) -> None: def _cmd_mcp(args: argparse.Namespace) -> None: - """Run the MCP server over stdio so agents can call session search.""" + """Run the MCP server, or 'code-recall mcp install' to register it + with the agent CLIs (Claude Code and Codex).""" + sub = args.query[1] if len(args.query) > 1 else None + + if sub in ("install", "setup"): + from code_recall.mcp_server import install_to_agents + + raise SystemExit(install_to_agents()) + from code_recall.mcp_server import run run( diff --git a/src/code_recall/mcp_server.py b/src/code_recall/mcp_server.py index 2df06e2..465ce9f 100644 --- a/src/code_recall/mcp_server.py +++ b/src/code_recall/mcp_server.py @@ -10,6 +10,8 @@ from __future__ import annotations import json +import shutil +import subprocess import sys from pathlib import Path @@ -218,6 +220,64 @@ def _ensure_fresh_index( pass +def install_to_agents() -> int: + """Register the MCP server with every supported agent CLI found on PATH + (Claude Code and Codex). Idempotent — re-running re-points existing + entries. Returns 0 if at least one agent was configured. 
+ + Registration commands (run for whichever CLI exists): + claude mcp add --scope user code-recall -- code-recall mcp + codex mcp add code-recall -- code-recall mcp + """ + agents = [ + { + "name": "Claude Code", + "cli": "claude", + "remove": ["claude", "mcp", "remove", "--scope", "user", "code-recall"], + "add": ["claude", "mcp", "add", "--scope", "user", "code-recall", "--", "code-recall", "mcp"], + }, + { + "name": "Codex", + "cli": "codex", + "remove": ["codex", "mcp", "remove", "code-recall"], + "add": ["codex", "mcp", "add", "code-recall", "--", "code-recall", "mcp"], + }, + ] + + configured = 0 + found_any = False + for agent in agents: + if shutil.which(agent["cli"]) is None: + print(f"- {agent['name']}: {agent['cli']} not on PATH, skipped", file=sys.stderr) + continue + found_any = True + subprocess.run(agent["remove"], capture_output=True) # drop stale entry + result = subprocess.run(agent["add"], capture_output=True, text=True) + if result.returncode == 0: + print(f"- {agent['name']}: registered (code-recall mcp)", file=sys.stderr) + configured += 1 + else: + print(f"- {agent['name']}: FAILED", file=sys.stderr) + sys.stderr.write(result.stdout) + sys.stderr.write(result.stderr) + + if not found_any: + print( + "No supported agent CLI found (claude or codex). Install one, then run:\n" + " code-recall mcp install", + file=sys.stderr, + ) + return 1 + + if configured: + print( + f"\nDone — code-recall is wired into {configured} agent(s). 
" + "Restart the agent, then ask it to find a past session.", + file=sys.stderr, + ) + return 0 if configured else 1 + + def run( db_path: Path = DB_PATH, projects_dir: Path = PROJECTS_DIR, diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 7f9a537..f676482 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -74,3 +74,31 @@ def test_get_session_detail(server): def test_get_session_detail_missing_returns_none(server): assert _call(server, "get_session_detail", {"session_id": "nope"}) is None + + +def test_install_no_agent_cli_returns_error(monkeypatch): + """install_to_agents reports cleanly when neither agent CLI is present.""" + monkeypatch.setattr(mcp_server.shutil, "which", lambda _name: None) + assert mcp_server.install_to_agents() == 1 + + +def test_install_registers_found_agents(monkeypatch): + """install_to_agents runs add for each CLI on PATH and returns 0.""" + import subprocess + + monkeypatch.setattr( + mcp_server.shutil, "which", + lambda name: f"/usr/local/bin/{name}" if name in ("claude", "codex") else None, + ) + calls: list[list[str]] = [] + + def fake_run(cmd, *a, **k): + calls.append(cmd) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + monkeypatch.setattr(mcp_server.subprocess, "run", fake_run) + assert mcp_server.install_to_agents() == 0 + # Both agents' add commands were issued + added = [c for c in calls if "add" in c] + assert any("claude" in c for c in added) + assert any("codex" in c for c in added)