Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,39 @@ code-recall currently indexes:

AI features use the matching assistant when possible: Claude Code sessions prefer `claude -p`, Codex sessions prefer `codex exec`. If the matching CLI is not installed, code-recall falls back to the other supported CLI when available and shows the assistant provider in the AI tab.

## MCP Server

code-recall can run as an [MCP](https://modelcontextprotocol.io) server over stdio, so your coding agent calls fast, ranked session retrieval instead of blindly grepping transcript files.

**Install and wire it into both Claude Code and Codex in one line:**

```bash
pip install 'code-recall[all]' && code-recall mcp install
```

`code-recall mcp install` registers the server with every agent CLI it finds on your PATH (`claude` and `codex`) and is idempotent — re-run it any time. Restart the agent, then just ask it to find a past session.

<details>
<summary>Manual registration / what it runs under the hood</summary>

```bash
# Claude Code (user scope — available in every project)
claude mcp add --scope user code-recall -- code-recall mcp

# Codex
codex mcp add code-recall -- code-recall mcp
```
</details>

It exposes two tools:

| Tool | What it does |
|------|--------------|
| `search_sessions(query, limit, provider, project)` | Hybrid ranked search over the local index. Returns matched sessions with title, project, provider, score, why-matched, a snippet, and a ready-to-run resume command. Supports `file:`, `cmd:`, `branch:` prefixes. |
| `get_session_detail(session_id)` | Full detail for one session: files touched, commands run, branch, model, first/last prompts, resume command. |

The server reads only the local SQLite index — no transcript content leaves your machine, and every query runs a quick incremental index first so results stay fresh.

## Index Freshness

code-recall keeps the index fresh in two ways:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ code-recall = "code_recall.cli:main"
[project.optional-dependencies]
semantic = ["fastembed>=0.4", "sqlite-vec>=0.1"]
tui = ["textual>=1.0"]
all = ["code-recall[semantic,tui]"]
mcp = ["mcp>=1.2"]
all = ["code-recall[semantic,tui,mcp]"]
Comment on lines +34 to +35
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Bump package version for new MCP user-facing feature

This commit introduces a new user-visible command (code-recall mcp) and documentation surface but keeps the package version unchanged (0.2.5). Under this repo’s versioning policy, user-visible behavior changes must bump version metadata, otherwise installed artifacts are indistinguishable across feature boundaries and release/version checks can drift.

Useful? React with 👍 / 👎.

dev = ["pytest>=7.0", "pytest-cov>=4.0", "tomli>=2.0; python_version < '3.11'"]

[project.urls]
Expand Down
22 changes: 22 additions & 0 deletions src/code_recall/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ def _run(argv: list[str] | None = None) -> None:

if command in ("index", "i"):
_cmd_index(args)
elif command == "mcp":
_cmd_mcp(args)
return
elif command == "info":
_cmd_info(args)
elif command == "gc":
Expand Down Expand Up @@ -520,6 +523,25 @@ def _cmd_index(args: argparse.Namespace) -> None:
)


def _cmd_mcp(args: argparse.Namespace) -> None:
    """Handle the ``mcp`` command.

    ``code-recall mcp install`` (alias ``setup``) registers the server with
    the agent CLIs (Claude Code and Codex) and exits with the installer's
    status code. Any other invocation starts the MCP server using the
    database and transcript directories carried on *args*.
    """
    words = args.query
    # The optional sub-command is the second positional word, when present.
    subcommand = None
    if len(words) > 1:
        subcommand = words[1]

    if subcommand == "install" or subcommand == "setup":
        from code_recall.mcp_server import install_to_agents

        raise SystemExit(install_to_agents())

    from code_recall.mcp_server import run

    # --no-codex disables Codex indexing by passing no Codex directory at all.
    if args.no_codex:
        codex_dir = None
    else:
        codex_dir = args.codex_dir

    run(db_path=args.db, projects_dir=args.claude_dir, codex_dir=codex_dir)


def _cmd_info(args: argparse.Namespace) -> None:
if not args.db.exists():
print("No index found. Run 'code-recall' to build it.")
Expand Down
299 changes: 299 additions & 0 deletions src/code_recall/mcp_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
"""MCP server exposing code-recall session search to coding agents.

Runs over stdio so an agent (Claude Code, Codex, etc.) can call fast,
ranked session retrieval instead of blindly grepping transcript files.

Add to Claude Code (user scope, the same command `code-recall mcp install` runs):
    claude mcp add --scope user code-recall -- code-recall mcp
"""

from __future__ import annotations

import json
import shutil
import subprocess
import sys
from pathlib import Path

# Pydantic (used by FastMCP for schema generation) requires
# typing_extensions.TypedDict on Python < 3.12.
from typing_extensions import TypedDict

from code_recall.db import DB_PATH, get_connection
from code_recall.models import SearchResult
from code_recall.searcher import search
from code_recall.utils import CODEX_DIR, PROJECTS_DIR, clean_display_text


class SessionHit(TypedDict):
    """A ranked session match returned by search_sessions."""

    # Identifier of the matched session.
    session_id: str
    # Cleaned summary, else cleaned first prompt, else "(untitled session)".
    title: str
    # Display name of the project, taken from the search result.
    project: str
    # Provider recorded on the session (the tool docs name 'claude' / 'codex').
    provider: str
    # Recorded git branch, falling back to the detected one; may be absent.
    branch: str | None
    # Last-modified value stored on the session; may be absent.
    modified: str | None
    # Number of messages recorded for the session.
    message_count: int
    # Relevance score, rounded to 4 decimal places.
    score: float
    # Short, agent-readable reason the session matched.
    why: str
    # First non-empty cleaned snippet, capped at 280 characters ("" if none).
    snippet: str
    # Ready-to-run command that resumes the session.
    resume_command: str


class SessionDetail(TypedDict):
    """Full detail for a single session returned by get_session_detail."""

    # Identifier of the session.
    session_id: str
    # Cleaned summary, else cleaned first prompt, else "(untitled session)".
    title: str
    # Display name of the project.
    project: str
    # Project path as stored on the session row.
    project_path: str
    # Provider recorded on the session.
    provider: str
    # Recorded git branch, falling back to the detected one; may be absent.
    branch: str | None
    # Model recorded on the session; may be absent.
    model: str | None
    # Creation value stored on the session; may be absent.
    created: str | None
    # Last-modified value stored on the session; may be absent.
    modified: str | None
    # Number of messages recorded for the session.
    message_count: int
    # First prompt after display cleaning; may be absent.
    first_prompt: str | None
    # Last prompt after display cleaning; may be absent.
    last_prompt: str | None
    # Decoded from the JSON-encoded files_modified column; blank entries dropped.
    files_modified: list[str]
    # Decoded from the JSON-encoded commands_run column; blank entries dropped.
    commands_run: list[str]
    # Ready-to-run command that resumes the session.
    resume_command: str
    # Path of the session's transcript file (the row's file_path).
    transcript_path: str


def _why_matched(result: SearchResult) -> str:
"""A short, agent-readable reason this session matched."""
snippets = [clean_display_text(s) for s in result.snippets if clean_display_text(s)]
if result.fts_rank is not None and result.vec_score is not None:
return "hybrid keyword + semantic match"
if result.fts_rank is not None:
return "keyword match"
if result.vec_score is not None:
return "semantic match"
if snippets:
return f"matched text: {snippets[0][:120]}"
return "relevance match"


def _title(result: SearchResult) -> str:
    """Pick a display title for the result's session.

    Preference order: the cleaned summary, then the cleaned first prompt,
    then the fixed placeholder "(untitled session)".
    """
    session = result.session
    for candidate in (session.summary, session.first_prompt):
        cleaned = clean_display_text(candidate)
        if cleaned:
            return cleaned
    return "(untitled session)"


def _to_hit(result: SearchResult) -> SessionHit:
    """Convert a search result into the SessionHit payload returned to agents.

    Args:
        result: Ranked search result to flatten.

    Returns:
        A SessionHit whose ``snippet`` is the first non-empty cleaned snippet
        capped at 280 characters (empty string when there is none) and whose
        ``score`` is rounded to 4 decimal places.
    """
    session = result.session

    # Only the first usable snippet is shown, so clean each candidate once and
    # stop as soon as one survives instead of cleaning every snippet twice.
    snippet = ""
    for raw in result.snippets:
        cleaned = clean_display_text(raw)
        if cleaned:
            snippet = cleaned[:280]
            break

    return SessionHit(
        session_id=session.session_id,
        title=_title(result),
        project=result.display_project,
        provider=session.provider,
        # Prefer the recorded branch; fall back to the detected one.
        branch=session.git_branch or session.git_branch_detected,
        modified=session.modified,
        message_count=session.message_count,
        score=round(result.score, 4),
        why=_why_matched(result),
        snippet=snippet,
        resume_command=result.resume_command,
    )


def _json_list(value: str | None) -> list[str]:
if not value:
return []
try:
parsed = json.loads(value)
except (TypeError, ValueError):
return []
return [str(item) for item in parsed if str(item).strip()] if isinstance(parsed, list) else []


def build_server(
db_path: Path = DB_PATH,
projects_dir: Path = PROJECTS_DIR,
codex_dir: Path | None = CODEX_DIR,
):
"""Construct the FastMCP server. Imported lazily so the rest of the
package works without the optional `mcp` dependency installed.

Paths are captured in the tool closures so the server honors the same
--db / --claude-dir / --codex-dir flags as the rest of the CLI."""
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("code-recall")

@mcp.tool()
def search_sessions(
    query: str,
    limit: int = 10,
    provider: str | None = None,
    project: str | None = None,
) -> list[SessionHit]:
    """Search past Claude Code and Codex sessions by intent.

    Use this instead of grepping transcript files: it runs a fast,
    ranked hybrid search (keyword + semantic + knowledge graph) over
    the local index and returns the most relevant sessions with a
    ready-to-run resume command.

    Args:
        query: Natural-language description of the session you want.
            Supports structured prefixes: 'file:path', 'cmd:name',
            'branch:name'.
        limit: Max sessions to return (default 10).
        provider: Filter to 'claude' or 'codex'. None = both.
        project: Substring filter on the project path.
    """
    # A non-positive limit can never yield results; skip the work (a negative
    # value would otherwise slice the list from the wrong end).
    if limit <= 0:
        return []

    _ensure_fresh_index(db_path, projects_dir, codex_dir)

    if not provider:
        results = search(query, db_path=db_path, limit=limit, project_filter=project)
        return [_to_hit(r) for r in results[:limit]]

    # The provider filter is applied after ranking. Fetching only `limit`
    # unfiltered hits could hide every session of the requested provider
    # behind higher-ranked sessions of the other one, so widen the candidate
    # pool until enough matches are found, the search is exhausted, or a
    # safety cap is reached.
    max_fetch = max(limit, 1000)
    fetch = limit
    while True:
        results = search(query, db_path=db_path, limit=fetch, project_filter=project)
        matches = [r for r in results if r.session.provider == provider]
        # NOTE(review): assumes search() returns at most `limit` rows, so a
        # short page means there is nothing further to fetch — confirm.
        exhausted = len(results) < fetch
        if len(matches) >= limit or exhausted or fetch >= max_fetch:
            break
        fetch = min(fetch * 4, max_fetch)
    return [_to_hit(r) for r in matches[:limit]]
Comment on lines +153 to +156
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Apply provider filter before limiting search candidates

search_sessions fetches only limit unfiltered results and then applies the provider filter in Python, so in mixed-provider datasets a valid provider can appear to have no matches just because its sessions rank below the first limit global hits. This makes MCP results incomplete/incorrect for provider="claude" or provider="codex" queries when both providers are indexed.

Useful? React with 👍 / 👎.


@mcp.tool()
def get_session_detail(session_id: str) -> SessionDetail | None:
    """Get full detail for one session by its id.

    Returns the files it touched, commands it ran, branch, model,
    first/last prompts, and the resume command. Use after
    search_sessions to inspect a specific candidate.
    """
    # Refresh the index before reading, as search_sessions does. Without this
    # a session already written to a transcript but not yet indexed would come
    # back as None or with stale metadata, contradicting the documented
    # "every query runs a quick incremental index first" behavior.
    _ensure_fresh_index(db_path, projects_dir, codex_dir)

    conn = get_connection(db_path)
    try:
        row = conn.execute(
            "SELECT * FROM sessions WHERE session_id = ?", (session_id,)
        ).fetchone()
    finally:
        # Always release the connection, even if the query raises.
        conn.close()
    if row is None:
        return None

    from code_recall.models import Session

    # Build the Session from only the columns it declares, so extra columns
    # in the table do not break construction.
    known_fields = Session.__dataclass_fields__
    s = Session(**{k: row[k] for k in row.keys() if k in known_fields})
    result = SearchResult(session=s)
    return SessionDetail(
        session_id=s.session_id,
        title=_title(result),
        project=result.display_project,
        project_path=s.project_path,
        provider=s.provider,
        # Prefer the recorded branch; fall back to the detected one.
        branch=s.git_branch or s.git_branch_detected,
        model=s.model,
        created=s.created,
        modified=s.modified,
        message_count=s.message_count,
        first_prompt=clean_display_text(s.first_prompt),
        last_prompt=clean_display_text(s.last_prompt),
        files_modified=_json_list(s.files_modified),
        commands_run=_json_list(s.commands_run),
        resume_command=result.resume_command,
        transcript_path=s.file_path,
    )

return mcp


def _ensure_fresh_index(
    db_path: Path = DB_PATH,
    projects_dir: Path = PROJECTS_DIR,
    codex_dir: Path | None = CODEX_DIR,
) -> None:
    """Run a quick incremental index before serving a query.

    Best-effort by design: this never raises. If indexing fails for any
    reason, the caller simply queries whatever is already indexed.

    Args:
        db_path: SQLite index to update.
        projects_dir: Directory passed to the indexer as ``projects_dir``.
        codex_dir: Codex directory; passed to the indexer only when it is set
            and exists on disk, otherwise ``None`` is passed.
    """
    import logging

    try:
        from code_recall.indexer import ensure_index

        ensure_index(
            projects_dir=projects_dir,
            db_path=db_path,
            codex_dir=codex_dir if (codex_dir and codex_dir.exists()) else None,
            verbose=False,
        )
    except Exception:
        # Deliberate top-level boundary: a refresh failure must not break the
        # query. Record it through logging (never stdout, which carries the
        # JSON-RPC stream) so the cause is recoverable when debugging.
        logging.getLogger(__name__).debug(
            "incremental index refresh failed; serving the existing index",
            exc_info=True,
        )


def install_to_agents() -> int:
    """Register the MCP server with every supported agent CLI found on PATH
    (Claude Code and Codex). Idempotent — re-running re-points existing
    entries.

    Registration commands (run for whichever CLI exists):
        claude mcp add --scope user code-recall -- code-recall mcp
        codex mcp add code-recall -- code-recall mcp

    All progress and error output goes to stderr.

    Returns:
        0 if at least one agent was configured, otherwise 1.
    """
    # Argument lists exclude the executable; the path resolved by
    # shutil.which is prepended at run time.
    agents = [
        {
            "name": "Claude Code",
            "cli": "claude",
            "remove": ["mcp", "remove", "--scope", "user", "code-recall"],
            "add": ["mcp", "add", "--scope", "user", "code-recall", "--", "code-recall", "mcp"],
        },
        {
            "name": "Codex",
            "cli": "codex",
            "remove": ["mcp", "remove", "code-recall"],
            "add": ["mcp", "add", "code-recall", "--", "code-recall", "mcp"],
        },
    ]

    configured = 0
    found_any = False
    for agent in agents:
        executable = shutil.which(agent["cli"])
        if executable is None:
            print(f"- {agent['name']}: {agent['cli']} not on PATH, skipped", file=sys.stderr)
            continue
        found_any = True
        try:
            # Launch the resolved path rather than the bare name: on Windows a
            # bare name does not resolve script shims such as `claude.cmd`.
            subprocess.run([executable, *agent["remove"]], capture_output=True)  # drop stale entry
            result = subprocess.run(
                [executable, *agent["add"]], capture_output=True, text=True
            )
        except OSError as exc:
            # One CLI that cannot be launched must not abort the others.
            print(f"- {agent['name']}: FAILED ({exc})", file=sys.stderr)
            continue
        if result.returncode == 0:
            print(f"- {agent['name']}: registered (code-recall mcp)", file=sys.stderr)
            configured += 1
        else:
            print(f"- {agent['name']}: FAILED", file=sys.stderr)
            sys.stderr.write(result.stdout)
            sys.stderr.write(result.stderr)

    if not found_any:
        print(
            "No supported agent CLI found (claude or codex). Install one, then run:\n"
            "  code-recall mcp install",
            file=sys.stderr,
        )
        return 1

    if configured:
        print(
            f"\nDone — code-recall is wired into {configured} agent(s). "
            "Restart the agent, then ask it to find a past session.",
            file=sys.stderr,
        )
    return 0 if configured else 1


def run(
    db_path: Path = DB_PATH,
    projects_dir: Path = PROJECTS_DIR,
    codex_dir: Path | None = CODEX_DIR,
) -> None:
    """Start the MCP server for `code-recall mcp` and serve it over stdio.

    If the optional `mcp` dependency cannot be imported, an install hint is
    printed to stderr and the process exits with status 1.

    Note: stdout belongs to the JSON-RPC protocol — every diagnostic must be
    written to stderr.
    """
    try:
        server = build_server(
            db_path=db_path,
            projects_dir=projects_dir,
            codex_dir=codex_dir,
        )
    except ImportError:
        hint = "The 'mcp' package is required. Install with: pip install 'code-recall[mcp]'"
        print(hint, file=sys.stderr)
        raise SystemExit(1)
    else:
        server.run(transport="stdio")
Loading