From 236d21d46715e4e3d6922e94a49e10c6efd8b900 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:07:49 -0500 Subject: [PATCH 1/7] agentgrep(feat[pi]): Register pi agent name across modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: Adding earendil-works/pi (issue #25) as a searchable backend starts with teaching every agent-name surface that "pi" exists. This commit is inert on its own — pi is a recognized agent with no stores yet, so discovery and parsing are never reached — which keeps the literal change isolated from the catalog/discovery/parser layers that follow. what: - Add "pi" to the AgentName literal in stores.py, __init__.py, and mcp/_library.py, plus the AgentSelector literal and AGENT_CHOICES. - Add "pi" to the five MCP model agent literals and the query registry's agent enum_values (and its docstring values line). - Mention Pi in the MCP server-instruction header and trigger scope. - Add "pi" to the package description and keywords. 
--- pyproject.toml | 4 ++-- src/agentgrep/__init__.py | 13 +++++++------ src/agentgrep/mcp/_library.py | 6 ++++-- src/agentgrep/mcp/instructions.py | 4 ++-- src/agentgrep/mcp/models.py | 10 +++++----- src/agentgrep/query/registry.py | 4 ++-- src/agentgrep/stores.py | 2 +- 7 files changed, 23 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b3315b..76a06b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "agentgrep" version = "0.1.0a11" -description = "Read-only search for local AI agent prompts and history (Codex, Claude Code, Cursor, Gemini, Grok)" +description = "Read-only search for local AI agent prompts and history (Codex, Claude Code, Cursor, Gemini, Grok, Pi)" requires-python = ">=3.14,<4.0" authors = [ {name = "Tony Narlock", email = "tony@git-pull.com"} @@ -21,7 +21,7 @@ classifiers = [ "Typing :: Typed", ] -keywords = ["ai", "codex", "claude", "cursor", "gemini", "grok", "mcp", "search", "agent-history"] +keywords = ["ai", "codex", "claude", "cursor", "gemini", "grok", "pi", "mcp", "search", "agent-history"] readme = "README.md" packages = [ { include = "*", from = "src" }, diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index a228220..35b0661 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -83,7 +83,7 @@ else: PrivatePathBase = type(pathlib.Path()) -AgentName = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] +AgentName = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] OutputMode = t.Literal["text", "json", "ndjson", "ui"] ProgressMode = t.Literal["auto", "always", "never"] SearchType = t.Literal["prompts", "history", "all"] @@ -102,6 +102,7 @@ "cursor-ide", "gemini", "grok", + "pi", ) JSON_FILE_SUFFIXES: frozenset[str] = frozenset({".json", ".jsonl"}) SCHEMA_VERSION: str = "agentgrep.v1" @@ -221,11 +222,11 @@ def build_description( CLI_DESCRIPTION = build_description( """ - Read-only 
search across Codex, Claude, Cursor, Gemini, and Grok - local stores. Pick a subcommand from the list below: ``search`` for - ranked results with dedup and session grouping, ``grep`` for - rg-shaped content search, ``find`` for store enumeration, ``ui`` - for the interactive Textual explorer. + Read-only search across Codex, Claude, Cursor, Gemini, Grok, and + Pi local stores. Pick a subcommand from the list below: + ``search`` for ranked results with dedup and session grouping, + ``grep`` for rg-shaped content search, ``find`` for store + enumeration, ``ui`` for the interactive Textual explorer. """, ( ( diff --git a/src/agentgrep/mcp/_library.py b/src/agentgrep/mcp/_library.py index 4389192..c437b56 100644 --- a/src/agentgrep/mcp/_library.py +++ b/src/agentgrep/mcp/_library.py @@ -13,8 +13,10 @@ import pathlib import typing as t -AgentName = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] -AgentSelector = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "all"] +AgentName = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] +AgentSelector = t.Literal[ + "codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi", "all" +] SearchTypeName = t.Literal["prompts", "history", "all"] SERVER_VERSION = "0.1.0" diff --git a/src/agentgrep/mcp/instructions.py b/src/agentgrep/mcp/instructions.py index e4b2f6b..caba9b2 100644 --- a/src/agentgrep/mcp/instructions.py +++ b/src/agentgrep/mcp/instructions.py @@ -10,7 +10,7 @@ _INSTR_HEADER = ( "agentgrep MCP server. Read-only search over local AI-agent prompts and " - "history across Codex, Claude Code, Cursor, Gemini, and Grok CLIs. All tools " + "history across Codex, Claude Code, Cursor, Gemini, Grok, and Pi CLIs. All tools " "are read-only and never spawn writes." 
) @@ -18,7 +18,7 @@ "TRIGGERS: invoke for retrospective questions about what the user typed " "into or received from a coding-agent CLI (prompts, history, session " "transcripts, store discovery). Bare 'prompt', 'history', 'transcript', " - "'session', 'what did I ask Claude/Codex/Cursor/Gemini/Grok' default to " + "'session', 'what did I ask Claude/Codex/Cursor/Gemini/Grok/Pi' default to " "agentgrep.\n" "ANTI-TRIGGERS: do NOT invoke for IDE editor history (VS Code timeline), " "shell history (zsh/fish history), browser tabs, or live agent sessions " diff --git a/src/agentgrep/mcp/models.py b/src/agentgrep/mcp/models.py index bc56b15..03d8cba 100644 --- a/src/agentgrep/mcp/models.py +++ b/src/agentgrep/mcp/models.py @@ -28,7 +28,7 @@ class SearchRecordModel(AgentGrepModel): schema_version: str = agentgrep.SCHEMA_VERSION kind: t.Literal["prompt", "history"] - agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] + agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] store: str adapter_id: str path: str @@ -52,7 +52,7 @@ class FindRecordModel(AgentGrepModel): schema_version: str = agentgrep.SCHEMA_VERSION kind: t.Literal["find"] - agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] + agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] store: str adapter_id: str path: str @@ -84,7 +84,7 @@ class SourceRecordModel(AgentGrepModel): """Discovered source summary payload.""" schema_version: str = agentgrep.SCHEMA_VERSION - agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] + agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] store: str adapter_id: str path: str @@ -150,7 +150,7 @@ class CapabilitiesModel(AgentGrepModel): name: str = "agentgrep" version: str = SERVER_VERSION read_only: bool = True - agents: list[t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", 
"grok"]] + agents: list[t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"]] search_types: list[SearchTypeName] adapters: list[str] tools: list[str] @@ -185,7 +185,7 @@ class StoreDescriptorModel(AgentGrepModel): schema_version: str = agentgrep.SCHEMA_VERSION kind: t.Literal["store"] = "store" - agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] + agent: t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] store_id: str role: str format: str diff --git a/src/agentgrep/query/registry.py b/src/agentgrep/query/registry.py index bafcb67..4a458c2 100644 --- a/src/agentgrep/query/registry.py +++ b/src/agentgrep/query/registry.py @@ -100,7 +100,7 @@ def default_registry() -> FieldRegistry: ============= ====== ======= =========================================== Field Kind Layer Notes ============= ====== ======= =========================================== - ``agent`` enum source Values: codex, claude, cursor-cli, cursor-ide, gemini, grok + ``agent`` enum source Values: codex, claude, cursor-cli, cursor-ide, gemini, grok, pi ``store`` string source Substring against :attr:`SourceHandle.store` ``adapter`` string source Alias of ``adapter_id`` ``path`` path source Glob against the file basename by default @@ -117,7 +117,7 @@ def default_registry() -> FieldRegistry: name="agent", kind="enum", layer="source", - enum_values=("codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"), + enum_values=("codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"), ), FieldSpec(name="store", kind="string", layer="source"), FieldSpec( diff --git a/src/agentgrep/stores.py b/src/agentgrep/stores.py index 23abdbf..f7ed528 100644 --- a/src/agentgrep/stores.py +++ b/src/agentgrep/stores.py @@ -88,7 +88,7 @@ class VersionDetectionConfidence(enum.StrEnum): LOW = "low" -AgentName = t.Literal["claude", "cursor-cli", "cursor-ide", "codex", "gemini", "grok"] +AgentName = t.Literal["claude", 
"cursor-cli", "cursor-ide", "codex", "gemini", "grok", "pi"] PathKind = t.Literal["history_file", "session_file", "sqlite_db", "store_file"] SourceKind = t.Literal["json", "jsonl", "sqlite", "text", "opaque"] From 5a76272785d7e3f144dffe64631a21a2b1dce11a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:11:42 -0500 Subject: [PATCH 2/7] agentgrep(feat[pi]): Add pi store descriptors and session adapter why: The catalogue is agentgrep's single source of truth for where agent data lives and what shape it takes. pi gets one searchable store (its JSONL session transcripts) plus documentary descriptors for every other on-disk artifact it can create, so the catalogue stays a complete inventory even for data agentgrep never searches. what: - Add _PI_OBSERVED_AT and the _PI_STORES tuple: pi.sessions (PRIMARY_CHAT, searched, two discovery specs for the default nested layout and the flat PI_CODING_AGENT_SESSION_DIR override) plus nine documentary rows (settings, auth [PRIVATE credentials], models, themes, tools, bin, prompts, debug_log, extensions_npm). - Splice _PI_STORES into CATALOG, bump catalog_version to 12, and advance captured_at to the pi observation date. - Register pi.sessions_jsonl.v1 in the MCP KNOWN_ADAPTERS tuple. - Add "pi" to the test-side KNOWN_AGENTS so the catalogue invariants cover the new rows. 
--- src/agentgrep/mcp/_library.py | 1 + src/agentgrep/store_catalog.py | 194 ++++++++++++++++++++++++++++++++- tests/test_stores.py | 2 + 3 files changed, 195 insertions(+), 2 deletions(-) diff --git a/src/agentgrep/mcp/_library.py b/src/agentgrep/mcp/_library.py index c437b56..8c3f5cf 100644 --- a/src/agentgrep/mcp/_library.py +++ b/src/agentgrep/mcp/_library.py @@ -77,6 +77,7 @@ "grok.prompt_history_jsonl.v1", "grok.sessions_jsonl.v1", "grok.session_search_sqlite.v1", + "pi.sessions_jsonl.v1", ) READONLY_TAGS = {"readonly", "agentgrep"} RESOURCE_ANNOTATIONS = {"readOnlyHint": True, "idempotentHint": True} diff --git a/src/agentgrep/store_catalog.py b/src/agentgrep/store_catalog.py index ea3b9b8..b28be22 100644 --- a/src/agentgrep/store_catalog.py +++ b/src/agentgrep/store_catalog.py @@ -33,6 +33,7 @@ _GROK_OBSERVED_AT = datetime.date(2026, 5, 25) _CLAUDE_HISTORY_OBSERVED_AT = datetime.date(2026, 5, 29) _CURSOR_CONFIG_OBSERVED_AT = datetime.date(2026, 5, 30) +_PI_OBSERVED_AT = datetime.date(2026, 5, 30) def gemini_project_hash(project_root: pathlib.Path) -> str: @@ -2756,9 +2757,197 @@ def gemini_project_hash(project_root: pathlib.Path) -> str: ) +_PI_STORES: tuple[StoreDescriptor, ...] = ( + StoreDescriptor( + agent="pi", + store_id="pi.sessions", + role=StoreRole.PRIMARY_CHAT, + format=StoreFormat.JSONL, + path_pattern=( + "${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/sessions/" + "----/_.jsonl" + ), + env_overrides=("PI_CODING_AGENT_DIR", "PI_CODING_AGENT_SESSION_DIR"), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + upstream_ref=( + "github.com/earendil-works/pi@v0.78.0/packages/coding-agent/" + "src/core/session-manager.ts#L51-L54" + ), + schema_notes=( + "Append-only JSONL transcript, one file per session, grouped by " + "working directory (`----`, leading slash stripped, " + '`/ \\ :` -> `-`). Line 1 is a `type:"session"` header (`id`, ' + "`timestamp`, `cwd`; `version` is 3 and may be absent in v1 files). 
" + "Each later line is a SessionEntry tagged union sharing " + "`id`/`parentId`/`timestamp`: `message` wraps an LLM message " + "(`role` user/assistant/toolResult, `content` string or " + "content-blocks; assistant turns carry `model`/`provider`); " + "`compaction`/`branch_summary` carry a `summary`; `session_info` " + "carries a user-set `name`. No separate prompt-history log or " + "SQLite index exists." + ), + sample_record=( + '{"type":"message","id":"...","parentId":"...",' + '"timestamp":"2026-05-30T18:23:54.003Z","message":{"role":"user",' + '"content":[{"type":"text","text":""}],' + '"timestamp":1780165434002}}' + ), + search_by_default=True, + search_notes=( + "The sole searchable pi store. User turns surface as prompts and " + "assistant/tool turns as history via the shared role->kind mapping; " + "compaction/branch summaries and session names are included as " + "history text." + ), + discovery=( + DiscoverySpec( + store="pi.sessions", + adapter_id="pi.sessions_jsonl.v1", + path_kind="session_file", + source_kind="jsonl", + root_key="default", + home_subpath=("sessions",), + glob="*.jsonl", + ), + DiscoverySpec( + store="pi.sessions", + adapter_id="pi.sessions_jsonl.v1", + path_kind="session_file", + source_kind="jsonl", + root_key="pi_session", + glob="*.jsonl", + ), + ), + ), + StoreDescriptor( + agent="pi", + store_id="pi.settings", + role=StoreRole.APP_STATE, + format=StoreFormat.JSON_OBJECT, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/settings.json", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes=( + "User preferences: selected models, themes, installed extension " + "`packages`, and assorted UI/agent settings. Configuration, not " + "chat content." 
+ ), + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.auth", + role=StoreRole.APP_STATE, + format=StoreFormat.JSON_OBJECT, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/auth.json", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes="Provider API credentials. Documented but never enumerated.", + coverage=StoreCoverage.PRIVATE, + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.models", + role=StoreRole.APP_STATE, + format=StoreFormat.JSON_OBJECT, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/models.json", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes=( + "Custom model definitions and provider overrides. Created only " + "when the user adds custom models." + ), + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.themes", + role=StoreRole.APP_STATE, + format=StoreFormat.JSON_OBJECT, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/themes/.json", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes="User-defined TUI colour schemes. Created only when the user adds themes.", + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.tools", + role=StoreRole.APP_STATE, + format=StoreFormat.OPAQUE, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/tools/", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes="Directory of user-authored custom tool scripts. 
Created on demand.", + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.bin", + role=StoreRole.APP_STATE, + format=StoreFormat.OPAQUE, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/bin/", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes="Managed binaries (e.g. `fd`, `rg`) pi downloads for its own use.", + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.prompts", + role=StoreRole.INSTRUCTION, + format=StoreFormat.MARKDOWN_FRONTMATTER, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/prompts/.md", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes=( + "User-authored Markdown prompt templates, not conversation history. Created on demand." + ), + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.debug_log", + role=StoreRole.APP_STATE, + format=StoreFormat.TEXT, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/pi-debug.log", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes="Runtime diagnostics log. Written only when debug logging is enabled.", + search_by_default=False, + ), + StoreDescriptor( + agent="pi", + store_id="pi.extensions_npm", + role=StoreRole.APP_STATE, + format=StoreFormat.OPAQUE, + path_pattern="${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/npm/", + env_overrides=("PI_CODING_AGENT_DIR",), + observed_version="pi v0.78.0 (observed 2026-05-30)", + observed_at=_PI_OBSERVED_AT, + schema_notes=( + "Managed npm extension install root: `package.json`, " + "`package-lock.json`, and `node_modules/`. Declared via the " + "`packages` array in pi.settings." 
+ ), + search_by_default=False, + ), +) + + CATALOG = StoreCatalog( - catalog_version=11, - captured_at=_CLAUDE_HISTORY_OBSERVED_AT, + catalog_version=12, + captured_at=_PI_OBSERVED_AT, stores=( *_CLAUDE_STORES, *_CURSOR_CLI_STORES, @@ -2766,6 +2955,7 @@ def gemini_project_hash(project_root: pathlib.Path) -> str: *_CODEX_STORES, *_GEMINI_STORES, *_GROK_STORES, + *_PI_STORES, ), ) """The canonical agentgrep store catalogue. diff --git a/tests/test_stores.py b/tests/test_stores.py index 0167b5c..d5efb35 100644 --- a/tests/test_stores.py +++ b/tests/test_stores.py @@ -31,6 +31,7 @@ "codex", "gemini", "grok", + "pi", ) PATH_TOKEN_RE = re.compile(r"\$\{(?:HOME|[A-Z][A-Z0-9_]*)(?:\s+or\s+[^}]+)?\}") @@ -292,6 +293,7 @@ def test_runtime_adapter_ids_match_catalogue_discovery() -> None: assert "grok.prompt_history_jsonl.v1" in runtime_adapter_ids assert "grok.sessions_jsonl.v1" in runtime_adapter_ids assert "grok.session_search_sqlite.v1" in runtime_adapter_ids + assert "pi.sessions_jsonl.v1" in runtime_adapter_ids # No catalogue row claims an adapter id the MCP capabilities # tuple doesn't advertise. From 2806124315de31ccd789f75b968f55726748f997 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:14:02 -0500 Subject: [PATCH 3/7] agentgrep(feat[pi]): Discover pi session sources why: With pi in the catalogue, discovery needs to resolve pi's data directory and enumerate its session files. pi diverges from the other backends in two ways: PI_CODING_AGENT_DIR already includes the agent segment (used verbatim, default ~/.pi/agent), and the optional PI_CODING_AGENT_SESSION_DIR points at the sessions directory directly, where files land flat with no per-cwd subdirectory. what: - Add discover_pi_sources, resolving the agent dir via resolve_env_root and the session dir via _resolve_optional_root, then handing both to discover_from_catalog as named roots ("default", "pi_session") so the nested and flat layouts are both covered. 
Reuses the Codex/Cursor multi-root pattern; per-descriptor dedup collapses the roots when the override is unset. - Wire the "pi" branch into discover_sources. --- src/agentgrep/__init__.py | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index 35b0661..f83e314 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -2333,6 +2333,14 @@ def discover_sources( include_non_default=include_non_default, ), ) + elif agent == "pi": + discovered.extend( + discover_pi_sources( + home, + backends, + include_non_default=include_non_default, + ), + ) discovered.sort(key=lambda item: (item.agent, item.store, str(item.path))) return discovered @@ -3253,6 +3261,43 @@ def discover_grok_sources( ) +def discover_pi_sources( + home: pathlib.Path, + backends: BackendSelection, + *, + include_non_default: bool = False, +) -> list[SourceHandle]: + """Discover pi (earendil-works/pi) session transcripts. + + Honours ``PI_CODING_AGENT_DIR`` (pi's agent data directory, used + verbatim) and falls back to ``${HOME}/.pi/agent``. The optional + ``PI_CODING_AGENT_SESSION_DIR`` overrides the sessions directory + directly: when set, pi writes session files flat into it with no + per-working-directory subdirectory, so it is resolved as a separate + discovery root. Path roots, globs, and adapter metadata come from + the ``pi.*`` rows of :data:`agentgrep.store_catalog.CATALOG`. 
+ """ + agent_dir = resolve_env_root("PI_CODING_AGENT_DIR", home / ".pi" / "agent") + session_dir = _resolve_optional_root( + os.environ.get("PI_CODING_AGENT_SESSION_DIR"), + agent_dir / "sessions", + label="PI_CODING_AGENT_SESSION_DIR", + ) + if not agent_dir.exists() and not session_dir.exists(): + return [] + roots: dict[str, DiscoveryRoot] = { + "default": agent_dir, + "pi_session": session_dir, + } + return discover_from_catalog( + home, + "pi", + roots, + backends, + include_non_default=include_non_default, + ) + + def list_files_matching( root: pathlib.Path, glob_pattern: str, From 53e6a86367ea3db729bca0096534ba3d52f4b552 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:17:27 -0500 Subject: [PATCH 4/7] agentgrep(feat[pi]): Parse pi session JSONL transcripts why: Discovered pi sessions need an adapter to turn their tagged-union entries into normalized search records. pi keeps everything in one append-only JSONL file, so the parser must walk the entry tree and lift the conversation turns, summaries, and session name while ignoring metadata-only entries. what: - Add parse_pi_session_file plus _pi_message_candidate and _pi_entry_text helpers. message entries become MessageCandidates fed through build_search_record so user turns map to prompts and the rest to history; compaction/branch_summary summaries and session_info names are emitted as history text; model/thinking/custom/label entries are skipped. - Prefer the entry-level ISO timestamp, falling back to the inner unix-milliseconds message timestamp for v1 entries; capture the session id and cwd from the header for record metadata. - Dispatch pi.sessions_jsonl.v1 in iter_source_records and register it in ITER_SOURCE_RECORD_ADAPTERS. 
--- src/agentgrep/__init__.py | 103 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index f83e314..e244f81 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -173,6 +173,7 @@ "grok.prompt_history_jsonl.v1", "grok.session_search_sqlite.v1", "grok.sessions_jsonl.v1", + "pi.sessions_jsonl.v1", }, ) EnvelopeFactory = t.Callable[[str, dict[str, object], list[dict[str, object]]], dict[str, object]] @@ -3826,6 +3827,9 @@ def iter_source_records( if source.adapter_id == "grok.session_search_sqlite.v1": yield from parse_grok_session_search_db(source) return + if source.adapter_id == "pi.sessions_jsonl.v1": + yield from parse_pi_session_file(source) + return def parse_codex_session_file( @@ -4731,6 +4735,105 @@ def _unix_millis_to_isoformat(value: object) -> str | None: return None +def _pi_message_candidate( + entry: dict[str, object], + entry_timestamp: str | None, + session_id: str | None, + conversation_id: str | None, +) -> MessageCandidate | None: + """Build a candidate from a pi ``message`` session entry. + + The entry wraps an LLM message under ``message`` (``role`` plus + ``content`` that is a string or content-blocks array). The + entry-level ISO timestamp is preferred; the inner unix-milliseconds + ``timestamp`` is the fallback for v1 entries that lack one. 
+ """ + message = entry.get("message") + if not isinstance(message, dict): + return None + message_map = t.cast("dict[str, object]", message) + role = as_optional_str(message_map.get("role")) + text = flatten_content_value(t.cast("JSONValue | None", message_map.get("content"))) + if role is None or not text: + return None + timestamp = entry_timestamp or _unix_millis_to_isoformat(message_map.get("timestamp")) + return MessageCandidate( + role=role, + text=text, + timestamp=timestamp, + model=as_optional_str(message_map.get("model")), + session_id=session_id, + conversation_id=conversation_id, + ) + + +def _pi_entry_text(entry_type: str, entry: dict[str, object]) -> str | None: + """Return searchable text from a non-message pi session entry. + + ``compaction``/``branch_summary`` carry a ``summary``; ``session_info`` + carries a user-set ``name``. Other entry types (model/thinking-level + changes, custom, label) are metadata-only and yield no text. + """ + if entry_type in {"compaction", "branch_summary"}: + return as_optional_str(entry.get("summary")) + if entry_type == "session_info": + return as_optional_str(entry.get("name")) + return None + + +def parse_pi_session_file( + source: SourceHandle, +) -> cabc.Iterator[SearchRecord]: + """Parse a pi (earendil-works/pi) session JSONL transcript. + + Line 1 is a ``type:"session"`` header (capturing ``id``/``cwd``); + ``version`` may be absent in v1 files. Each later line is a + ``SessionEntry`` tagged union. ``message`` entries become candidates + whose role drives the prompt/history split (user turns are prompts); + ``compaction``/``branch_summary`` summaries and ``session_info`` names + are emitted as history text. Metadata-only entries are skipped. 
+ """ + session_id: str | None = source.path.stem + conversation_id: str | None = None + for event in iter_jsonl(source.path): + if not isinstance(event, dict): + continue + mapping = t.cast("dict[str, object]", event) + entry_type = as_optional_str(mapping.get("type")) + if not entry_type: + continue + if entry_type == "session": + session_id = as_optional_str(mapping.get("id")) or session_id + conversation_id = as_optional_str(mapping.get("cwd")) + continue + entry_timestamp = as_optional_str(mapping.get("timestamp")) + if entry_type == "message": + candidate = _pi_message_candidate( + mapping, + entry_timestamp, + session_id, + conversation_id, + ) + if candidate is not None: + yield build_search_record(source, candidate) + continue + text = _pi_entry_text(entry_type, mapping) + if not text: + continue + yield SearchRecord( + kind="history", + agent=source.agent, + store=source.store, + adapter_id=source.adapter_id, + path=source.path, + text=text, + role=entry_type, + timestamp=entry_timestamp, + session_id=session_id, + conversation_id=conversation_id, + ) + + def parse_text_store_file( source: SourceHandle, ) -> cabc.Iterator[SearchRecord]: From bed66300c4a62f927a6e1134cd59f9a7d0fc58d5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:23:46 -0500 Subject: [PATCH 5/7] agentgrep(test[pi]): Cover pi discovery, env overrides, and parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: The pi backend needs the same test depth as the Claude and Grok backends — discovery under both layouts, the env overrides, and the per-entry parse behaviour — so regressions surface before release. what: - Add a pi.sessions fixture and register it in PRIMARY_FIXTURES so the catalogue's primary-store invariant covers pi. 
- Cover PI_CODING_AGENT_DIR (verbatim override) and the flat PI_CODING_AGENT_SESSION_DIR layout (cwd recovered from the header), plus an end-to-end search asserting user->prompt and assistant->history with the model lifted. - Add a NamedTuple + test_id parametrized parse test over every entry type (message roles, compaction/branch summaries, session name, and the skipped metadata-only entries) and a v1 unix-ms timestamp fallback test. --- tests/samples/pi/pi.sessions/example.jsonl | 9 + tests/test_agentgrep.py | 399 ++++++++++++++++++++- tests/test_stores.py | 1 + 3 files changed, 408 insertions(+), 1 deletion(-) create mode 100644 tests/samples/pi/pi.sessions/example.jsonl diff --git a/tests/samples/pi/pi.sessions/example.jsonl b/tests/samples/pi/pi.sessions/example.jsonl new file mode 100644 index 0000000..b4601bd --- /dev/null +++ b/tests/samples/pi/pi.sessions/example.jsonl @@ -0,0 +1,9 @@ +{"type":"session","version":3,"id":"019e0000-0000-7000-8000-000000000001","timestamp":"2026-05-30T12:00:00.000Z","cwd":"/home/user/project"} +{"type":"model_change","id":"m1","parentId":null,"timestamp":"2026-05-30T12:00:01.000Z","provider":"openrouter","modelId":"example/model"} +{"type":"thinking_level_change","id":"tl1","parentId":"m1","timestamp":"2026-05-30T12:00:01.500Z","thinkingLevel":"high"} +{"type":"message","id":"u1","parentId":"tl1","timestamp":"2026-05-30T12:00:02.000Z","message":{"role":"user","content":[{"type":"text","text":"explain the parser design"}],"timestamp":1780228802000}} +{"type":"message","id":"a1","parentId":"u1","timestamp":"2026-05-30T12:00:03.000Z","message":{"role":"assistant","content":[{"type":"text","text":"The parser walks the session tree."}],"provider":"openrouter","model":"example/model","timestamp":1780228803000}} +{"type":"message","id":"t1","parentId":"a1","timestamp":"2026-05-30T12:00:04.000Z","message":{"role":"toolResult","toolName":"read","content":[{"type":"text","text":"file contents 
here"}],"isError":false,"timestamp":1780228804000}} +{"type":"compaction","id":"c1","parentId":"t1","timestamp":"2026-05-30T12:00:05.000Z","summary":"Compacted earlier discussion about the parser.","firstKeptEntryId":"u1","tokensBefore":1000} +{"type":"branch_summary","id":"b1","parentId":"c1","timestamp":"2026-05-30T12:00:06.000Z","fromId":"u1","summary":"Branch explored an alternate approach."} +{"type":"session_info","id":"s1","parentId":"b1","timestamp":"2026-05-30T12:00:07.000Z","name":"Parser design session"} diff --git a/tests/test_agentgrep.py b/tests/test_agentgrep.py index 14f37b6..c5fbc8c 100644 --- a/tests/test_agentgrep.py +++ b/tests/test_agentgrep.py @@ -27,7 +27,7 @@ if t.TYPE_CHECKING: import collections.abc as cabc -AgentName = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"] +AgentName = t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"] ANSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]") @@ -6332,6 +6332,403 @@ def test_search_grok_session_search_db( assert db_records[0].timestamp.startswith("2026-") +def _pi_session_header( + *, cwd: str = "/home/user/project", version: int | None = 3 +) -> dict[str, object]: + """Build a pi session-header line; ``version=None`` omits the field (v1).""" + header: dict[str, object] = { + "type": "session", + "id": "019e0000-0000-7000-8000-000000000abc", + "timestamp": "2026-05-30T12:00:00.000Z", + "cwd": cwd, + } + if version is not None: + header["version"] = version + return header + + +def _parse_pi_entries( + agentgrep: AgentGrepModule, + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, + entries: list[dict[str, object]], + *, + version: int | None = 3, +) -> list[t.Any]: + """Write a nested pi session of ``entries`` and return its parsed records.""" + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + monkeypatch.delenv("PI_CODING_AGENT_DIR", raising=False) + monkeypatch.delenv("PI_CODING_AGENT_SESSION_DIR", raising=False) + 
session_file = home / ".pi" / "agent" / "sessions" / "--home-user-project--" / "sess.jsonl" + write_jsonl(session_file, [_pi_session_header(version=version), *entries]) + backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) + sources = t.cast("t.Any", agentgrep).discover_sources(home, ("pi",), backends) + records: list[t.Any] = [] + for source in sources: + if source.store == "pi.sessions": + records.extend(t.cast("t.Any", agentgrep).iter_source_records(source)) + return records + + +def test_discover_pi_sources_honours_pi_coding_agent_dir( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """``PI_CODING_AGENT_DIR`` is used verbatim, overriding ``${HOME}/.pi/agent``.""" + agentgrep = load_agentgrep_module() + decoy_home = tmp_path / "home" + alt_dir = tmp_path / "elsewhere" / "agent" + monkeypatch.setenv("HOME", str(decoy_home)) + monkeypatch.setenv("PI_CODING_AGENT_DIR", str(alt_dir)) + monkeypatch.delenv("PI_CODING_AGENT_SESSION_DIR", raising=False) + decoy = decoy_home / ".pi" / "agent" / "sessions" / "--decoy--" / "d.jsonl" + write_jsonl(decoy, [_pi_session_header(cwd="/decoy")]) + real = alt_dir / "sessions" / "--real--" / "r.jsonl" + write_jsonl(real, [_pi_session_header(cwd="/real")]) + + backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) + sources = t.cast("t.Any", agentgrep).discover_pi_sources(decoy_home, backends) + + paths = {s.path for s in sources} + assert real in paths + assert decoy not in paths + + +def test_discover_pi_sources_session_dir_override_is_flat( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """``PI_CODING_AGENT_SESSION_DIR`` holds session files flat; cwd comes from the header.""" + agentgrep = load_agentgrep_module() + home = tmp_path / "home" + flat_dir = tmp_path / "pi-sessions" + monkeypatch.setenv("HOME", str(home)) + monkeypatch.delenv("PI_CODING_AGENT_DIR", raising=False) + monkeypatch.setenv("PI_CODING_AGENT_SESSION_DIR", 
str(flat_dir)) + session_file = flat_dir / "2026-05-30T12-00-00-000Z_019e0000-0000-7000-8000-0000000000aa.jsonl" + write_jsonl( + session_file, + [ + _pi_session_header(cwd="/srv/work/app"), + { + "type": "message", + "id": "u1", + "parentId": None, + "timestamp": "2026-05-30T12:00:02.000Z", + "message": { + "role": "user", + "content": "flat layout prompt", + "timestamp": 1780228802000, + }, + }, + ], + ) + + backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) + sources = t.cast("t.Any", agentgrep).discover_sources(home, ("pi",), backends) + pi_sources = [s for s in sources if s.store == "pi.sessions"] + + assert any(s.path == session_file for s in pi_sources) + records: list[t.Any] = [] + for source in pi_sources: + records.extend(t.cast("t.Any", agentgrep).iter_source_records(source)) + assert records, "expected the flat-layout session to parse" + assert records[0].conversation_id == "/srv/work/app" + + +def test_search_pi_sessions( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Pi sessions yield user prompts and assistant history carrying the model.""" + agentgrep = load_agentgrep_module() + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + monkeypatch.delenv("PI_CODING_AGENT_DIR", raising=False) + monkeypatch.delenv("PI_CODING_AGENT_SESSION_DIR", raising=False) + session_file = home / ".pi" / "agent" / "sessions" / "--home-user-proj--" / "sess.jsonl" + write_jsonl( + session_file, + [ + _pi_session_header(cwd="/home/user/proj"), + { + "type": "message", + "id": "u1", + "parentId": None, + "timestamp": "2026-05-30T12:00:02.000Z", + "message": { + "role": "user", + "content": [{"type": "text", "text": "explain the streaming design"}], + "timestamp": 1780228802000, + }, + }, + { + "type": "message", + "id": "a1", + "parentId": "u1", + "timestamp": "2026-05-30T12:00:03.000Z", + "message": { + "role": "assistant", + "content": [{"type": "text", "text": "The streaming design is event-driven."}], + 
"provider": "openrouter", + "model": "example/model", + "timestamp": 1780228803000, + }, + }, + ], + ) + + backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) + query = t.cast("t.Any", agentgrep).SearchQuery( + terms=("streaming",), + search_type="all", + any_term=False, + regex=False, + case_sensitive=False, + agents=("pi",), + limit=None, + ) + sources = t.cast("t.Any", agentgrep).discover_sources(home, ("pi",), backends) + records = t.cast("t.Any", agentgrep).search_sources(query, sources, backends) + + assert len(records) >= 2, "expected user + assistant records" + by_role = {r.role: r for r in records} + assert by_role["user"].kind == "prompt" + assert by_role["user"].agent == "pi" + assert by_role["user"].conversation_id == "/home/user/proj" + assert by_role["assistant"].kind == "history" + assert by_role["assistant"].model == "example/model" + + +def test_parse_pi_session_v1_uses_unix_ms_timestamp_fallback( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A v1 session (no version) with no entry timestamp falls back to inner unix-ms.""" + agentgrep = load_agentgrep_module() + records = _parse_pi_entries( + agentgrep, + tmp_path, + monkeypatch, + [ + { + "type": "message", + "id": "u1", + "parentId": None, + "message": {"role": "user", "content": "v1 prompt", "timestamp": 1700000000000}, + }, + ], + version=None, + ) + + assert len(records) == 1 + assert records[0].kind == "prompt" + assert records[0].timestamp == "2023-11-14T22:13:20Z" + + +class PiEntryCase(t.NamedTuple): + """Parametrized case for one pi session entry through the parser.""" + + test_id: str + entry: dict[str, object] + expected_count: int + expected_kind: str | None + expected_role: str | None + expected_text_contains: str | None + expected_model: str | None + + +PI_ENTRY_CASES: tuple[PiEntryCase, ...] 
= ( + PiEntryCase( + "user-message-is-prompt", + { + "type": "message", + "id": "u1", + "timestamp": "2026-05-30T12:00:02.000Z", + "message": {"role": "user", "content": [{"type": "text", "text": "design question"}]}, + }, + 1, + "prompt", + "user", + "design question", + None, + ), + PiEntryCase( + "assistant-message-is-history-with-model", + { + "type": "message", + "id": "a1", + "timestamp": "2026-05-30T12:00:03.000Z", + "message": { + "role": "assistant", + "content": [{"type": "text", "text": "an answer"}], + "model": "example/model", + }, + }, + 1, + "history", + "assistant", + "an answer", + "example/model", + ), + PiEntryCase( + "tool-result-is-history", + { + "type": "message", + "id": "t1", + "timestamp": "2026-05-30T12:00:04.000Z", + "message": { + "role": "toolResult", + "toolName": "read", + "content": [{"type": "text", "text": "tool output"}], + "isError": False, + }, + }, + 1, + "history", + "toolResult", + "tool output", + None, + ), + PiEntryCase( + "compaction-summary-is-history", + { + "type": "compaction", + "id": "c1", + "timestamp": "2026-05-30T12:00:05.000Z", + "summary": "compacted summary text", + }, + 1, + "history", + "compaction", + "compacted summary text", + None, + ), + PiEntryCase( + "branch-summary-is-history", + { + "type": "branch_summary", + "id": "b1", + "timestamp": "2026-05-30T12:00:06.000Z", + "fromId": "u1", + "summary": "branch summary text", + }, + 1, + "history", + "branch_summary", + "branch summary text", + None, + ), + PiEntryCase( + "session-info-name-is-history", + { + "type": "session_info", + "id": "s1", + "timestamp": "2026-05-30T12:00:07.000Z", + "name": "Session title", + }, + 1, + "history", + "session_info", + "Session title", + None, + ), + PiEntryCase( + "model-change-is-skipped", + { + "type": "model_change", + "id": "m1", + "timestamp": "2026-05-30T12:00:01.000Z", + "provider": "openrouter", + "modelId": "example/model", + }, + 0, + None, + None, + None, + None, + ), + PiEntryCase( + 
"thinking-level-change-is-skipped", + { + "type": "thinking_level_change", + "id": "tl1", + "timestamp": "2026-05-30T12:00:01.500Z", + "thinkingLevel": "high", + }, + 0, + None, + None, + None, + None, + ), + PiEntryCase( + "empty-user-content-is-skipped", + { + "type": "message", + "id": "u2", + "timestamp": "2026-05-30T12:00:02.000Z", + "message": {"role": "user", "content": []}, + }, + 0, + None, + None, + None, + None, + ), + PiEntryCase( + "assistant-thinking-only-is-skipped", + { + "type": "message", + "id": "a2", + "timestamp": "2026-05-30T12:00:03.000Z", + "message": { + "role": "assistant", + "content": [{"type": "thinking", "thinking": "internal reasoning"}], + }, + }, + 0, + None, + None, + None, + None, + ), +) + + +@pytest.mark.parametrize( + PiEntryCase._fields, + PI_ENTRY_CASES, + ids=[case.test_id for case in PI_ENTRY_CASES], +) +def test_parse_pi_session_entry( + test_id: str, + entry: dict[str, object], + expected_count: int, + expected_kind: str | None, + expected_role: str | None, + expected_text_contains: str | None, + expected_model: str | None, + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Each pi session entry type maps to the expected record (or is skipped).""" + _ = test_id + agentgrep = load_agentgrep_module() + records = _parse_pi_entries(agentgrep, tmp_path, monkeypatch, [entry]) + + assert len(records) == expected_count + if expected_count: + record = records[0] + assert record.agent == "pi" + assert record.kind == expected_kind + assert record.role == expected_role + assert record.model == expected_model + if expected_text_contains is not None: + assert expected_text_contains in record.text + + class UnixToIsoCase(t.NamedTuple): """Parametrized case for _unix_to_isoformat edge cases.""" diff --git a/tests/test_stores.py b/tests/test_stores.py index d5efb35..e7c8ced 100644 --- a/tests/test_stores.py +++ b/tests/test_stores.py @@ -706,6 +706,7 @@ def test_descriptor_round_trips_through_json() -> None: 
("cursor-cli.prompt_history", "prompt_history.json"), ("grok.prompt_history", "prompt_history.jsonl"), ("grok.sessions", "chat_history.jsonl"), + ("pi.sessions", "example.jsonl"), ) From 3214a7c50e49266134295662eedb1a2f4e2ae866 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:31:32 -0500 Subject: [PATCH 6/7] agentgrep(docs[pi]): Add pi backend page and storage catalogue entry why: Each backend gets a reference page documenting its layout, env overrides, and record schemas, and the support matrix and agent lists must name pi so readers can find it. what: - Add docs/backends/pi.md: base path and both env overrides, the per-working-directory session layout and the flat session-dir override, the pi.sessions record schema, and the documentary stores. - Add a Pi card and toctree entry to the backend index and a Pi section to the storage-catalogue dev page. - Name Pi in the README and docs landing agent lists. - Extend the backend-grid and coverage-grid doc tests to cover pi. --- README.md | 2 +- docs/backends/index.md | 7 +++ docs/backends/pi.md | 72 +++++++++++++++++++++++++++ docs/dev/index.md | 2 +- docs/dev/storage-catalog.md | 28 +++++++++++ docs/getting-started/configuration.md | 2 +- docs/index.md | 2 +- docs/mcp/resources.md | 2 +- docs/tui/index.md | 2 +- tests/test_storage_docs.py | 3 +- tests/test_widgets.py | 2 +- 11 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 docs/backends/pi.md diff --git a/README.md b/README.md index 8eebe6c..31fb228 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) Read-only search for local AI agent prompts and history across Codex, -Claude Code, Cursor, Gemini, and Grok. +Claude Code, Cursor, Gemini, Grok, and Pi. 
`agentgrep` provides a CLI and an MCP server over the same discovery + parsing layer: diff --git a/docs/backends/index.md b/docs/backends/index.md index 1fc0483..002c805 100644 --- a/docs/backends/index.md +++ b/docs/backends/index.md @@ -47,6 +47,12 @@ Gemini CLI chat sessions, prompt logs, checkpoints, settings, and skills. Grok CLI prompt history, session transcripts, memory, logs, and config. ::: +:::{grid-item-card} Pi +:link: pi +:link-type: doc +Pi (earendil-works) session transcripts, settings, prompts, and managed extensions. +::: + :::: ## Coverage levels @@ -84,4 +90,5 @@ cursor-cli cursor-ide gemini grok +pi ``` diff --git a/docs/backends/pi.md b/docs/backends/pi.md new file mode 100644 index 0000000..af424b0 --- /dev/null +++ b/docs/backends/pi.md @@ -0,0 +1,72 @@ +(backend-pi)= + +# Pi + +Base path: `~/.pi/agent` (env override: `PI_CODING_AGENT_DIR`). + +`observed_version`: `pi v0.78.0` (observed 2026-05-30). + +pi (the earendil-works "Pi Agent Harness") stores each conversation as +one append-only JSONL file under `~/.pi/agent/sessions/`, grouped by +working directory. The directory key is the cwd with its leading slash +stripped and `/`, `\`, and `:` replaced by `-`, wrapped in double +dashes (e.g. `--home-d-work-python-agentgrep--`). Each session file is +named `<timestamp>_<uuid>.jsonl`. + +Unlike Codex or Grok, pi keeps no separate prompt-history log and no +SQLite session index — the session transcript is the entire searchable +surface, which makes pi the structural twin of the Claude Code backend. + +The optional `PI_CODING_AGENT_SESSION_DIR` override points at the +sessions directory directly. When it is set, pi writes session files +flat into that directory with no per-working-directory subdirectory; +agentgrep then recovers the cwd from each session's header rather than +the directory name.
+ +## Stores + +```{storage:agent} pi +``` + +## Record schemas + +### pi.sessions + +The first line is a session header; `version` is `3` and may be absent +in older (v1) files. + +```json +{"type": "session", "version": 3, "id": "019e5691-...", + "timestamp": "2026-05-23T20:41:01.417Z", + "cwd": "/home/d/work/python/agentgrep"} +``` + +Every later line is a `SessionEntry` sharing `id` / `parentId` / +`timestamp` (an append-only tree, not a flat list). A `message` entry +wraps an LLM message; `role` is `user`, `assistant`, or `toolResult`, +and `content` is a string or a content-blocks array. Assistant turns +carry `model` and `provider` inline. + +```json +{"type": "message", "id": "...", "parentId": "...", + "timestamp": "2026-05-23T20:41:05.000Z", + "message": {"role": "user", + "content": [{"type": "text", "text": "..."}], + "timestamp": 1779999665000}} +``` + +User turns surface as prompts and assistant / tool turns as history via +the shared role-to-kind mapping. `compaction` and `branch_summary` +entries contribute their `summary` text, and `session_info` contributes +its user-set `name`; `model_change`, `thinking_level_change`, `custom`, +and `label` entries are metadata only. Entry-level timestamps are +ISO-8601; the inner `message.timestamp` is unix-milliseconds and is used +only as a fallback. + +## Documentary stores + +The remaining `pi.*` rows are catalogued for completeness but not +searched: `pi.settings`, `pi.models`, `pi.themes`, `pi.tools`, +`pi.bin`, `pi.prompts`, `pi.debug_log`, and `pi.extensions_npm` (the +managed npm extension install root). `pi.auth` holds provider +credentials and is documented but never enumerated from disk. 
diff --git a/docs/dev/index.md b/docs/dev/index.md index 45fbffd..4e0d3fa 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -16,7 +16,7 @@ Cross-commit `hyperfine` sweeps across HEAD, trunk, ranges, lookback, tags, or e :::{grid-item-card} Storage catalogue :link: storage-catalog :link-type: doc -On-disk store layouts for Codex, Claude Code, Cursor, Gemini CLI, and Grok CLI — useful for adapter authors and anyone tracing why a record was or wasn't found. +On-disk store layouts for Codex, Claude Code, Cursor, Gemini CLI, Grok CLI, and Pi — useful for adapter authors and anyone tracing why a record was or wasn't found. ::: :::{grid-item-card} Architecture decisions diff --git a/docs/dev/storage-catalog.md b/docs/dev/storage-catalog.md index 52c5835..6a97ef5 100644 --- a/docs/dev/storage-catalog.md +++ b/docs/dev/storage-catalog.md @@ -279,6 +279,34 @@ Documentary-only entries cover events, summaries, memory, logs, worktrees, and config — all catalogued with `search_by_default=False` or deferred. +### Pi + +`observed_version`: ``pi v0.78.0`` (observed 2026-05-30). + +Pi (earendil-works) stores each conversation as one append-only JSONL +file under `${PI_CODING_AGENT_DIR or ${HOME}/.pi/agent}/sessions/`, +grouped by working directory (`--<cwd>--`, leading slash +stripped and `/ \ :` replaced by `-`). It keeps no separate +prompt-history log and no SQLite index, so a single adapter covers the +whole searchable surface: + +- `pi.sessions_jsonl.v1` parses `sessions/--<cwd>--/<timestamp>_<uuid>.jsonl`. + Line one is a `type:"session"` header (`version` may be absent in v1 + files); each later line is a `SessionEntry` tagged union. `message` + entries carry an LLM message (`role` user / assistant / toolResult, + `content` string or content-blocks; assistant turns carry `model`), + while `compaction` / `branch_summary` summaries and `session_info` + names are emitted as history text. User turns surface as prompts via + the shared role-to-kind mapping.
+ +Discovery resolves two roots: `PI_CODING_AGENT_DIR` (the agent dir, +default `~/.pi/agent`) and the optional `PI_CODING_AGENT_SESSION_DIR`, +which holds session files flat with the cwd recovered from the header. + +Documentary-only entries cover settings, auth (private credentials), +models, themes, tools, managed binaries, prompt templates, the debug +log, and the npm extension install root. + ## Adding or updating a store 1. Edit `src/agentgrep/store_catalog.py`. Stamp `observed_version` diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 08c4d1d..d6b1e18 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -12,7 +12,7 @@ Use `--agent` one or more times to limit search or discovery: $ uv run agentgrep grep "cache" --agent codex ``` -Supported agents are `codex`, `claude`, `cursor-cli`, `cursor-ide`, `gemini`, and `grok`. Omitting `--agent` searches all supported agents. +Supported agents are `codex`, `claude`, `cursor-cli`, `cursor-ide`, `gemini`, `grok`, and `pi`. Omitting `--agent` searches all supported agents. ## Search type diff --git a/docs/index.md b/docs/index.md index ef0ee6f..f5bb9d5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,7 +2,7 @@ # agentgrep -Read-only search for local AI agent prompts and history across Codex, Claude Code, Cursor, Gemini, and Grok. +Read-only search for local AI agent prompts and history across Codex, Claude Code, Cursor, Gemini, Grok, and Pi. ```{warning} **Pre-alpha.** APIs may change. [Feedback welcome](https://github.com/tony/agentgrep/issues). diff --git a/docs/mcp/resources.md b/docs/mcp/resources.md index 784ff13..af9abb8 100644 --- a/docs/mcp/resources.md +++ b/docs/mcp/resources.md @@ -26,7 +26,7 @@ used to interpret that source. 
```{fastmcp-resource-template} agentgrep_sources_by_agent ``` -Read `agentgrep://sources/codex`, `agentgrep://sources/claude`, `agentgrep://sources/cursor-cli`, `agentgrep://sources/cursor-ide`, `agentgrep://sources/gemini`, or `agentgrep://sources/grok` to filter discovery by agent. +Read `agentgrep://sources/codex`, `agentgrep://sources/claude`, `agentgrep://sources/cursor-cli`, `agentgrep://sources/cursor-ide`, `agentgrep://sources/gemini`, `agentgrep://sources/grok`, or `agentgrep://sources/pi` to filter discovery by agent. ## Store catalog diff --git a/docs/tui/index.md b/docs/tui/index.md index a18fb84..f29955a 100644 --- a/docs/tui/index.md +++ b/docs/tui/index.md @@ -3,7 +3,7 @@ # TUI The `agentgrep ui` command launches the interactive Textual explorer -over the same Codex, Claude Code, Cursor, Gemini, and Grok stores the rest +over the same Codex, Claude Code, Cursor, Gemini, Grok, and Pi stores the rest of the CLI walks. It is read-only — agentgrep never mutates the source stores. Bare `agentgrep` prints the directory of choices, so the explorer always needs the explicit `ui` subcommand. 
diff --git a/tests/test_storage_docs.py b/tests/test_storage_docs.py index 6b664f6..19af15d 100644 --- a/tests/test_storage_docs.py +++ b/tests/test_storage_docs.py @@ -144,6 +144,7 @@ def test_storage_coverage_grid_summarizes_catalog(tmp_path: pathlib.Path) -> Non cursor-ide gemini grok + pi ``` ```{storage:coverage-grid} @@ -152,7 +153,7 @@ def test_storage_coverage_grid_summarizes_catalog(tmp_path: pathlib.Path) -> Non ), encoding="utf-8", ) - for agent in ("claude", "codex", "cursor-cli", "cursor-ide", "gemini", "grok"): + for agent in ("claude", "codex", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"): (srcdir / f"{agent}.md").write_text( textwrap.dedent( f"""\ diff --git a/tests/test_widgets.py b/tests/test_widgets.py index f37afc5..eef31e8 100644 --- a/tests/test_widgets.py +++ b/tests/test_widgets.py @@ -149,5 +149,5 @@ def test_backend_index_renders_backend_shortcut_grid(tmp_path: pathlib.Path) -> backend_index = (tmp_path / "backends" / "index.html").read_text(encoding="utf-8") assert "Backend pages" in backend_index assert backend_index.index("Backend pages") < backend_index.index("Coverage levels") - for backend in ("codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok"): + for backend in ("codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi"): assert f'href="{backend}/"' in backend_index From 27ea893eb88d16af9cb446a47ca83566bc474c8b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 30 May 2026 14:36:31 -0500 Subject: [PATCH 7/7] docs(CHANGES) Add pi backend why: Record the new pi backend for the unreleased version so readers know agentgrep now searches earendil-works/pi. what: - Add a "Pi backend (#25)" deliverable under What's new for the unreleased 0.1.0a8 section, describing the single JSONL session store, the two env overrides, and the catalogued documentary stores. 
--- CHANGES | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGES b/CHANGES index 0eb2aa9..223171c 100644 --- a/CHANGES +++ b/CHANGES @@ -42,6 +42,24 @@ $ uvx --from 'agentgrep' --prerelease allow python +### What's new + +#### Pi backend (#28) + +agentgrep now searches [Pi](https://github.com/earendil-works/pi) (the +earendil-works "Pi Agent Harness") as a new backend. Pi keeps each +conversation as one append-only JSONL session file under +`~/.pi/agent/sessions/`, grouped by working directory, with no separate +prompt-history log or SQLite index — so user prompts surface as prompts +and assistant and tool turns as history straight from the transcript. +Discovery honours `PI_CODING_AGENT_DIR` and the flat +`PI_CODING_AGENT_SESSION_DIR` override (where the working directory is +recovered from the session header), and every other on-disk pi store — +settings, models, themes, tools, managed binaries, prompt templates, +the debug log, and the npm extension root — is catalogued for +completeness, with `auth.json` documented but never indexed. See +{doc}`/backends/pi` for details. + ## agentgrep 0.1.0a11 (2026-05-31) agentgrep 0.1.0a11 splits the Cursor backend into two agents —