From 505bc0bc7990b12253ab414540d7b53453961b2b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 31 May 2026 12:16:14 -0500 Subject: [PATCH 1/5] agentgrep(feat[scope]): Add explicit conversation search scope why: Prompt-history stores should participate in the default prompt search, while full conversation transcripts remain an explicit opt-in surface. Naming that boundary as scope gives CLI, query, and MCP callers a single user-facing selector. what: - Treat dedicated prompt-history records as prompts and classify conversation scope by store role. - Replace search and grep --type with --scope for prompt, conversation, and all searches. - Rename MCP search inputs and capability metadata to expose scope, with tests covering the new defaults. --- src/agentgrep/__init__.py | 52 +++++++++++++++++++------ src/agentgrep/cli/parser.py | 18 ++++----- src/agentgrep/mcp/_library.py | 2 +- src/agentgrep/mcp/models.py | 6 +-- src/agentgrep/mcp/prompts.py | 16 ++++---- src/agentgrep/mcp/resources.py | 4 +- src/agentgrep/mcp/tools/search_tools.py | 10 ++--- src/agentgrep/query/compile.py | 10 +++-- src/agentgrep/query/registry.py | 6 +-- tests/test_agentgrep.py | 20 +++++----- tests/test_agentgrep_mcp.py | 29 ++++++++++++-- tests/test_cli_grep.py | 17 ++++++++ tests/test_cli_search.py | 25 ++++++------ tests/test_query_compile.py | 42 +++++++++++++------- tests/test_query_engine.py | 33 +++++++--------- tests/test_query_ui.py | 4 +- 16 files changed, 186 insertions(+), 108 deletions(-) diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index 2fbfcd3..a4272de 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -36,6 +36,7 @@ import contextlib import dataclasses import datetime +import functools import importlib import itertools import json @@ -67,6 +68,7 @@ SourceKind, StoreCoverage, StoreDescriptor, + StoreRole, VersionDetectionConfidence, VersionDetectionStrategy, ) @@ -88,7 +90,7 @@ ] OutputMode = t.Literal["text", "json", "ndjson", "ui"] ProgressMode = t.Literal["auto", "always", "never"] -SearchType = t.Literal["prompts", "history", "all"] +SearchType = t.Literal["prompts", "conversations", "all"] ColorMode = t.Literal["auto", "always", "never"] GrepStyle = t.Literal["default", "pretty"] type JSONScalar = str | int | float | bool | None @@ -185,6 +187,7 @@ OPTIONS_EXPECTING_VALUE: frozenset[str] = frozenset( { "--agent", + "--scope", "--type", "--limit", "--color", @@ -239,7 +242,7 @@ def build_description( ( "agentgrep grep bliss", "agentgrep grep -i 'serene bliss'", - "agentgrep grep -F --type history TODO", + "agentgrep grep -F --scope conversations TODO", "agentgrep grep --json design", ), ), @@ -322,7 +325,7 @@ def build_description( ( "agentgrep grep bliss", "agentgrep grep -i 'serene bliss'", - "agentgrep grep -F --type history TODO", + "agentgrep grep -F --scope conversations TODO", "agentgrep grep --json design", "agentgrep grep --vimgrep --no-dedupe foo", ), @@ -4009,7 +4012,7 @@ def parse_codex_history_file( .replace("+00:00", "Z") ) yield SearchRecord( - kind="history", + kind="prompt", agent=source.agent, store=source.store, adapter_id=source.adapter_id, @@ -4407,7 +4410,7 @@ def parse_claude_history_file( continue session_id = as_optional_str(mapping.get("sessionId")) yield SearchRecord( - kind="history", + kind="prompt", agent=source.agent, store=source.store, adapter_id=source.adapter_id, @@ -4667,7 +4670,7 @@ def parse_gemini_logs_file( ) -> cabc.Iterator[SearchRecord]: """Parse a Gemini CLI ``logs.json`` file (flat JSON array of LogEntry). - Records are emitted as ``kind="history"`` — the file is an audit log of + Records are emitted as ``kind="prompt"`` — the file is an audit log of user prompts, the same role ``codex.history`` plays for Codex. """ payload = read_json_file(source.path) @@ -4681,7 +4684,7 @@ def parse_gemini_logs_file( continue session_id = as_optional_str(mapping.get("sessionId")) yield SearchRecord( - kind="history", + kind="prompt", agent=source.agent, store=source.store, adapter_id=source.adapter_id, @@ -4713,7 +4716,7 @@ def parse_grok_prompt_history( continue session_id = as_optional_str(mapping.get("session_id")) yield SearchRecord( - kind="history", + kind="prompt", agent=source.agent, store=source.store, adapter_id=source.adapter_id, @@ -5538,7 +5541,7 @@ def parse_cursor_prompt_history( continue seen.add(prompt) yield SearchRecord( - kind="history", + kind="prompt", agent=source.agent, store=source.store, adapter_id=source.adapter_id, @@ -6299,6 +6302,33 @@ def build_search_record(source: SourceHandle, candidate: MessageCandidate) -> Se ) +CONVERSATION_STORE_ROLES: frozenset[StoreRole] = frozenset( + {StoreRole.PRIMARY_CHAT, StoreRole.SUPPLEMENTARY_CHAT}, +) + + +@functools.cache +def store_role_for_record(store: str, adapter_id: str) -> StoreRole | None: + """Return the catalog role for a normalized record's source store.""" + from agentgrep.store_catalog import CATALOG + + for descriptor in CATALOG.stores: + for spec in descriptor.discovery: + if spec.store == store and spec.adapter_id == adapter_id: + return descriptor.role + return None + + +def record_matches_scope(record: SearchRecord, scope: SearchType) -> bool: + """Return whether ``record`` belongs to the requested search scope.""" + if scope == "all": + return True + if scope == "prompts": + return record.kind == "prompt" + role = store_role_for_record(record.store, record.adapter_id) + return role in CONVERSATION_STORE_ROLES + + def matches_record(record: SearchRecord, query: SearchQuery) -> bool: """Return whether a normalized record should be included. @@ -6307,9 +6337,7 @@ def matches_record(record: SearchRecord, query: SearchQuery) -> bool: checks. Pure-text queries skip the predicate evaluation since the compiler leaves ``compiled = None`` for them. """ - if query.search_type == "prompts" and record.kind != "prompt": - return False - if query.search_type == "history" and record.kind != "history": + if not record_matches_scope(record, query.search_type): return False if not matches_text(build_search_haystack(record), query): return False diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py index b642757..7d4ac23 100644 --- a/src/agentgrep/cli/parser.py +++ b/src/agentgrep/cli/parser.py @@ -348,10 +348,10 @@ def create_parser( help="Show column numbers in output (implies -n)", ) _ = grep_parser.add_argument( - "--type", - choices=["prompts", "history", "all"], + "--scope", + choices=["prompts", "conversations", "all"], dest="search_type", - help="Record type to search (default: prompts)", + help="Search scope: prompts, conversations, or all (default: prompts)", ) _ = grep_parser.add_argument( "--progress", @@ -513,10 +513,10 @@ def create_parser( help="Search terms (combined as AND by default)", ) _ = search_parser.add_argument( - "--type", - choices=["prompts", "history", "all"], + "--scope", + choices=["prompts", "conversations", "all"], dest="search_type", - help="Record type to search (default: prompts)", + help="Search scope: prompts, conversations, or all (default: prompts)", ) _ = search_parser.add_argument( "--case-sensitive", @@ -587,7 +587,7 @@ def _search_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]: if t.cast("list[str]", namespace.agent): flags["agent"] = "--agent" if t.cast("str | None", namespace.search_type) is not None: - flags["type"] = "--type" + flags["scope"] = "--scope" return flags @@ -597,7 +597,7 @@ def _grep_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]: if t.cast("list[str]", namespace.agent): flags["agent"] = "--agent" if t.cast("str | None", namespace.search_type) is not None: - flags["type"] = "--type" + flags["scope"] = "--scope" return flags @@ -620,7 +620,7 @@ def _effective_search_type( explicit = t.cast("SearchType | None", namespace.search_type) if explicit is not None: return explicit - if "type" in query_fields: + if "scope" in query_fields: return "all" return "prompts" diff --git a/src/agentgrep/mcp/_library.py b/src/agentgrep/mcp/_library.py index 1392654..92886d1 100644 --- a/src/agentgrep/mcp/_library.py +++ b/src/agentgrep/mcp/_library.py @@ -19,7 +19,7 @@ AgentSelector = t.Literal[ "codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi", "opencode", "all" ] -SearchTypeName = t.Literal["prompts", "history", "all"] +SearchTypeName = t.Literal["prompts", "conversations", "all"] SERVER_VERSION = "0.1.0" KNOWN_ADAPTERS: tuple[str, ...] = ( diff --git a/src/agentgrep/mcp/models.py b/src/agentgrep/mcp/models.py index ecfb1d0..f3e6f3e 100644 --- a/src/agentgrep/mcp/models.py +++ b/src/agentgrep/mcp/models.py @@ -112,7 +112,7 @@ class SearchToolQuery(AgentGrepModel): terms: list[str] agent: AgentSelector - search_type: SearchTypeName + scope: SearchTypeName case_sensitive: bool limit: int | None = None @@ -159,7 +159,7 @@ class CapabilitiesModel(AgentGrepModel): agents: list[ t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi", "opencode"] ] - search_types: list[SearchTypeName] + search_scopes: list[SearchTypeName] adapters: list[str] tools: list[str] resources: list[str] @@ -175,7 +175,7 @@ class SearchRequestModel(AgentGrepModel): terms: list[str] agent: AgentSelector - search_type: SearchTypeName + scope: SearchTypeName case_sensitive: bool limit: int | None = None diff --git a/src/agentgrep/mcp/prompts.py b/src/agentgrep/mcp/prompts.py index b58fdd7..2227f17 100644 --- a/src/agentgrep/mcp/prompts.py +++ b/src/agentgrep/mcp/prompts.py @@ -19,25 +19,25 @@ def register_prompts(mcp: FastMCP) -> None: def search_prompts_prompt(topic: str, agent: str = "all") -> str: return ( "Use the `search` tool to find full user prompts about " - f"{topic!r}. Search `prompts` only, keep newest-first ordering, " + f"{topic!r}. Search scope `prompts` only, keep newest-first ordering, " f"and limit the search to agent={agent!r} if requested." ) _ = search_prompts_prompt @mcp.prompt( - name="search_history", - description="Guide the client to search assistant or command history records.", - tags={"search", "history", "readonly"}, + name="search_conversations", + description="Guide the client to search full conversation/session records.", + tags={"search", "conversations", "readonly"}, ) - def search_history_prompt(topic: str, agent: str = "all") -> str: + def search_conversations_prompt(topic: str, agent: str = "all") -> str: return ( - "Use the `search` tool to find matching history records about " - f"{topic!r}. Search `history` only, and restrict to " + "Use the `search` tool to find matching conversation records about " + f"{topic!r}. Search scope `conversations` only, and restrict to " f"agent={agent!r} when appropriate." ) - _ = search_history_prompt + _ = search_conversations_prompt @mcp.prompt( name="inspect_stores", diff --git a/src/agentgrep/mcp/resources.py b/src/agentgrep/mcp/resources.py index 3f39f9c..771c5e9 100644 --- a/src/agentgrep/mcp/resources.py +++ b/src/agentgrep/mcp/resources.py @@ -72,7 +72,7 @@ def build_capabilities() -> CapabilitiesModel: backends = agentgrep.select_backends() return CapabilitiesModel( agents=list(agentgrep.AGENT_CHOICES), - search_types=["prompts", "history", "all"], + search_scopes=["prompts", "conversations", "all"], adapters=list(KNOWN_ADAPTERS), tools=[ "search", @@ -94,7 +94,7 @@ def build_capabilities() -> CapabilitiesModel: "agentgrep://store-roles", "agentgrep://store-formats", ], - prompts=["search_prompts", "search_history", "inspect_stores"], + prompts=["search_prompts", "search_conversations", "inspect_stores"], backends=BackendAvailabilityModel( find_tool=backends.find_tool, grep_tool=backends.grep_tool, diff --git a/src/agentgrep/mcp/tools/search_tools.py b/src/agentgrep/mcp/tools/search_tools.py index 23c6929..f35bdf9 100644 --- a/src/agentgrep/mcp/tools/search_tools.py +++ b/src/agentgrep/mcp/tools/search_tools.py @@ -35,7 +35,7 @@ def _search_sync(request: SearchRequestModel) -> SearchToolResponse: """Run the blocking search work and build a typed response.""" query = agentgrep.SearchQuery( terms=tuple(request.terms), - search_type=request.search_type, + search_type=request.scope, any_term=False, regex=False, case_sensitive=request.case_sensitive, @@ -47,7 +47,7 @@ def _search_sync(request: SearchRequestModel) -> SearchToolResponse: query=SearchToolQuery( terms=request.terms, agent=request.agent, - search_type=request.search_type, + scope=request.scope, case_sensitive=request.case_sensitive, limit=request.limit, ), @@ -98,9 +98,9 @@ async def search_tool( AgentSelector, Field(description="Limit search to one agent or search all agents."), ] = "all", - search_type: t.Annotated[ + scope: t.Annotated[ SearchTypeName, - Field(description="Search prompts, history, or both."), + Field(description="Search prompts, conversations, or both."), ] = "prompts", case_sensitive: t.Annotated[ bool, @@ -118,7 +118,7 @@ async def search_tool( request = SearchRequestModel( terms=terms, agent=agent, - search_type=search_type, + scope=scope, case_sensitive=case_sensitive, limit=limit, ) diff --git a/src/agentgrep/query/compile.py b/src/agentgrep/query/compile.py index 1225e8f..18ace6f 100644 --- a/src/agentgrep/query/compile.py +++ b/src/agentgrep/query/compile.py @@ -144,7 +144,7 @@ def _validate_ast(node: QueryNode, registry: FieldRegistry) -> None: - **comparison against non-comparable field**: e.g. ``agent:>codex`` (the agent enum doesn't support comparison). - **range against non-range field**: e.g. - ``type:[prompts TO history]``. + ``scope:[prompts TO conversations]``. The walk is O(nodes) and runs once before the closures are built; the closures themselves keep their defensive raises so @@ -491,9 +491,11 @@ def _field_matches_record( if spec.layer == "source": # Source-level fields can be read off the record too. return _field_matches_record_via_source(node, record, spec) - if spec.name == "type": - target = "prompt" if node.value == "prompts" else "history" - return record.kind == target + if spec.name == "scope": + return agentgrep.record_matches_scope( + record, + t.cast("agentgrep.SearchType", node.value), + ) if spec.name == "timestamp": return _date_predicate_matches( node, diff --git a/src/agentgrep/query/registry.py b/src/agentgrep/query/registry.py index 3883460..b52e89a 100644 --- a/src/agentgrep/query/registry.py +++ b/src/agentgrep/query/registry.py @@ -106,7 +106,7 @@ def default_registry() -> FieldRegistry: ``adapter`` string source Alias of ``adapter_id`` ``path`` path source Glob against the file basename by default ``mtime`` date source File mtime; supports comparison + range - ``type`` enum record Values: prompts, history + ``scope`` enum record Values: prompts, conversations, all ``timestamp`` date record Record timestamp; comparison + range ``model`` string record Substring against ``record.model`` ``role`` string record Substring against ``record.role`` @@ -145,10 +145,10 @@ def default_registry() -> FieldRegistry: supports_range=True, ), FieldSpec( - name="type", + name="scope", kind="enum", layer="record", - enum_values=("prompts", "history"), + enum_values=("prompts", "conversations", "all"), ), FieldSpec( name="timestamp", diff --git a/tests/test_agentgrep.py b/tests/test_agentgrep.py index 1cccfbc..9d008d4 100644 --- a/tests/test_agentgrep.py +++ b/tests/test_agentgrep.py @@ -3006,7 +3006,7 @@ def test_search_codex_history_json_returns_history_record( query = agentgrep.SearchQuery( terms=("serenity",), - search_type="history", + search_type="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3021,7 +3021,7 @@ def test_search_codex_history_json_returns_history_record( records = agentgrep.search_sources(query, sources, agentgrep.BackendSelection(None, None, None)) assert len(records) == 1 - assert records[0].kind == "history" + assert records[0].kind == "prompt" assert records[0].text == "serenity command example" @@ -3071,7 +3071,7 @@ def test_cursor_ai_tracking_summary_is_exposed_as_history( query = agentgrep.SearchQuery( terms=("serenity", "bliss"), - search_type="history", + search_type="conversations", any_term=False, regex=False, case_sensitive=False, @@ -4294,7 +4294,7 @@ def test_search_codex_history_jsonl_uses_modern_text_schema( backends = agentgrep.BackendSelection(None, None, None) query = agentgrep.SearchQuery( terms=("modern",), - search_type="history", + search_type="prompts", any_term=False, regex=False, case_sensitive=False, @@ -5519,7 +5519,7 @@ def test_search_claude_history_expands_external_pasted_text( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("bliss",), - search_type="history", + search_type="prompts", any_term=False, regex=False, case_sensitive=False, @@ -5534,7 +5534,7 @@ def test_search_claude_history_expands_external_pasted_text( assert record.agent == "claude" assert record.store == "claude.history" assert record.adapter_id == "claude.history_jsonl.v1" - assert record.kind == "history" + assert record.kind == "prompt" assert record.role == "user" assert record.timestamp == "2023-11-14T22:13:20Z" assert record.session_id == "session-1" @@ -5575,7 +5575,7 @@ def test_search_claude_history_tolerates_missing_paste_cache( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("missing",), - search_type="history", + search_type="prompts", any_term=False, regex=False, case_sensitive=False, @@ -6091,7 +6091,7 @@ def test_search_gemini_logs_returns_user_message( assert log_records, "expected at least one gemini.tmp_logs record" assert log_records[0].text == "libtmux trace" assert log_records[0].role == "user" - assert log_records[0].kind == "history" + assert log_records[0].kind == "prompt" assert log_records[0].timestamp == "2026-05-17T12:00:05Z" assert log_records[0].session_id == "sess-1" @@ -6146,7 +6146,7 @@ def test_search_grok_prompt_history( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, ) -> None: - """Grok prompt_history.jsonl records surface as kind=history, role=user.""" + """Grok prompt_history.jsonl records surface as kind=prompt, role=user.""" agentgrep = load_agentgrep_module() home = tmp_path / "home" monkeypatch.setenv("HOME", str(home)) @@ -6184,7 +6184,7 @@ def test_search_grok_prompt_history( records = t.cast("t.Any", agentgrep).search_sources(query, sources, backends) assert records, "expected at least one grok prompt history record" - assert records[0].kind == "history" + assert records[0].kind == "prompt" assert records[0].role == "user" assert records[0].agent == "grok" assert "summarise" in records[0].text diff --git a/tests/test_agentgrep_mcp.py b/tests/test_agentgrep_mcp.py index 3572fa0..a0c0cb3 100644 --- a/tests/test_agentgrep_mcp.py +++ b/tests/test_agentgrep_mcp.py @@ -31,7 +31,7 @@ class SearchQueryLike(t.Protocol): """Structural type for search query echoes.""" terms: list[str] - search_type: str + scope: str agent: str @@ -190,14 +190,14 @@ async def test_mcp_search_tool_returns_full_prompt( { "terms": ["serenity", "bliss"], "agent": "codex", - "search_type": "prompts", + "scope": "prompts", "limit": 5, }, ) data = t.cast("SearchToolDataLike", result.data) assert data.query.terms == ["serenity", "bliss"] - assert data.query.search_type == "prompts" + assert data.query.scope == "prompts" assert data.query.agent == "codex" assert len(data.results) == 1 assert data.results[0].kind == "prompt" @@ -253,7 +253,9 @@ async def test_mcp_capabilities_resource_reports_read_only() -> None: assert "find" in tools_advertised assert "list_stores" in tools_advertised prompts = t.cast("list[str]", data["prompts"]) - assert "search_history" in prompts + assert "search_conversations" in prompts + assert "search_scopes" in data + assert data["search_scopes"] == ["prompts", "conversations", "all"] async def test_mcp_capabilities_lists_every_supported_agent_and_adapter() -> None: @@ -301,6 +303,25 @@ async def test_mcp_prompt_guides_search() -> None: assert "codex" in rendered +async def test_mcp_search_tool_rejects_legacy_search_type() -> None: + """The MCP search tool accepts ``scope`` instead of legacy ``search_type``.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool( + "search", + { + "terms": ["serenity"], + "agent": "codex", + "search_type": "history", + "limit": 5, + }, + raise_on_error=False, + ) + + assert result.is_error is True + + async def test_audit_middleware_emits_extras( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_cli_grep.py b/tests/test_cli_grep.py index cb33601..b16acf6 100644 --- a/tests/test_cli_grep.py +++ b/tests/test_cli_grep.py @@ -162,6 +162,23 @@ def test_grep_max_count_propagates() -> None: assert parsed.max_count == 5 +def test_grep_scope_conversations_propagates() -> None: + """``--scope conversations`` selects full conversation/session content.""" + parsed = agentgrep.parse_args(["grep", "--scope", "conversations", "foo"]) + assert isinstance(parsed, agentgrep.GrepArgs) + assert parsed.search_type == "conversations" + + +def test_grep_type_flag_is_rejected(capsys: pytest.CaptureFixture[str]) -> None: + """``grep --type`` is no longer the public search-breadth selector.""" + with pytest.raises(SystemExit) as exc_info: + _ = agentgrep.parse_args(["grep", "--type", "history", "foo"]) + assert exc_info.value.code == 2 + captured = capsys.readouterr() + assert "unrecognized arguments" in captured.err + assert "--type" in captured.err + + class QueryTranslationCase(t.NamedTuple): """Parametrized case for :func:`agentgrep.build_grep_query`.""" diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py index d021503..1694621 100644 --- a/tests/test_cli_search.py +++ b/tests/test_cli_search.py @@ -106,13 +106,13 @@ class SearchParseCase(t.NamedTuple): False, ), SearchParseCase( - "type-history", - ("search", "--type", "history", "todo"), + "scope-conversations", + ("search", "--scope", "conversations", "todo"), ("todo",), 0, False, False, - "history", + "conversations", False, ), SearchParseCase( @@ -211,6 +211,7 @@ class RemovedSearchFlagCase(t.NamedTuple): REMOVED_SEARCH_FLAG_CASES: tuple[RemovedSearchFlagCase, ...] = ( RemovedSearchFlagCase("any-mode", "--any", ("search", "--any", "foo", "bar")), RemovedSearchFlagCase("regex-mode", "--regex", ("search", "--regex", "foo.*bar")), + RemovedSearchFlagCase("type-mode", "--type", ("search", "--type", "history", "todo")), ) @@ -233,25 +234,25 @@ def test_search_removed_flags_are_rejected( assert "Traceback" not in captured.err -def test_search_type_field_broadens_coarse_search_type() -> None: - """A query-language ``type:`` predicate controls record-kind filtering.""" - parsed = agentgrep.parse_args(("search", "type:history", "bliss")) +def test_search_scope_field_broadens_coarse_search_type() -> None: + """A query-language ``scope:`` predicate controls search-scope filtering.""" + parsed = agentgrep.parse_args(("search", "scope:conversations", "bliss")) assert isinstance(parsed, agentgrep.SearchArgs) assert parsed.search_type == "all" assert parsed.terms == ("bliss",) assert parsed.compiled is not None -def test_search_type_field_history_record_reaches_compiled_predicate() -> None: - """``type:history`` must not be pre-filtered by the default prompts scope.""" - parsed = agentgrep.parse_args(("search", "type:history", "bliss")) +def test_search_scope_field_conversation_record_reaches_compiled_predicate() -> None: + """``scope:conversations`` must not be pre-filtered by default prompt scope.""" + parsed = agentgrep.parse_args(("search", "scope:conversations", "bliss")) assert isinstance(parsed, agentgrep.SearchArgs) record = agentgrep.SearchRecord( kind="history", agent="codex", - store="history", - adapter_id="codex.history_json.v1", - path=pathlib.Path("/tmp/history.json"), + store="codex.sessions", + adapter_id="codex.sessions_jsonl.v1", + path=pathlib.Path("/tmp/session.jsonl"), text="bliss command", ) query = agentgrep.SearchQuery( diff --git a/tests/test_query_compile.py b/tests/test_query_compile.py index 88f0cd7..5f7c670 100644 --- a/tests/test_query_compile.py +++ b/tests/test_query_compile.py @@ -325,15 +325,29 @@ class RecordPredicateCase(t.NamedTuple): expected_matches=True, ), RecordPredicateCase( - test_id="type-prompts-on-prompt-kind", - query="type:prompts", + test_id="scope-prompts-on-prompt-kind", + query="scope:prompts", record_kwargs={"kind": "prompt"}, expected_matches=True, ), RecordPredicateCase( - test_id="type-history-on-prompt-kind", - query="type:history", - record_kwargs={"kind": "prompt"}, + test_id="scope-conversations-on-chat-record", + query="scope:conversations", + record_kwargs={ + "kind": "history", + "store": "codex.sessions", + "adapter_id": "codex.sessions_jsonl.v1", + }, + expected_matches=True, + ), + RecordPredicateCase( + test_id="scope-conversations-excludes-prompt-history", + query="scope:conversations", + record_kwargs={ + "kind": "prompt", + "store": "codex.history", + "adapter_id": "codex.history_jsonl.v1", + }, expected_matches=False, ), RecordPredicateCase( @@ -467,14 +481,14 @@ class EnumValidationCase(t.NamedTuple): expected_fragment="invalid agent value 'clauded'", ), EnumValidationCase( - test_id="type-unknown-value", - query="type:bogus bliss", - expected_fragment="invalid type value 'bogus'", + test_id="scope-unknown-value", + query="scope:bogus bliss", + expected_fragment="invalid scope value 'bogus'", ), EnumValidationCase( - test_id="type-near-miss", - query="type:prompt bliss", - expected_fragment="invalid type value 'prompt'", + test_id="scope-near-miss", + query="scope:prompt bliss", + expected_fragment="invalid scope value 'prompt'", ), ) @@ -557,9 +571,9 @@ class ComparisonOnStringFieldCase(t.NamedTuple): expected_fragment="'agent' does not support comparison", ), ComparisonOnStringFieldCase( - test_id="range-against-type", - query="type:[prompts TO history] bliss", - expected_fragment="'type' does not support range", + test_id="range-against-scope", + query="scope:[prompts TO conversations] bliss", + expected_fragment="'scope' does not support range", ), ComparisonOnStringFieldCase( test_id="comparison-against-path", diff --git a/tests/test_query_engine.py b/tests/test_query_engine.py index d7c5ee0..30c4ecb 100644 --- a/tests/test_query_engine.py +++ b/tests/test_query_engine.py @@ -710,8 +710,8 @@ class MangledFieldPredicateCase(t.NamedTuple): argv=("find", "-timestamp:2026"), ), MangledFieldPredicateCase( - test_id="grep-mangled-type", - argv=("grep", "-type:prompts", "bliss"), + test_id="grep-mangled-scope", + argv=("grep", "-scope:prompts", "bliss"), ), ) @@ -805,29 +805,24 @@ class FlagFieldCollisionCase(t.NamedTuple): expected_message_fragment="cannot combine --agent flag with agent: field", ), FlagFieldCollisionCase( - test_id="grep-type-flag-and-field", - argv=("grep", "--type", "history", "type:prompts", "bliss"), - expected_message_fragment="cannot combine --type flag with type: field", + test_id="grep-scope-flag-and-field", + argv=("grep", "--scope", "conversations", "scope:prompts", "bliss"), + expected_message_fragment="cannot combine --scope flag with scope: field", ), FlagFieldCollisionCase( - test_id="grep-default-type-flag-and-field", - argv=("grep", "--type", "prompts", "type:history", "bliss"), - expected_message_fragment="cannot combine --type flag with type: field", + test_id="grep-default-scope-flag-and-field", + argv=("grep", "--scope", "prompts", "scope:conversations", "bliss"), + expected_message_fragment="cannot combine --scope flag with scope: field", ), FlagFieldCollisionCase( - test_id="search-type-flag-and-field", - argv=("search", "--type", "history", "type:prompts", "bliss"), - expected_message_fragment="cannot combine --type flag with type: field", + test_id="search-scope-flag-and-field", + argv=("search", "--scope", "conversations", "scope:prompts", "bliss"), + expected_message_fragment="cannot combine --scope flag with scope: field", ), FlagFieldCollisionCase( - test_id="search-default-type-flag-and-field", - argv=("search", "--type", "prompts", "type:history", "bliss"), - expected_message_fragment="cannot combine --type flag with type: field", - ), - FlagFieldCollisionCase( - test_id="find-default-type-flag-and-field", - argv=("find", "--type", "all", "type:history"), - expected_message_fragment="cannot combine --type flag with type: field", + test_id="search-default-scope-flag-and-field", + argv=("search", "--scope", "prompts", "scope:conversations", "bliss"), + expected_message_fragment="cannot combine --scope flag with scope: field", ), ) diff --git a/tests/test_query_ui.py b/tests/test_query_ui.py index a0dc0e2..ab5b373 100644 --- a/tests/test_query_ui.py +++ b/tests/test_query_ui.py @@ -137,7 +137,7 @@ def test_build_query_inherits_base_filter_scope() -> None: """The helper carries search_type / agents / limit through from base.""" base = agentgrep.SearchQuery( terms=("placeholder",), - search_type="history", + search_type="conversations", any_term=True, regex=True, case_sensitive=True, @@ -147,7 +147,7 @@ def test_build_query_inherits_base_filter_scope() -> None: ) result = build_query_from_input("agent:codex bliss", base, default_registry()) assert result.query is not None - assert result.query.search_type == "history" + assert result.query.search_type == "conversations" assert result.query.any_term is True assert result.query.regex is True assert result.query.case_sensitive is True From 08dbdf0443171852ee763c811361f8c9d200c704 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 31 May 2026 12:20:26 -0500 Subject: [PATCH 2/5] agentgrep(refactor[scope]): Rename search scope internals why: The search breadth selector is now scope in the public CLI and MCP surfaces. Keeping internal dataclass fields, type aliases, and test fixtures named search_type would preserve the ambiguity that the new vocabulary removes. what: - Rename SearchType/SearchTypeName aliases to SearchScope/SearchScopeName. - Rename SearchQuery, SearchArgs, and GrepArgs fields from search_type to scope. - Update engine, CLI, TUI, MCP, and tests to use scope throughout the internal API. --- src/agentgrep/__init__.py | 12 +-- src/agentgrep/cli/parser.py | 26 +++---- src/agentgrep/cli/render.py | 8 +- src/agentgrep/mcp/__init__.py | 4 +- src/agentgrep/mcp/_library.py | 4 +- src/agentgrep/mcp/models.py | 8 +- src/agentgrep/mcp/tools/diagnostic_tools.py | 2 +- src/agentgrep/mcp/tools/search_tools.py | 6 +- src/agentgrep/query/compile.py | 6 +- src/agentgrep/ui/app.py | 2 +- tests/test_agentgrep.py | 84 ++++++++++----------- tests/test_cli_grep.py | 8 +- tests/test_cli_search.py | 16 ++-- tests/test_cli_ui_overlay.py | 4 +- tests/test_grep_pretty.py | 2 +- tests/test_iter_search_events.py | 2 +- tests/test_query_engine.py | 12 +-- tests/test_query_ui.py | 8 +- 18 files changed, 107 insertions(+), 107 deletions(-) diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index a4272de..ce82bb7 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -15,7 +15,7 @@ >>> query = SearchQuery( ... terms=("serenity", "bliss"), -... search_type="prompts", +... scope="prompts", ... any_term=False, ... regex=False, ... case_sensitive=False, @@ -90,7 +90,7 @@ ] OutputMode = t.Literal["text", "json", "ndjson", "ui"] ProgressMode = t.Literal["auto", "always", "never"] -SearchType = t.Literal["prompts", "conversations", "all"] +SearchScope = t.Literal["prompts", "conversations", "all"] ColorMode = t.Literal["auto", "always", "never"] GrepStyle = t.Literal["default", "pretty"] type JSONScalar = str | int | float | bool | None @@ -1181,7 +1181,7 @@ class SearchQuery: """ terms: tuple[str, ...] - search_type: SearchType + scope: SearchScope any_term: bool regex: bool case_sensitive: bool @@ -6319,7 +6319,7 @@ def store_role_for_record(store: str, adapter_id: str) -> StoreRole | None: return None -def record_matches_scope(record: SearchRecord, scope: SearchType) -> bool: +def record_matches_scope(record: SearchRecord, scope: SearchScope) -> bool: """Return whether ``record`` belongs to the requested search scope.""" if scope == "all": return True @@ -6333,11 +6333,11 @@ def matches_record(record: SearchRecord, query: SearchQuery) -> bool: """Return whether a normalized record should be included. When ``query.compiled`` carries a record-level predicate, the - record must satisfy it in addition to the existing text + kind + record must satisfy it in addition to the existing text + scope checks. Pure-text queries skip the predicate evaluation since the compiler leaves ``compiled = None`` for them. """ - if not record_matches_scope(record, query.search_type): + if not record_matches_scope(record, query.scope): return False if not matches_text(build_search_haystack(record), query): return False diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py index 7d4ac23..8a6a696 100644 --- a/src/agentgrep/cli/parser.py +++ b/src/agentgrep/cli/parser.py @@ -33,7 +33,7 @@ GrepStyle, OutputMode, ProgressMode, - SearchType, + SearchScope, create_themed_formatter, ) @@ -116,7 +116,7 @@ class GrepArgs: patterns: tuple[str, ...] agents: tuple[AgentName, ...] - search_type: SearchType + scope: SearchScope case_mode: CaseMode pattern_mode: PatternMode invert_match: bool @@ -148,7 +148,7 @@ class SearchArgs: terms: tuple[str, ...] agents: tuple[AgentName, ...] - search_type: SearchType + scope: SearchScope case_sensitive: bool limit: int | None output_mode: OutputMode @@ -350,7 +350,7 @@ def create_parser( _ = grep_parser.add_argument( "--scope", choices=["prompts", "conversations", "all"], - dest="search_type", + dest="scope", help="Search scope: prompts, conversations, or all (default: prompts)", ) _ = grep_parser.add_argument( @@ -515,7 +515,7 @@ def create_parser( _ = search_parser.add_argument( "--scope", choices=["prompts", "conversations", "all"], - dest="search_type", + dest="scope", help="Search scope: prompts, conversations, or all (default: prompts)", ) _ = search_parser.add_argument( @@ -586,7 +586,7 @@ def _search_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]: flags: dict[str, str] = {} if t.cast("list[str]", namespace.agent): flags["agent"] = "--agent" - if t.cast("str | None", namespace.search_type) is not None: + if t.cast("str | None", namespace.scope) is not None: flags["scope"] = "--scope" return flags @@ -596,7 +596,7 @@ def _grep_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]: flags: dict[str, str] = {} if t.cast("list[str]", namespace.agent): flags["agent"] = "--agent" - if t.cast("str | None", namespace.search_type) is not None: + if t.cast("str | None", namespace.scope) is not None: flags["scope"] = "--scope" return flags @@ -611,13 +611,13 @@ def _find_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]: return flags -def _effective_search_type( +def _effective_search_scope( namespace: argparse.Namespace, *, query_fields: set[str], -) -> SearchType: - """Return the coarse search type after query-language reconciliation.""" - explicit = t.cast("SearchType | None", namespace.search_type) +) -> SearchScope: + """Return the coarse search scope after query-language reconciliation.""" + explicit = t.cast("SearchScope | None", namespace.scope) if explicit is not None: return explicit if "scope" in query_fields: @@ -934,7 +934,7 @@ def _build_grep_args( return GrepArgs( patterns=tuple(patterns_list), agents=agents, - search_type=_effective_search_type( + scope=_effective_search_scope( namespace, query_fields=grep_query_fields, ), @@ -999,7 +999,7 @@ def _build_search_args( return SearchArgs( terms=final_terms, agents=agents, - search_type=_effective_search_type( + scope=_effective_search_scope( namespace, query_fields=search_query_fields, ), diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py index 59d54a8..3f6ed71 100644 --- a/src/agentgrep/cli/render.py +++ b/src/agentgrep/cli/render.py @@ -395,7 +395,7 @@ def run_find_command(args: FindArgs) -> int: if args.output_mode == "ui": query = agentgrep.SearchQuery( terms=(args.pattern,) if args.pattern else (), - search_type="all", + scope="all", any_term=False, regex=args.pattern_mode == "regex", case_sensitive=args.case_mode == "respect", @@ -444,7 +444,7 @@ def run_ui_command(args: UIArgs) -> int: initial_terms = tuple(args.initial_query.split()) if args.initial_query else () query = agentgrep.SearchQuery( terms=initial_terms, - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -469,7 +469,7 @@ def run_search_command(args: SearchArgs) -> int: raise SystemExit(msg) query = agentgrep.SearchQuery( terms=args.terms, - search_type=args.search_type, + scope=args.scope, any_term=False, regex=False, case_sensitive=args.case_sensitive, @@ -877,7 +877,7 @@ def build_grep_query(args: GrepArgs) -> agentgrep.SearchQuery: return agentgrep.SearchQuery( terms=terms, - search_type=args.search_type, + scope=args.scope, any_term=False, regex=regex, case_sensitive=case_sensitive, diff --git a/src/agentgrep/mcp/__init__.py b/src/agentgrep/mcp/__init__.py index 294e14d..d142177 100644 --- a/src/agentgrep/mcp/__init__.py +++ b/src/agentgrep/mcp/__init__.py @@ -29,7 +29,7 @@ FindRecordLike, SearchQueryFactory, SearchRecordLike, - SearchTypeName, + SearchScopeName, SourceHandleLike, agentgrep, normalize_agent_selection, @@ -73,9 +73,9 @@ "SearchRecordLike", "SearchRecordModel", "SearchRequestModel", + "SearchScopeName", "SearchToolQuery", "SearchToolResponse", - "SearchTypeName", "SourceHandleLike", "SourceListAdapter", "SourceRecordModel", diff --git a/src/agentgrep/mcp/_library.py b/src/agentgrep/mcp/_library.py index 92886d1..6e8180f 100644 --- a/src/agentgrep/mcp/_library.py +++ b/src/agentgrep/mcp/_library.py @@ -19,7 +19,7 @@ AgentSelector = t.Literal[ "codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi", "opencode", "all" ] -SearchTypeName = t.Literal["prompts", "conversations", "all"] +SearchScopeName = t.Literal["prompts", "conversations", "all"] SERVER_VERSION = "0.1.0" KNOWN_ADAPTERS: tuple[str, ...] = ( @@ -138,7 +138,7 @@ def __call__( self, *, terms: tuple[str, ...], - search_type: str, + scope: str, any_term: bool, regex: bool, case_sensitive: bool, diff --git a/src/agentgrep/mcp/models.py b/src/agentgrep/mcp/models.py index f3e6f3e..100aaba 100644 --- a/src/agentgrep/mcp/models.py +++ b/src/agentgrep/mcp/models.py @@ -11,7 +11,7 @@ AgentSelector, FindRecordLike, SearchRecordLike, - SearchTypeName, + SearchScopeName, SourceHandleLike, agentgrep, ) @@ -112,7 +112,7 @@ class SearchToolQuery(AgentGrepModel): terms: list[str] agent: AgentSelector - scope: SearchTypeName + scope: SearchScopeName case_sensitive: bool limit: int | None = None @@ -159,7 +159,7 @@ class CapabilitiesModel(AgentGrepModel): agents: list[ t.Literal["codex", "claude", "cursor-cli", "cursor-ide", "gemini", "grok", "pi", "opencode"] ] - search_scopes: list[SearchTypeName] + search_scopes: list[SearchScopeName] adapters: list[str] tools: list[str] resources: list[str] @@ -175,7 +175,7 @@ class SearchRequestModel(AgentGrepModel): terms: list[str] agent: AgentSelector - scope: SearchTypeName + scope: SearchScopeName case_sensitive: bool limit: int | None = None diff --git a/src/agentgrep/mcp/tools/diagnostic_tools.py b/src/agentgrep/mcp/tools/diagnostic_tools.py index a90f7e2..68d7eb7 100644 --- a/src/agentgrep/mcp/tools/diagnostic_tools.py +++ b/src/agentgrep/mcp/tools/diagnostic_tools.py @@ -19,7 +19,7 @@ def _validate_query_sync(request: ValidateQueryRequest) -> ValidateQueryResponse """Dry-run a ``SearchQuery`` against sample text without searching files.""" query = agentgrep.SearchQuery( terms=tuple(request.terms), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=request.case_sensitive, diff --git a/src/agentgrep/mcp/tools/search_tools.py b/src/agentgrep/mcp/tools/search_tools.py index f35bdf9..ef9a280 100644 --- a/src/agentgrep/mcp/tools/search_tools.py +++ b/src/agentgrep/mcp/tools/search_tools.py @@ -13,7 +13,7 @@ from agentgrep.mcp._library import ( READONLY_TAGS, AgentSelector, - SearchTypeName, + SearchScopeName, agentgrep, normalize_agent_selection, ) @@ -35,7 +35,7 @@ def _search_sync(request: SearchRequestModel) -> SearchToolResponse: """Run the blocking search work and build a typed response.""" query = agentgrep.SearchQuery( terms=tuple(request.terms), - search_type=request.scope, + scope=request.scope, any_term=False, regex=False, case_sensitive=request.case_sensitive, @@ -99,7 +99,7 @@ async def search_tool( Field(description="Limit search to one agent or search all agents."), ] = "all", scope: t.Annotated[ - SearchTypeName, + SearchScopeName, Field(description="Search prompts, conversations, or both."), ] = "prompts", case_sensitive: t.Annotated[ diff --git a/src/agentgrep/query/compile.py b/src/agentgrep/query/compile.py index 18ace6f..5443bf0 100644 --- a/src/agentgrep/query/compile.py +++ b/src/agentgrep/query/compile.py @@ -261,7 +261,7 @@ def build_query_from_input( compiled query through ``SearchQuery.compiled`` so source and record predicates apply on the next search. - Inherits ``search_type``, ``any_term``, ``regex``, + Inherits ``scope``, ``any_term``, ``regex``, ``case_sensitive``, ``agents``, ``limit``, and ``dedupe`` from ``base_query`` so the search bar lives on top of the existing filter scope rather than resetting it. @@ -305,7 +305,7 @@ def _rebuild( """Clone ``base`` with new ``terms`` / ``compiled``; carry the rest forward.""" return agentgrep.SearchQuery( terms=terms, - search_type=base.search_type, + scope=base.scope, any_term=base.any_term, regex=base.regex, case_sensitive=base.case_sensitive, @@ -494,7 +494,7 @@ def _field_matches_record( if spec.name == "scope": return agentgrep.record_matches_scope( record, - t.cast("agentgrep.SearchType", node.value), + t.cast("agentgrep.SearchScope", node.value), ) if spec.name == "timestamp": return _date_predicate_matches( diff --git a/src/agentgrep/ui/app.py b/src/agentgrep/ui/app.py index 4d64be1..36dabad 100644 --- a/src/agentgrep/ui/app.py +++ b/src/agentgrep/ui/app.py @@ -1123,7 +1123,7 @@ def _build_search_query(self, text: str) -> SearchQuery: terms = tuple(text.split()) if text else () return SearchQuery( terms=terms, - search_type=self.query.search_type, + scope=self.query.scope, any_term=self.query.any_term, regex=self.query.regex, case_sensitive=self.query.case_sensitive, diff --git a/tests/test_agentgrep.py b/tests/test_agentgrep.py index 9d008d4..c08d179 100644 --- a/tests/test_agentgrep.py +++ b/tests/test_agentgrep.py @@ -72,7 +72,7 @@ def __call__( self, *, terms: tuple[str, ...], - search_type: str, + scope: str, any_term: bool, regex: bool, case_sensitive: bool, @@ -438,7 +438,7 @@ def test_search_codex_prompt_match_returns_full_prompt( query = agentgrep.SearchQuery( terms=("serenity", "bliss"), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -479,7 +479,7 @@ def test_search_reports_source_and_match_progress( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -573,7 +573,7 @@ def test_collect_search_records_calls_record_added_with_each_unique_record( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -640,7 +640,7 @@ def test_collect_search_records_reports_in_source_progress_and_yields_gil( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -864,7 +864,7 @@ def test_streaming_search_progress_translates_progress_callbacks( progress = agentgrep.StreamingSearchProgress(emit=emitted.append) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -1043,7 +1043,7 @@ def _build_empty_ui_app( ) query = agentgrep.SearchQuery( terms=(), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2308,7 +2308,7 @@ async def test_show_detail_memoizes_first_match_line( ) query = agentgrep.SearchQuery( terms=("needle",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2385,7 +2385,7 @@ async def test_show_detail_scrolls_to_first_match( # uses ``terms=()``, which would make first_match always return None. query = agentgrep.SearchQuery( terms=("needle",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2516,7 +2516,7 @@ async def test_show_detail_keeps_text_highlighting_for_plain_body( monkeypatch.setattr(agentgrep, "run_search_query", lambda *args, **kwargs: []) query = agentgrep.SearchQuery( terms=("libtmux",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2614,7 +2614,7 @@ def test_collect_search_records_returns_partial_results_on_answer_now( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2649,7 +2649,7 @@ def test_run_search_query_interrupts_progress_on_keyboard_interrupt( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2742,7 +2742,7 @@ def test_plan_search_sources_prefilters_one_root_once( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2805,7 +2805,7 @@ def test_search_prefers_newer_sources_when_limiting( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2858,7 +2858,7 @@ def test_search_dedupes_identical_prompts_within_session( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2902,7 +2902,7 @@ def test_search_keeps_identical_prompts_across_sessions( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -2964,7 +2964,7 @@ def test_search_limit_applies_to_unique_results( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3006,7 +3006,7 @@ def test_search_codex_history_json_returns_history_record( query = agentgrep.SearchQuery( terms=("serenity",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3071,7 +3071,7 @@ def test_cursor_ai_tracking_summary_is_exposed_as_history( query = agentgrep.SearchQuery( terms=("serenity", "bliss"), - search_type="conversations", + scope="conversations", any_term=False, regex=False, case_sensitive=False, @@ -3118,7 +3118,7 @@ def test_cursor_state_itemtable_extracts_prompt( query = agentgrep.SearchQuery( terms=("serenity", "bliss"), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3614,7 +3614,7 @@ def test_progress_no_color_overrides_color_always(monkeypatch: pytest.MonkeyPatc ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3646,7 +3646,7 @@ def test_progress_force_color_enables_auto_for_non_tty( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3738,7 +3738,7 @@ def test_non_tty_progress_emits_start_heartbeat_and_finish() -> None: ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3830,7 +3830,7 @@ def test_tty_progress_renders_spinner_and_clears(monkeypatch: pytest.MonkeyPatch ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3868,7 +3868,7 @@ def test_tty_progress_renders_answer_now_hint() -> None: ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3908,7 +3908,7 @@ def test_tty_progress_render_fits_terminal_width( ) query = agentgrep.SearchQuery( terms=("libtmux",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3946,7 +3946,7 @@ def test_tty_progress_answer_now_hint_is_white(monkeypatch: pytest.MonkeyPatch) ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -3976,7 +3976,7 @@ def test_tty_progress_interrupt_preserves_current_summary( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -4013,7 +4013,7 @@ def test_tty_progress_prefilter_uses_private_directory_path( ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -4043,7 +4043,7 @@ def test_non_tty_progress_interrupt_emits_current_summary() -> None: ) query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -4069,7 +4069,7 @@ def test_main_handles_keyboard_interrupt_without_traceback( args = agentgrep.GrepArgs( patterns=("bliss",), agents=("codex",), - search_type="prompts", + scope="prompts", case_mode="smart", pattern_mode="regex", invert_match=False, @@ -4197,7 +4197,7 @@ def _make_query(agentgrep: object, agents: tuple[AgentName, ...], terms: tuple[s mod = t.cast("t.Any", agentgrep) return mod.SearchQuery( terms=terms, - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -4294,7 +4294,7 @@ def test_search_codex_history_jsonl_uses_modern_text_schema( backends = agentgrep.BackendSelection(None, None, None) query = agentgrep.SearchQuery( terms=("modern",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -4352,7 +4352,7 @@ def test_search_codex_legacy_root_rollout_json_session( backends = agentgrep.BackendSelection(None, None, None) query = agentgrep.SearchQuery( terms=("legacy",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -5519,7 +5519,7 @@ def test_search_claude_history_expands_external_pasted_text( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -5575,7 +5575,7 @@ def test_search_claude_history_tolerates_missing_paste_cache( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("missing",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -6173,7 +6173,7 @@ def test_search_grok_prompt_history( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("summarise",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -6218,7 +6218,7 @@ def test_search_grok_chat_history_session( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("design",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -6259,7 +6259,7 @@ def test_search_grok_chat_history_drops_empty_content( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=(), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -6315,7 +6315,7 @@ def test_search_grok_session_search_db( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("middleware",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -6484,7 +6484,7 @@ def test_search_pi_sessions( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("streaming",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, @@ -6954,7 +6954,7 @@ def test_search_opencode_sessions( backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) query = t.cast("t.Any", agentgrep).SearchQuery( terms=("streaming",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, diff --git a/tests/test_cli_grep.py b/tests/test_cli_grep.py index b16acf6..7fb327b 100644 --- a/tests/test_cli_grep.py +++ b/tests/test_cli_grep.py @@ -166,7 +166,7 @@ def test_grep_scope_conversations_propagates() -> None: """``--scope conversations`` selects full conversation/session content.""" parsed = agentgrep.parse_args(["grep", "--scope", "conversations", "foo"]) assert isinstance(parsed, agentgrep.GrepArgs) - assert parsed.search_type == "conversations" + assert parsed.scope == "conversations" def test_grep_type_flag_is_rejected(capsys: pytest.CaptureFixture[str]) -> None: @@ -284,7 +284,7 @@ def test_build_grep_query_translates_modes(case: QueryTranslationCase) -> None: args = agentgrep.GrepArgs( patterns=case.patterns, agents=agentgrep.AGENT_CHOICES, - search_type="prompts", + scope="prompts", case_mode=case.case_mode, pattern_mode=case.pattern_mode, invert_match=False, @@ -313,7 +313,7 @@ def _make_grep_args(**overrides: object) -> agentgrep.GrepArgs: base: dict[str, object] = { "patterns": ("foo",), "agents": agentgrep.AGENT_CHOICES, - "search_type": "prompts", + "scope": "prompts", "case_mode": "smart", "pattern_mode": "regex", "invert_match": False, @@ -911,7 +911,7 @@ def _make_grep_args_for_helpers(**overrides: t.Any) -> agentgrep.GrepArgs: base: dict[str, t.Any] = { "patterns": ("foo",), "agents": agentgrep.AGENT_CHOICES, - "search_type": "prompts", + "scope": "prompts", "case_mode": "smart", "pattern_mode": "regex", "invert_match": False, diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py index 1694621..3739d35 100644 --- a/tests/test_cli_search.py +++ b/tests/test_cli_search.py @@ -30,7 +30,7 @@ class SearchParseCase(t.NamedTuple): expected_threshold: int expected_no_group: bool expected_no_rank: bool - expected_search_type: agentgrep.SearchType + expected_scope: agentgrep.SearchScope expected_case_sensitive: bool @@ -150,7 +150,7 @@ def test_search_parse_args( expected_threshold: int, expected_no_group: bool, expected_no_rank: bool, - expected_search_type: agentgrep.SearchType, + expected_scope: agentgrep.SearchScope, expected_case_sensitive: bool, ) -> None: """Search subparser captures ranking-specific flags correctly.""" @@ -161,7 +161,7 @@ def test_search_parse_args( assert parsed.threshold == expected_threshold assert parsed.no_group == expected_no_group assert parsed.no_rank == expected_no_rank - assert parsed.search_type == expected_search_type + assert parsed.scope == expected_scope assert parsed.case_sensitive == expected_case_sensitive @@ -234,11 +234,11 @@ def test_search_removed_flags_are_rejected( assert "Traceback" not in captured.err -def test_search_scope_field_broadens_coarse_search_type() -> None: +def test_search_scope_field_broadens_coarse_search_scope() -> None: """A query-language ``scope:`` predicate controls search-scope filtering.""" parsed = agentgrep.parse_args(("search", "scope:conversations", "bliss")) assert isinstance(parsed, agentgrep.SearchArgs) - assert parsed.search_type == "all" + assert parsed.scope == "all" assert parsed.terms == ("bliss",) assert parsed.compiled is not None @@ -257,7 +257,7 @@ def test_search_scope_field_conversation_record_reaches_compiled_predicate() -> ) query = agentgrep.SearchQuery( terms=parsed.terms, - search_type=parsed.search_type, + scope=parsed.scope, any_term=False, regex=False, case_sensitive=parsed.case_sensitive, @@ -266,7 +266,7 @@ def test_search_scope_field_conversation_record_reaches_compiled_predicate() -> compiled=parsed.compiled, ) - assert query.search_type == "all" + assert query.scope == "all" assert agentgrep.matches_record(record, query) @@ -280,7 +280,7 @@ def _make_search_args(**overrides: t.Any) -> agentgrep.SearchArgs: base: dict[str, t.Any] = { "terms": ("bliss",), "agents": agentgrep.AGENT_CHOICES, - "search_type": "prompts", + "scope": "prompts", "case_sensitive": False, "limit": None, "output_mode": "text", diff --git a/tests/test_cli_ui_overlay.py b/tests/test_cli_ui_overlay.py index b1d960d..2e997d7 100644 --- a/tests/test_cli_ui_overlay.py +++ b/tests/test_cli_ui_overlay.py @@ -54,7 +54,7 @@ class OverlayCase(t.NamedTuple): test_id: str argv: tuple[str, ...] - expected_search_type: agentgrep.SearchType + expected_scope: agentgrep.SearchScope expected_terms: tuple[str, ...] @@ -86,7 +86,7 @@ def test_ui_overlay_dispatches_to_run_ui( assert exit_code == 0 assert len(captured) == 1 query = captured[0] - assert query.search_type == case.expected_search_type + assert query.scope == case.expected_scope assert query.terms == case.expected_terms diff --git a/tests/test_grep_pretty.py b/tests/test_grep_pretty.py index 195382a..f27b13b 100644 --- a/tests/test_grep_pretty.py +++ b/tests/test_grep_pretty.py @@ -25,7 +25,7 @@ def _make_grep_args(**overrides: t.Any) -> GrepArgs: defaults: dict[str, t.Any] = { "patterns": ("streaming",), "agents": ("codex", "claude", "cursor-cli", "gemini"), - "search_type": "prompts", + "scope": "prompts", "case_mode": "smart", "pattern_mode": "fixed", "invert_match": False, diff --git a/tests/test_iter_search_events.py b/tests/test_iter_search_events.py index b0972e6..80ff101 100644 --- a/tests/test_iter_search_events.py +++ b/tests/test_iter_search_events.py @@ -51,7 +51,7 @@ def _make_query( """Build a :class:`agentgrep.SearchQuery` with the helper defaults.""" return agentgrep.SearchQuery( terms=terms, - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, diff --git a/tests/test_query_engine.py b/tests/test_query_engine.py index 30c4ecb..e331fe7 100644 --- a/tests/test_query_engine.py +++ b/tests/test_query_engine.py @@ -115,7 +115,7 @@ def _stub_iter( compiled = _compile_query("-agent:claude bliss") query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -171,7 +171,7 @@ def _stub_iter( compiled = _compile_query("agent:codex model:claude bliss") query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -228,7 +228,7 @@ def _stub_iter( compiled = _compile_query("agent:codex sonnet") query = agentgrep.SearchQuery( terms=("sonnet",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -344,7 +344,7 @@ def _stub_iter( compiled = _compile_query(case.query) query = agentgrep.SearchQuery( terms=compiled.text_terms, - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -524,7 +524,7 @@ def _stub_iter( compiled = _compile_query(case.query) query = agentgrep.SearchQuery( terms=compiled.text_terms, - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -896,7 +896,7 @@ def test_compiled_none_falls_through_to_legacy_path( query = agentgrep.SearchQuery( terms=("bliss",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, diff --git a/tests/test_query_ui.py b/tests/test_query_ui.py index ab5b373..1c67cdb 100644 --- a/tests/test_query_ui.py +++ b/tests/test_query_ui.py @@ -28,7 +28,7 @@ def _base_query() -> agentgrep.SearchQuery: """Build a synthetic base SearchQuery the helper inherits from.""" return agentgrep.SearchQuery( terms=("placeholder",), - search_type="prompts", + scope="prompts", any_term=False, regex=False, case_sensitive=False, @@ -134,10 +134,10 @@ def test_build_query_from_input_handles_every_shape( def test_build_query_inherits_base_filter_scope() -> None: - """The helper carries search_type / agents / limit through from base.""" + """The helper carries scope / agents / limit through from base.""" base = agentgrep.SearchQuery( terms=("placeholder",), - search_type="conversations", + scope="conversations", any_term=True, regex=True, case_sensitive=True, @@ -147,7 +147,7 @@ def test_build_query_inherits_base_filter_scope() -> None: ) result = build_query_from_input("agent:codex bliss", base, default_registry()) assert result.query is not None - assert result.query.search_type == "conversations" + assert result.query.scope == "conversations" assert result.query.any_term is True assert result.query.regex is True assert result.query.case_sensitive is True From e06b0bf62391b9d5ec8f070b5e5e18d6b889602e Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 31 May 2026 12:26:46 -0500 Subject: [PATCH 3/5] agentgrep(docs[scope]): Document prompt search scope why: The new scope vocabulary changes how users choose default prompt search versus explicit conversation search. The docs and MCP-facing guidance need to teach that boundary consistently and stop pointing at removed search_type/search_history surfaces. what: - Update README, CLI, MCP, library, query-language, backend, and changelog docs for --scope and scope: usage. - Refresh docs-only FastMCP signatures and library snippets to use scope. - Adjust user-facing MCP/TUI/source descriptions from prompt-history wording to prompt/conversation scope wording. --- CHANGES | 41 +++++++++++++++++++++++++ README.md | 14 ++++++--- docs/_ext/agentgrep_fastmcp.py | 10 +++--- docs/_ext/widgets/library_install.py | 2 +- docs/backends/index.md | 5 +++ docs/backends/opencode.md | 2 ++ docs/backends/pi.md | 3 ++ docs/cli/grep.md | 21 +++++++++---- docs/cli/index.md | 4 +-- docs/getting-started/configuration.md | 10 +++--- docs/getting-started/index.md | 12 ++++++-- docs/index.md | 8 ++--- docs/library/examples.md | 2 +- docs/library/query-language.md | 14 +++++++-- docs/library/tutorial.md | 12 ++++---- docs/manifest.json | 2 +- docs/mcp/index.md | 6 +++- docs/mcp/prompts.md | 8 ++--- docs/mcp/resources.md | 4 ++- docs/mcp/tools.md | 8 +++-- src/agentgrep/__init__.py | 2 +- src/agentgrep/_engine/search.py | 2 +- src/agentgrep/cli/render.py | 4 +-- src/agentgrep/mcp/instructions.py | 17 +++++----- src/agentgrep/mcp/tools/search_tools.py | 2 +- src/agentgrep/stores.py | 2 +- src/agentgrep/ui/app.py | 2 +- 27 files changed, 155 insertions(+), 64 deletions(-) diff --git a/CHANGES b/CHANGES index b9d5d71..5cc1683 100644 --- a/CHANGES +++ b/CHANGES @@ -42,6 +42,47 @@ $ uvx --from 'agentgrep' --prerelease allow python +agentgrep 0.1.0a14 makes prompt search the default search scope and +requires explicit opt-in for full conversation records. This release +renames the search breadth selector to `scope` across the CLI, MCP, +query language, and Python library so prompt-history logs are no +longer confused with conversation history. + +### Breaking changes + +Search and grep now use `--scope`, not `--type`, for prompt versus +conversation breadth. `find --type` is unchanged because it still +filters discovered files and stores. + +Before: + +```console +$ agentgrep grep "release notes" --type history +``` + +After: + +```console +$ agentgrep grep "release notes" --scope conversations +``` + +MCP search requests now send `scope` instead of `search_type`, and +library callers construct {class}`~agentgrep.SearchQuery` with +`scope=...`. + +### What's new + +#### Search scope vocabulary (#38) + +Bare CLI and MCP searches now run in `prompts` scope. Dedicated +prompt-history stores are included in that default, and transcript-only +backends still project user turns into prompt scope when an app does +not keep a separate prompt log. + +Full conversation, session, assistant, tool, and event records require +`--scope conversations`, `scope="conversations"`, or `scope:conversations`. +Use `all` to search prompts and conversations together. + ## agentgrep 0.1.0a13 (2026-05-31) agentgrep 0.1.0a13 adds OpenCode (anomalyco/opencode, formerly diff --git a/README.md b/README.md index 3bc08a0..f86b3b4 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ [![Python versions](https://img.shields.io/pypi/pyversions/agentgrep.svg)](https://pypi.org/project/agentgrep/) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) -Read-only search for local AI agent prompts and history across Codex, -Claude Code, Cursor, Gemini, Grok, Pi, and OpenCode. +Read-only search for local AI agent prompts and opt-in conversations +across Codex, Claude Code, Cursor, Gemini, Grok, Pi, and OpenCode. `agentgrep` provides a CLI and an MCP server over the same discovery + parsing layer: @@ -29,12 +29,18 @@ snippets live in the ## CLI quickstart -Search prompts and history across every configured agent: +Search user prompts across every configured agent: ```console $ agentgrep grep "deploy" ``` +Search full conversations explicitly: + +```console +$ agentgrep grep "deploy" --scope conversations +``` + Stream JSON so a non-MCP agent or shell pipeline can consume the results: @@ -73,7 +79,7 @@ import agentgrep backends = agentgrep.select_backends() query = agentgrep.SearchQuery( terms=("hello",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, diff --git a/docs/_ext/agentgrep_fastmcp.py b/docs/_ext/agentgrep_fastmcp.py index e1d387f..7d70617 100644 --- a/docs/_ext/agentgrep_fastmcp.py +++ b/docs/_ext/agentgrep_fastmcp.py @@ -16,8 +16,8 @@ from agentgrep.mcp import ( AgentSelector, FindToolResponse, + SearchScopeName, SearchToolResponse, - SearchTypeName, ) from agentgrep.mcp.models import ( DiscoverySummaryResponse, @@ -45,9 +45,9 @@ async def search( AgentSelector, Field(description="Limit search to one agent or search all agents."), ] = "all", - search_type: t.Annotated[ - SearchTypeName, - Field(description="Search prompts, history, or both."), + scope: t.Annotated[ + SearchScopeName, + Field(description="Search prompts, conversations, or both."), ] = "prompts", case_sensitive: t.Annotated[ bool, @@ -62,7 +62,7 @@ async def search( ), ] = 20, ) -> SearchToolResponse: - """Search normalized prompts or history across local agent stores.""" + """Search normalized prompts or conversations across local agent stores.""" raise NotImplementedError(DOCS_ONLY_MESSAGE) diff --git a/docs/_ext/widgets/library_install.py b/docs/_ext/widgets/library_install.py index 9b44d06..a68d816 100644 --- a/docs/_ext/widgets/library_install.py +++ b/docs/_ext/widgets/library_install.py @@ -90,7 +90,7 @@ class Panel: backends = agentgrep.select_backends() query = agentgrep.SearchQuery( terms=("hello",), - search_type="all", + scope="all", any_term=False, regex=False, case_sensitive=False, diff --git a/docs/backends/index.md b/docs/backends/index.md index 7b1ed10..43e4eaf 100644 --- a/docs/backends/index.md +++ b/docs/backends/index.md @@ -72,6 +72,11 @@ stores expose safe structural samples for `inspect_record_sample`, but they still stay outside default search. Private stores are documented but intentionally not enumerated from disk. +Search scope is record-level. `--scope prompts` is the default and +includes dedicated prompt-history logs plus user turns projected from +transcript-only backends. Full conversation, assistant, tool, and event +records require `--scope conversations` or `--scope all`. + ## Version detection Source discovery reports version metadata separately from record diff --git a/docs/backends/opencode.md b/docs/backends/opencode.md index 27d6c45..62d1696 100644 --- a/docs/backends/opencode.md +++ b/docs/backends/opencode.md @@ -26,6 +26,8 @@ rather than a JSONL-transcript backend. A relational `session → message → part` schema (Drizzle). A conversation turn is reconstructed by joining a `part` row up to its `message` (for the role) and `session` (for the title and working directory). +User text parts participate in the default prompt scope; assistant and +reasoning parts require `--scope conversations` or `--scope all`. `session` table — one row per session: diff --git a/docs/backends/pi.md b/docs/backends/pi.md index af424b0..b658a19 100644 --- a/docs/backends/pi.md +++ b/docs/backends/pi.md @@ -16,6 +16,9 @@ named `_.jsonl`. Unlike Codex or Grok, pi keeps no separate prompt-history log and no SQLite session index — the session transcript is the entire searchable surface, which makes pi the structural twin of the Claude Code backend. +agentgrep projects user turns from that transcript into the default +prompt scope; assistant, tool, summary, and branch records require +`--scope conversations` or `--scope all`. The optional `PI_CODING_AGENT_SESSION_DIR` override points at the sessions directory directly. When it is set, pi writes session files diff --git a/docs/cli/grep.md b/docs/cli/grep.md index 9df2a59..98cdf63 100644 --- a/docs/cli/grep.md +++ b/docs/cli/grep.md @@ -2,10 +2,11 @@ # agentgrep grep -The `agentgrep grep` command searches normalized prompt and history -records with the flag grammar and output behavior of `ripgrep` and +The `agentgrep grep` command searches normalized prompt records by +default, with explicit scope controls for conversation records. It +uses the flag grammar and output behavior of `ripgrep` and `the_silver_searcher`. If you already reach for `rg -i` or `ag -F` -without thinking, the same flags work here against your AI history. +without thinking, the same flags work here against your AI prompts. Defaults follow rg: smart-case (case-insensitive unless the pattern contains uppercase), regex pattern interpretation, color on TTY, @@ -32,10 +33,10 @@ Force case-insensitive matching: $ agentgrep grep -i 'serene bliss' ``` -Treat the pattern as a literal substring (not a regex): +Search full conversation records with a literal substring: ```console -$ agentgrep grep -F --type history 'v1.2.3' +$ agentgrep grep -F --scope conversations 'v1.2.3' ``` Stream an rg-style event stream as JSON: @@ -104,6 +105,14 @@ The eager output modes (`--json`, `-c`, `-l`, `-v`) buffer because their output shape needs the final tally or cross-record deduplication. +## Search scope + +`grep` searches `--scope prompts` by default. That includes dedicated +prompt-history logs and user turns projected from transcript-only +stores. Pass `--scope conversations` for full conversation, session, +assistant, tool, and event records, or `--scope all` to search both +surfaces together. + ## Progress The stderr progress spinner (when stderr is a TTY) lets you know a @@ -190,7 +199,7 @@ question that the engine's current output supports. Tracking issue: By default `grep` deduplicates matches by session so a single conversation that repeats near-identical text doesn't drown the output. This is the one place where `agentgrep grep` deliberately -diverges from `rg`'s raw behavior — AI history stores often replay +diverges from `rg`'s raw behavior — AI conversation stores often replay the same message text many times across one session, which makes the raw rg view noisier than a filesystem grep. diff --git a/docs/cli/index.md b/docs/cli/index.md index 30edab5..3a6ce56 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -89,10 +89,10 @@ Combine multiple patterns with an agent filter: $ agentgrep grep serene bliss --agent codex ``` -Stream history matches as NDJSON: +Stream full-conversation matches as NDJSON: ```console -$ agentgrep grep prompt history --type history --ndjson +$ agentgrep grep prompt history --scope conversations --ndjson ``` List stores for one agent as JSON: diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 11ac70e..eb7afa9 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -14,15 +14,17 @@ $ uv run agentgrep grep "cache" --agent codex Supported agents are `codex`, `claude`, `cursor-cli`, `cursor-ide`, `gemini`, `grok`, `pi`, and `opencode`. Omitting `--agent` searches all supported agents. -## Search type +## Search scope -Use `--type` to choose records: +Search and grep default to prompt scope: user-authored prompts, +including dedicated prompt-history logs and user turns projected from +transcript-only stores. Use `--scope` to opt into broader records: ```console -$ uv run agentgrep grep "docs deploy" --type prompts +$ uv run agentgrep grep "docs deploy" --scope conversations ``` -Allowed values are `prompts`, `history`, and `all`. +Allowed values are `prompts`, `conversations`, and `all`. ## Output diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md index 667c489..fb15613 100644 --- a/docs/getting-started/index.md +++ b/docs/getting-started/index.md @@ -13,9 +13,9 @@ From the repository root: $ uv sync --all-groups ``` -## 2. Search local agent history +## 2. Search local agent prompts -Search all supported stores: +Search prompt-scope records across supported stores: ```console $ uv run agentgrep grep "release notes" @@ -24,7 +24,13 @@ $ uv run agentgrep grep "release notes" Search one agent's prompt records: ```console -$ uv run agentgrep grep "deploy docs" --agent codex --type prompts +$ uv run agentgrep grep "deploy docs" --agent codex +``` + +Search full conversation records explicitly: + +```console +$ uv run agentgrep grep "deploy docs" --agent codex --scope conversations ``` ## 3. Inspect the stores diff --git a/docs/index.md b/docs/index.md index ef072cf..b86a1be 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,7 +2,7 @@ # agentgrep -Read-only search for local AI agent prompts and history across Codex, Claude Code, Cursor, Gemini, Grok, Pi, and OpenCode. +Read-only search for local AI agent prompts and opt-in conversations across Codex, Claude Code, Cursor, Gemini, Grok, Pi, and OpenCode. ```{warning} **Pre-alpha.** APIs may change. [Feedback welcome](https://github.com/tony/agentgrep/issues). @@ -34,7 +34,7 @@ Search and find from the terminal. Pipe `--json` / `--ndjson` for scripts and ag :::{grid-item-card} TUI :link: tui/index :link-type: doc -Interactive Textual explorer for browsing prompts and history. +Interactive Textual explorer for browsing prompt and conversation records. ::: :::{grid-item-card} MCP @@ -73,7 +73,7 @@ Per-agent store layouts, record schemas, and support matrix. ### Prompt Search -Find full prompt and history records by literal term or regular expression. +Find prompt records by literal term or regular expression, with explicit opt-in for conversations. search @@ -87,7 +87,7 @@ List the stores, session files, and SQLite databases that agentgrep can read. Use prompts for common agent workflows: -{ref}`fastmcp-prompt-search-prompts` · {ref}`fastmcp-prompt-search-history` · {ref}`fastmcp-prompt-inspect-stores` +{ref}`fastmcp-prompt-search-prompts` · {ref}`fastmcp-prompt-search-conversations` · {ref}`fastmcp-prompt-inspect-stores` ```{toctree} :hidden: diff --git a/docs/library/examples.md b/docs/library/examples.md index dd1856e..fb762e0 100644 --- a/docs/library/examples.md +++ b/docs/library/examples.md @@ -22,7 +22,7 @@ $ uv run agentgrep find cursor-cli --agent cursor-cli --json "arguments": { "terms": ["database migration"], "agent": "codex", - "search_type": "prompts", + "scope": "prompts", "limit": 10 } } diff --git a/docs/library/query-language.md b/docs/library/query-language.md index 3956670..9354905 100644 --- a/docs/library/query-language.md +++ b/docs/library/query-language.md @@ -2,7 +2,7 @@ # Query language -`agentgrep grep`, `agentgrep grep`, and `agentgrep find` accept a +`agentgrep search`, `agentgrep grep`, and `agentgrep find` accept a Lucene-style query language for inline field predicates, boolean composition, and date ranges. The same syntax works across all three subcommands; each interprets the predicates against its natural @@ -64,7 +64,7 @@ predicate has admitted the source. | Field | Kind | Notes | |---|---|---| -| `type` | enum | One of `prompts`, `history` | +| `scope` | enum | One of `prompts`, `conversations`, `all` | | `timestamp` | date | Record timestamp; supports comparison + range; alias `date` | | `model` | string | Substring against `record.model` | | `role` | string | Substring against `record.role` | @@ -144,6 +144,14 @@ $ agentgrep grep 'timestamp:[2026-01 TO 2026-03] model:claude' Records in Q1 2026 from any claude-* model. +```console +$ agentgrep grep 'scope:conversations pytest' +``` + +Conversation-scope records mentioning "pytest". A bare search uses +prompt scope; `scope:conversations` is the inline form of +`--scope conversations`. + ```console $ agentgrep find path:~/.codex agent:codex ``` @@ -167,7 +175,7 @@ $ agentgrep grep --agent codex agent:claude bliss agentgrep grep: error: cannot combine --agent flag with agent: field predicate; pick one syntax ``` -Currently checked: `--agent` × `agent:`, `--type` × `type:`. Other +Currently checked: `--agent` × `agent:`, `--scope` × `scope:`. Other flags don't yet have query-field counterparts. ## Performance diff --git a/docs/library/tutorial.md b/docs/library/tutorial.md index f9f39a0..4f0441d 100644 --- a/docs/library/tutorial.md +++ b/docs/library/tutorial.md @@ -13,21 +13,21 @@ $ uv run agentgrep grep "draft pr" Search only Codex prompts: ```console -$ uv run agentgrep grep "draft pr" --agent codex --type prompts +$ uv run agentgrep grep "draft pr" --agent codex ``` -## Search history +## Search conversations -Search assistant and command history: +Search assistant, tool, event, and full conversation records: ```console -$ uv run agentgrep grep "pytest" --type history +$ uv run agentgrep grep "pytest" --scope conversations ``` -Search prompts and history together: +Search prompts and conversations together: ```console -$ uv run agentgrep grep "docs" --type all +$ uv run agentgrep grep "docs" --scope all ``` ## Combine terms diff --git a/docs/manifest.json b/docs/manifest.json index 3281793..7a274b3 100644 --- a/docs/manifest.json +++ b/docs/manifest.json @@ -1,7 +1,7 @@ { "name": "agentgrep", "short_name": "agentgrep", - "description": "Read-only search for local AI agent prompts and history (Codex, Claude, Cursor)", + "description": "Read-only search for local AI agent prompts and opt-in conversations", "theme_color": "#2196f3", "background_color": "#fff", "display": "browser", diff --git a/docs/mcp/index.md b/docs/mcp/index.md index bdf4b3c..5321297 100644 --- a/docs/mcp/index.md +++ b/docs/mcp/index.md @@ -2,7 +2,11 @@ # MCP -agentgrep's MCP server exposes a read-only search surface over stdio. It does not mutate local agent stores, open SQLite in write mode, or execute arbitrary shell commands. +agentgrep's MCP server exposes a read-only search surface over stdio. +Search defaults to prompt scope; full conversation records are an +explicit `scope="conversations"` opt-in. The server does not mutate +local agent stores, open SQLite in write mode, or execute arbitrary +shell commands. ## Install diff --git a/docs/mcp/prompts.md b/docs/mcp/prompts.md index e622ab0..0964a5e 100644 --- a/docs/mcp/prompts.md +++ b/docs/mcp/prompts.md @@ -14,14 +14,14 @@ Use this when the user wants matching user prompts. ```{fastmcp-prompt-input} search_prompts ``` -## Search history +## Search conversations -```{fastmcp-prompt} search_history +```{fastmcp-prompt} search_conversations ``` -Use this when the user wants assistant or command history records. +Use this when the user wants full conversation records. -```{fastmcp-prompt-input} search_history +```{fastmcp-prompt-input} search_conversations ``` ## Inspect stores diff --git a/docs/mcp/resources.md b/docs/mcp/resources.md index c27d54a..71360fc 100644 --- a/docs/mcp/resources.md +++ b/docs/mcp/resources.md @@ -9,7 +9,9 @@ MCP resources expose passive read-only data at `agentgrep://` URIs. Clients read ```{fastmcp-resource} agentgrep_capabilities ``` -Read `agentgrep://capabilities` to see supported agents, adapters, tools, resources, prompts, and optional backend selections. +Read `agentgrep://capabilities` to see supported agents, adapters, +search scopes, tools, resources, prompts, and optional backend +selections. ## Sources diff --git a/docs/mcp/tools.md b/docs/mcp/tools.md index cc16631..21f126b 100644 --- a/docs/mcp/tools.md +++ b/docs/mcp/tools.md @@ -4,13 +4,15 @@ agentgrep's tools are read-only. They return structured Pydantic models and protect private paths before serialization. -## Prompt and History Search +## Prompt and Conversation Search ```{fastmcp-tool} search :no-index: ``` -**Use when** you need full prompt or history records matching one or more terms. +**Use when** you need prompt records matching one or more terms. Pass +`scope="conversations"` for full conversation, assistant, tool, and +event records, or `scope="all"` for both surfaces. **Returns:** query metadata plus normalized records with agent, store, adapter, path, text, title, role, timestamp, model, session ID, conversation ID, and metadata. @@ -22,7 +24,7 @@ agentgrep's tools are read-only. They return structured Pydantic models and prot "arguments": { "terms": ["release notes"], "agent": "all", - "search_type": "prompts", + "scope": "prompts", "limit": 20 } } diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index ce82bb7..78713bf 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -3,7 +3,7 @@ # requires-python = ">=3.14" # dependencies = ["pydantic>=2.11.3", "textual>=3.2.0"] # /// -"""Search local AI agent prompts and history without mutating agent stores. +"""Search local AI agent prompts and conversations without mutating agent stores. The tool discovers known read-only stores under ``~/.codex``, ``~/.claude``, ``~/.cursor``, and Cursor's official IDE storage locations, then normalizes diff --git a/src/agentgrep/_engine/search.py b/src/agentgrep/_engine/search.py index 49e6a74..3690fea 100644 --- a/src/agentgrep/_engine/search.py +++ b/src/agentgrep/_engine/search.py @@ -1,7 +1,7 @@ """Search event-stream producer. The :func:`iter_search_events` generator is the primary entry point -into agentgrep's search engine: it scans the user's prompt and history +into agentgrep's search engine: it scans the user's prompt and conversation stores and yields :class:`agentgrep.events.SearchEvent` values as it goes. Consumers (the CLI text path, the TUI worker, the MCP tool wrapper) filter the event stream for the variants they need. diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py index 3f6ed71..79c6a7d 100644 --- a/src/agentgrep/cli/render.py +++ b/src/agentgrep/cli/render.py @@ -285,7 +285,7 @@ def _type_matches(record: FindRecord, args: FindArgs) -> bool: transcripts) and ``history``/``prompts`` -> ``history_file`` (the prompt-history audit logs, where standalone prompt records live). The prompt/history distinction is a record-level concept (``search`` - ``--type``); at the file granularity ``find`` operates on, both map + ``--scope``); at the file granularity ``find`` operates on, both map to the same path kind. """ if args.type_filter == "all": @@ -388,7 +388,7 @@ def run_find_command(args: FindArgs) -> int: for the routing decision. The ``--ui`` overlay translates the find filters into a - :class:`SearchQuery` seeded with the same agent / type narrowing, + :class:`SearchQuery` seeded with the same agent / scope narrowing, then opens the Textual explorer. This mirrors the ``tig`` model: same query semantics, different presentation. """ diff --git a/src/agentgrep/mcp/instructions.py b/src/agentgrep/mcp/instructions.py index 98c602c..5d23d14 100644 --- a/src/agentgrep/mcp/instructions.py +++ b/src/agentgrep/mcp/instructions.py @@ -10,13 +10,13 @@ _INSTR_HEADER = ( "agentgrep MCP server. Read-only search over local AI-agent prompts and " - "history across Codex, Claude Code, Cursor, Gemini, Grok, Pi, and OpenCode CLIs. All tools " - "are read-only and never spawn writes." + "opt-in conversations across Codex, Claude Code, Cursor, Gemini, Grok, Pi, and " + "OpenCode CLIs. All tools are read-only and never spawn writes." ) _INSTR_SCOPE = ( "TRIGGERS: invoke for retrospective questions about what the user typed " - "into or received from a coding-agent CLI (prompts, history, session " + "into or received from a coding-agent CLI (prompts, prompt history, session " "transcripts, store discovery). Bare 'prompt', 'history', 'transcript', " "'session', 'what did I ask Claude/Codex/Cursor/Gemini/Grok/Pi/OpenCode' default to " "agentgrep.\n" @@ -27,15 +27,16 @@ ) _INSTR_SEARCH_VS_DISCOVERY = ( - "search vs discovery: search() finds matching prompts/history text; " - "find() enumerates the on-disk stores agentgrep can read. Use the " - "agentgrep://capabilities and agentgrep://sources resources to inspect " - "the server's catalog before deciding which stores are worth searching." + "search vs discovery: search() finds matching prompt-scope text by default; " + "pass scope='conversations' to opt into full conversation records. find() " + "enumerates the on-disk stores agentgrep can read. Use the agentgrep://capabilities " + "and agentgrep://sources resources to inspect the server's catalog before " + "deciding which stores are worth searching." ) _INSTR_DEFAULTS = ( "Defaults: results are newest-first and deduplicated by session. " - "search uses substring AND-matching across all terms." + "search uses substring AND-matching across all terms and scope='prompts'." ) _INSTR_RESOURCES = ( diff --git a/src/agentgrep/mcp/tools/search_tools.py b/src/agentgrep/mcp/tools/search_tools.py index ef9a280..091be78 100644 --- a/src/agentgrep/mcp/tools/search_tools.py +++ b/src/agentgrep/mcp/tools/search_tools.py @@ -84,7 +84,7 @@ def register(mcp: FastMCP) -> None: @mcp.tool( name="search", tags=READONLY_TAGS | {"search"}, - description="Search normalized prompts or history across local agent stores.", + description=("Search normalized prompts by default; opt into conversations with scope."), ) async def search_tool( terms: t.Annotated[ diff --git a/src/agentgrep/stores.py b/src/agentgrep/stores.py index 25129be..cabd775 100644 --- a/src/agentgrep/stores.py +++ b/src/agentgrep/stores.py @@ -1,6 +1,6 @@ """Pydantic-backed catalogue of every on-disk store agentgrep knows about. -agentgrep searches AI agent prompt and history stores that live in the user's +agentgrep searches AI agent prompt and conversation stores that live in the user's ``$HOME``. Those stores move (Claude has renamed paths between minor versions), grow (Cursor added a CLI agent with its own layout), and overlap (Gemini keeps a pruned archive alongside its live tmp tree). Keeping that diff --git a/src/agentgrep/ui/app.py b/src/agentgrep/ui/app.py index 36dabad..7b3a84c 100644 --- a/src/agentgrep/ui/app.py +++ b/src/agentgrep/ui/app.py @@ -935,7 +935,7 @@ def compose(self) -> cabc.Iterator[object]: initial_search = " ".join(self.query.terms) if self.query.terms else "" yield SearchInput( value=initial_search, - placeholder="Search prompts and history", + placeholder="Search prompts", id="search", ) with horizontal(id="body"): From b1e4fc51128ad24040b20fd19b975a89beda0ebc Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 31 May 2026 12:51:57 -0500 Subject: [PATCH 4/5] agentgrep(fix[scope]): Prune transcript sources from prompt scope why: Prompt-scope search should prefer dedicated prompt-history stores when they are present. Filtering only by record kind allowed user turns from Claude project transcripts to leak into default grep/search results and made broad default searches slower than promised. what: - Track agents with discovered prompt-history sources and skip their chat transcript sources during prompt-scope planning. - Keep record-level scope matching local to record kind so transcript-only source sets still work. - Add regression coverage for Claude history versus project transcripts and source planning. --- src/agentgrep/__init__.py | 63 +++++++++++++++++++-- tests/test_agentgrep.py | 109 ++++++++++++++++++++++++++++++++++++ tests/test_query_compile.py | 29 +++++++++- 3 files changed, 194 insertions(+), 7 deletions(-) diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index 78713bf..c5086b1 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -3485,10 +3485,20 @@ def plan_search_sources( """Return the candidate sources to parse for a search query.""" active_progress = noop_search_progress() if progress is None else progress active_control = SearchControl() if control is None else control + prompt_history_agents = prompt_history_agents_for_sources(sources) + scoped_sources = [ + source + for source in sources + if source_matches_scope( + source, + query.scope, + prompt_history_agents=prompt_history_agents, + ) + ] if not query.terms: - return sources + return scoped_sources - planned_sources = list(sources) + planned_sources = scoped_sources if backends.grep_tool is not None: planned_sources = prefilter_sources_by_root( query, @@ -3652,11 +3662,18 @@ def current_count() -> int: return len(deduped) if query.dedupe else len(raw) source_predicate = query.compiled.source_predicate if query.compiled is not None else None + prompt_history_agents = prompt_history_agents_for_sources(sources) for index, source in enumerate(sources, start=1): if active_control.answer_now_requested() or ( query.limit is not None and current_count() >= query.limit ): break + if not source_matches_scope( + source, + query.scope, + prompt_history_agents=prompt_history_agents, + ): + continue # Compiled-query source pruning: when a field predicate like # ``agent:codex`` can be decided from the SourceHandle alone, # skip the source without opening it. Mirrors the same guard @@ -6308,17 +6325,25 @@ def build_search_record(source: SourceHandle, candidate: MessageCandidate) -> Se @functools.cache -def store_role_for_record(store: str, adapter_id: str) -> StoreRole | None: - """Return the catalog role for a normalized record's source store.""" +def store_descriptor_for_record(store: str, adapter_id: str) -> StoreDescriptor | None: + """Return the catalog descriptor for a normalized record's source store.""" from agentgrep.store_catalog import CATALOG for descriptor in CATALOG.stores: for spec in descriptor.discovery: if spec.store == store and spec.adapter_id == adapter_id: - return descriptor.role + return descriptor return None +def store_role_for_record(store: str, adapter_id: str) -> StoreRole | None: + """Return the catalog role for a normalized record's source store.""" + descriptor = store_descriptor_for_record(store, adapter_id) + if descriptor is None: + return None + return descriptor.role + + def record_matches_scope(record: SearchRecord, scope: SearchScope) -> bool: """Return whether ``record`` belongs to the requested search scope.""" if scope == "all": @@ -6329,6 +6354,34 @@ def record_matches_scope(record: SearchRecord, scope: SearchScope) -> bool: return role in CONVERSATION_STORE_ROLES +def prompt_history_agents_for_sources(sources: cabc.Iterable[SourceHandle]) -> frozenset[str]: + """Return agents with a dedicated prompt-history source in ``sources``.""" + return frozenset( + source.agent + for source in sources + if store_role_for_record(source.store, source.adapter_id) == StoreRole.PROMPT_HISTORY + ) + + +def source_matches_scope( + source: SourceHandle, + scope: SearchScope, + *, + prompt_history_agents: frozenset[str] = frozenset(), +) -> bool: + """Return whether ``source`` can yield records for the requested scope.""" + if scope == "all": + return True + role = store_role_for_record(source.store, source.adapter_id) + if scope == "conversations": + return role in CONVERSATION_STORE_ROLES + if role == StoreRole.PROMPT_HISTORY: + return True + if role in CONVERSATION_STORE_ROLES: + return source.agent not in prompt_history_agents + return True + + def matches_record(record: SearchRecord, query: SearchQuery) -> bool: """Return whether a normalized record should be included. diff --git a/tests/test_agentgrep.py b/tests/test_agentgrep.py index c08d179..d6ee260 100644 --- a/tests/test_agentgrep.py +++ b/tests/test_agentgrep.py @@ -2775,6 +2775,64 @@ def fake_run( assert [source.path for source in planned] == [first] +def test_plan_search_sources_prunes_chat_sources_from_prompt_scope( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Prompt-scope planning skips Claude transcript files before parsing.""" + agentgrep = t.cast("t.Any", load_agentgrep_module()) + history = agentgrep.SourceHandle( + agent="claude", + store="claude.history", + adapter_id="claude.history_jsonl.v1", + path=tmp_path / "history.jsonl", + path_kind="history_file", + source_kind="jsonl", + search_root=None, + mtime_ns=2, + ) + transcript = agentgrep.SourceHandle( + agent="claude", + store="claude.projects", + adapter_id="claude.projects_jsonl.v1", + path=tmp_path / "projects" / "session.jsonl", + path_kind="session_file", + source_kind="jsonl", + search_root=None, + mtime_ns=1, + ) + query = agentgrep.SearchQuery( + terms=("biome",), + scope="prompts", + any_term=False, + regex=False, + case_sensitive=False, + agents=("claude",), + limit=None, + ) + checked: list[str] = [] + + def direct_source_matches( + source: object, + query: object, + backends: object, + control: object | None = None, + ) -> bool: + checked.append(t.cast("t.Any", source).store) + return True + + monkeypatch.setattr(agentgrep, "direct_source_matches", direct_source_matches) + + planned = agentgrep.plan_search_sources( + query, + [history, transcript], + agentgrep.BackendSelection(None, None, None), + ) + + assert [source.store for source in planned] == ["claude.history"] + assert checked == ["claude.history"] + + def test_search_prefers_newer_sources_when_limiting( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, @@ -5544,6 +5602,57 @@ def test_search_claude_history_expands_external_pasted_text( assert "[Pasted text" not in record.text +def test_prompt_scope_excludes_claude_project_user_turns_when_history_exists( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Default prompt scope uses Claude's prompt history, not transcript replay.""" + agentgrep = load_agentgrep_module() + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + claude_home = home / ".claude" + write_jsonl( + claude_home / "history.jsonl", + [ + { + "display": "biome from prompt history", + "timestamp": 1_700_000_000_000, + "project": "/synthetic/project", + "sessionId": "session-1", + "pastedContents": {}, + }, + ], + ) + write_jsonl( + claude_home / "projects" / "-synthetic-project" / "session-1.jsonl", + [ + { + "type": "user", + "sessionId": "session-1", + "version": "2.1.157", + "message": {"role": "user", "content": "biome from transcript"}, + }, + ], + ) + + backends = t.cast("t.Any", agentgrep).BackendSelection(None, None, None) + query = t.cast("t.Any", agentgrep).SearchQuery( + terms=("biome",), + scope="prompts", + any_term=False, + regex=False, + case_sensitive=False, + agents=("claude",), + limit=None, + ) + sources = t.cast("t.Any", agentgrep).discover_sources(home, ("claude",), backends) + records = t.cast("t.Any", agentgrep).search_sources(query, sources, backends) + + assert [(record.store, record.text) for record in records] == [ + ("claude.history", "biome from prompt history"), + ] + + def test_search_claude_history_tolerates_missing_paste_cache( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/test_query_compile.py b/tests/test_query_compile.py index 5f7c670..d04c926 100644 --- a/tests/test_query_compile.py +++ b/tests/test_query_compile.py @@ -325,9 +325,34 @@ class RecordPredicateCase(t.NamedTuple): expected_matches=True, ), RecordPredicateCase( - test_id="scope-prompts-on-prompt-kind", + test_id="scope-prompts-on-prompt-history", query="scope:prompts", - record_kwargs={"kind": "prompt"}, + record_kwargs={ + "kind": "prompt", + "store": "codex.history", + "adapter_id": "codex.history_jsonl.v1", + }, + expected_matches=True, + ), + RecordPredicateCase( + test_id="scope-prompts-on-chat-prompt-record-layer", + query="scope:prompts", + record_kwargs={ + "kind": "prompt", + "store": "codex.sessions", + "adapter_id": "codex.sessions_jsonl.v1", + }, + expected_matches=True, + ), + RecordPredicateCase( + test_id="scope-prompts-includes-transcript-only-prompt", + query="scope:prompts", + record_kwargs={ + "kind": "prompt", + "agent": "pi", + "store": "pi.sessions", + "adapter_id": "pi.sessions_jsonl.v1", + }, expected_matches=True, ), RecordPredicateCase( From fa7a50e9446a1210318afb37a0f1cba9ec316bab Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 31 May 2026 13:02:57 -0500 Subject: [PATCH 5/5] agentgrep(fix[grep]): Match line output against record text why: Grep output is line-oriented. Letting bare patterns match source metadata such as paths could emit heading-only records when the record text had no matching line, which made default terminal output look like empty responses. what: - Add an internal search match-surface selector to SearchQuery. - Make grep use the text-only surface while regular search keeps the metadata-rich haystack. - Preserve the selected surface when rebuilding parsed query-language searches. - Add regression coverage for terms that appear only in a source path. --- src/agentgrep/__init__.py | 14 +++++++++- src/agentgrep/cli/render.py | 1 + src/agentgrep/query/compile.py | 1 + tests/test_cli_grep.py | 49 ++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py index c5086b1..9b2eefc 100644 --- a/src/agentgrep/__init__.py +++ b/src/agentgrep/__init__.py @@ -91,6 +91,7 @@ OutputMode = t.Literal["text", "json", "ndjson", "ui"] ProgressMode = t.Literal["auto", "always", "never"] SearchScope = t.Literal["prompts", "conversations", "all"] +SearchMatchSurface = t.Literal["haystack", "text"] ColorMode = t.Literal["auto", "always", "never"] GrepStyle = t.Literal["default", "pretty"] type JSONScalar = str | int | float | bool | None @@ -1178,6 +1179,9 @@ class SearchQuery: ``compiled.source_predicate`` to prune sources before any file is opened, and :func:`matches_record` consults ``compiled.record_predicate`` after the existing text match. + ``match_surface`` lets line-oriented callers such as ``grep`` + require a match in record text while fuzzy search and filtering + can keep using the metadata-rich haystack. """ terms: tuple[str, ...] @@ -1189,6 +1193,7 @@ class SearchQuery: limit: int | None dedupe: bool = True compiled: CompiledQuery | None = None + match_surface: SearchMatchSurface = "haystack" @dataclasses.dataclass(slots=True) @@ -6392,7 +6397,7 @@ def matches_record(record: SearchRecord, query: SearchQuery) -> bool: """ if not record_matches_scope(record, query.scope): return False - if not matches_text(build_search_haystack(record), query): + if not matches_text(build_record_match_surface(record, query.match_surface), query): return False compiled = query.compiled if compiled is not None and compiled.record_predicate is not None: @@ -6400,6 +6405,13 @@ def matches_record(record: SearchRecord, query: SearchQuery) -> bool: return True +def build_record_match_surface(record: SearchRecord, surface: SearchMatchSurface) -> str: + """Build the text surface used for unfielded query terms.""" + if surface == "text": + return record.text + return build_search_haystack(record) + + def build_search_haystack(record: SearchRecord) -> str: """Build a searchable text surface for a record.""" parts = [ diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py index 79c6a7d..f8f2791 100644 --- a/src/agentgrep/cli/render.py +++ b/src/agentgrep/cli/render.py @@ -885,6 +885,7 @@ def build_grep_query(args: GrepArgs) -> agentgrep.SearchQuery: limit=args.max_count, dedupe=not args.no_dedupe, compiled=args.compiled, + match_surface="text", ) diff --git a/src/agentgrep/query/compile.py b/src/agentgrep/query/compile.py index 5443bf0..2c33322 100644 --- a/src/agentgrep/query/compile.py +++ b/src/agentgrep/query/compile.py @@ -313,6 +313,7 @@ def _rebuild( limit=base.limit, dedupe=base.dedupe, compiled=compiled, + match_surface=base.match_surface, ) diff --git a/tests/test_cli_grep.py b/tests/test_cli_grep.py index 7fb327b..40bc702 100644 --- a/tests/test_cli_grep.py +++ b/tests/test_cli_grep.py @@ -306,6 +306,55 @@ def test_build_grep_query_translates_modes(case: QueryTranslationCase) -> None: assert query.regex is case.expected_regex assert query.dedupe is case.expected_dedupe assert query.terms == case.expected_terms + assert query.match_surface == "text" + + +class GrepMatchSurfaceCase(t.NamedTuple): + """Parametrized case for grep's record-level match surface.""" + + test_id: str + record_path: str + record_text: str + expected_matches: bool + + +GREP_MATCH_SURFACE_CASES: tuple[GrepMatchSurfaceCase, ...] = ( + GrepMatchSurfaceCase( + test_id="term-in-text", + record_path="/tmp/plain-project/prompt_history.jsonl", + record_text="tmux prompt investigation", + expected_matches=True, + ), + GrepMatchSurfaceCase( + test_id="term-only-in-path", + record_path="/tmp/vibe-tmux-py/prompt_history.jsonl", + record_text="prompt without the needle", + expected_matches=False, + ), +) + + +@pytest.mark.parametrize( + "case", + GREP_MATCH_SURFACE_CASES, + ids=[c.test_id for c in GREP_MATCH_SURFACE_CASES], +) +def test_grep_query_matches_record_text_not_source_path(case: GrepMatchSurfaceCase) -> None: + """Line-oriented grep should only emit records with matching text lines.""" + args = _make_grep_args(patterns=("tmux",), pattern_mode="fixed") + query = agentgrep.build_grep_query(args) + record = agentgrep.SearchRecord( + kind="prompt", + agent="grok", + store="grok.prompt_history", + adapter_id="grok.prompt_history_jsonl.v1", + path=pathlib.Path(case.record_path), + text=case.record_text, + title="Grok prompt history", + role="user", + ) + + assert agentgrep.matches_record(record, query) is case.expected_matches def _make_grep_args(**overrides: object) -> agentgrep.GrepArgs: