diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py index e3b7e79..72fb6ec 100644 --- a/src/session_analytics/cli.py +++ b/src/session_analytics/cli.py @@ -990,6 +990,7 @@ def cmd_sample_sequences(args): count=args.limit, context_events=args.context, days=args.days, + expand=args.expand, ) print(format_output(result, args.json)) @@ -1606,6 +1607,11 @@ def main(): sub.add_argument( "--context", type=int, default=2, help="Context events before/after (default: 2)" ) + sub.add_argument( + "--expand", + action="store_true", + help="Match expanded tool names (Bash→command, Skill→skill_name, Task→subagent_type)", + ) sub.set_defaults(func=cmd_sample_sequences) # journey (maps to get_session_messages MCP tool) diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md index 8a8adc5..e6de324 100644 --- a/src/session_analytics/guide.md +++ b/src/session_analytics/guide.md @@ -36,11 +36,18 @@ identify permission gaps. | Tool | Purpose | |------|---------| -| `get_tool_sequences(days?, min_count?, length?, limit?)` | Common tool chains (e.g., Read → Edit → Bash) | -| `sample_sequences(pattern, limit?, context_events?)` | Random samples of a pattern with surrounding context | +| `get_tool_sequences(days?, min_count?, length?, limit?, expand?)` | Common tool chains (e.g., Read → Edit → Bash) | +| `sample_sequences(pattern, limit?, context_events?, expand?)` | Random samples of a pattern with surrounding context | | `get_permission_gaps(days?, min_count?)` | Commands not covered by settings.json (supports glob patterns) | | `get_insights(days?, refresh?)` | Pre-computed patterns for /improve-workflow | +**expand**: When `True`, expands tool names to specific variants: +- Bash → specific command (e.g., "git", "make") +- Skill → skill name (e.g., "commit", "pr-review") +- Task → subagent type (e.g., "Explore", "Plan") + +Use `get_tool_sequences(expand=True)` to discover expanded patterns, then `sample_sequences(pattern, expand=True)` to get examples. + ### Failure Analysis | Tool | Purpose | diff --git a/src/session_analytics/patterns.py b/src/session_analytics/patterns.py index 2f6eb75..4f4fa58 100644 --- a/src/session_analytics/patterns.py +++ b/src/session_analytics/patterns.py @@ -17,6 +17,38 @@ DEFAULT_SETTINGS_PATH = Path.home() / ".claude" / "settings.json" +def _get_effective_name(row: dict, expand: bool) -> str: + """Get the effective name for a tool, optionally expanded. + + Args: + row: Database row with tool_name, command, skill_name, tool_input_json + expand: If True, expand Bash→command, Skill→skill_name, Task→subagent_type + + Returns: + Effective tool name (expanded or base depending on expand flag) + """ + if not expand: + return row["tool_name"] + + tool = row["tool_name"] + if tool == "Bash" and row["command"]: + return row["command"] + elif tool == "Skill" and row["skill_name"]: + return row["skill_name"] + elif tool == "Task" and row["tool_input_json"]: + try: + input_data = json.loads(row["tool_input_json"]) + if subagent := input_data.get("subagent_type"): + return subagent + except (json.JSONDecodeError, TypeError) as e: + logger.debug( + "Failed to parse tool_input_json for Task event %s: %s", + row.get("id", "unknown"), + e, + ) + return tool + + def compute_tool_frequency_patterns( storage: SQLiteStorage, days: int = 7, @@ -140,25 +172,6 @@ def compute_sequence_patterns( (cutoff,), ) - def get_effective_name(row) -> str: - """Get the effective name for a tool, optionally expanded.""" - if not expand: - return row["tool_name"] - - tool = row["tool_name"] - if tool == "Bash" and row["command"]: - return row["command"] - elif tool == "Skill" and row["skill_name"]: - return row["skill_name"] - elif tool == "Task" and row["tool_input_json"]: - try: - input_data = json.loads(row["tool_input_json"]) - if subagent := input_data.get("subagent_type"): - return subagent - except (json.JSONDecodeError, TypeError): - pass - return tool - # Group by session and extract sequences sequences: Counter = Counter() current_session = None @@ -175,7 +188,7 @@ def get_effective_name(row) -> str: current_session = row["session_id"] session_tools = [] - session_tools.append(get_effective_name(row)) + session_tools.append(_get_effective_name(row, expand)) # Process last session if len(session_tools) >= sequence_length: @@ -209,6 +222,7 @@ def sample_sequences( count: int = 5, context_events: int = 2, days: int = 7, + expand: bool = False, ) -> dict: """Return random samples of a sequence pattern with surrounding context. @@ -221,6 +235,8 @@ def sample_sequences( count: Number of random samples to return (default: 5) context_events: Number of events before/after to include (default: 2) days: Number of days to analyze + expand: If True, match expanded tool names (Bash→command, Skill→skill_name, + Task→subagent_type). Use with patterns from get_tool_sequences(expand=True). Returns: Dict with pattern info, total occurrences, and sampled instances @@ -230,7 +246,9 @@ def sample_sequences( # Validate pattern input if len(pattern) > 500: return { + "status": "ok", "pattern": pattern[:50] + "...", + "expanded": expand, "error": "Pattern too long (max 500 characters)", "total_occurrences": 0, "samples": [], @@ -242,12 +260,14 @@ def sample_sequences( else: target_tools = [t.strip() for t in pattern.split(",")] - # Validate individual tool names (alphanumeric and underscores only) + # Validate individual tool names (alphanumeric, underscores, and hyphens for expanded names) for tool in target_tools: - if not tool or not all(c.isalnum() or c == "_" for c in tool): + if not tool or not all(c.isalnum() or c in "_-" for c in tool): return { + "status": "ok", "pattern": pattern, - "error": f"Invalid tool name: '{tool}' (must be alphanumeric or underscores)", + "expanded": expand, + "error": f"Invalid tool name: '{tool}' (must be alphanumeric, underscores, or hyphens)", "total_occurrences": 0, "samples": [], } @@ -255,16 +275,20 @@ def sample_sequences( sequence_length = len(target_tools) if sequence_length < 2: return { + "status": "ok", "pattern": pattern, + "expanded": expand, "error": "Pattern must contain at least 2 tools", "total_occurrences": 0, "samples": [], } # Get all tool events ordered by session and timestamp + # Include extra columns needed for expansion rows = storage.execute_query( """ - SELECT id, session_id, tool_name, timestamp, project_path, file_path, command + SELECT id, session_id, tool_name, timestamp, project_path, file_path, + command, skill_name, tool_input_json FROM events WHERE timestamp >= ? AND tool_name IS NOT NULL ORDER BY session_id, timestamp @@ -282,7 +306,9 @@ def sample_sequences( # Process previous session to find pattern matches if len(session_events) >= sequence_length: for i in range(len(session_events) - sequence_length + 1): - tools = [session_events[j]["tool_name"] for j in range(i, i + sequence_length)] + tools = [ + session_events[j]["effective_name"] for j in range(i, i + sequence_length) + ] if tools == target_tools: # Calculate context boundaries start_ctx = max(0, i - context_events) @@ -305,6 +331,7 @@ def sample_sequences( { "id": row["id"], "tool_name": row["tool_name"], + "effective_name": _get_effective_name(row, expand), "timestamp": row["timestamp"], "project_path": row["project_path"], "file_path": row["file_path"], @@ -315,7 +342,7 @@ def sample_sequences( # Process last session if len(session_events) >= sequence_length: for i in range(len(session_events) - sequence_length + 1): - tools = [session_events[j]["tool_name"] for j in range(i, i + sequence_length)] + tools = [session_events[j]["effective_name"] for j in range(i, i + sequence_length)] if tools == target_tools: start_ctx = max(0, i - context_events) end_ctx = min(len(session_events), i + sequence_length + context_events) @@ -347,10 +374,13 @@ def sample_sequences( formatted_events = [] for idx, evt in enumerate(events): formatted_evt = { - "tool": evt["tool_name"], + "tool": evt["effective_name"] if expand else evt["tool_name"], "timestamp": evt["timestamp"].isoformat() if evt["timestamp"] else None, "is_match": match_start <= idx < match_end, } + # When expanded, also show base tool for context + if expand and evt["effective_name"] != evt["tool_name"]: + formatted_evt["base_tool"] = evt["tool_name"] if evt["file_path"]: formatted_evt["file"] = evt["file_path"] if evt["command"]: @@ -372,7 +402,9 @@ def sample_sequences( ) return { + "status": "ok", "pattern": pattern, + "expanded": expand, "parsed_tools": target_tools, "total_occurrences": total_occurrences, "sample_count": len(formatted_samples), diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py index b4ee586..25816c3 100644 --- a/src/session_analytics/server.py +++ b/src/session_analytics/server.py @@ -247,7 +247,13 @@ def get_tool_sequences( @mcp.tool() -def sample_sequences(pattern: str, limit: int = 5, context_events: int = 2, days: int = 7) -> dict: +def sample_sequences( + pattern: str, + limit: int = 5, + context_events: int = 2, + days: int = 7, + expand: bool = False, +) -> dict: """Get random samples of a sequence pattern with surrounding context. Instead of just counting "Read → Edit" occurrences, returns actual examples @@ -258,15 +264,21 @@ def sample_sequences(pattern: str, limit: int = 5, context_events: int = 2, days limit: Number of random samples to return (default: 5) context_events: Number of events before/after to include (default: 2) days: Number of days to analyze (default: 7) + expand: If True, match expanded tool names (Bash→command, Skill→skill_name, + Task→subagent_type). Use with patterns from get_tool_sequences(expand=True). Returns: Pattern info, total occurrences, and sampled instances with context """ queries.ensure_fresh_data(storage, days=days) - result = patterns.sample_sequences( - storage, pattern=pattern, count=limit, context_events=context_events, days=days + return patterns.sample_sequences( + storage, + pattern=pattern, + count=limit, + context_events=context_events, + days=days, + expand=expand, ) - return {"status": "ok", **result} @mcp.tool() diff --git a/tests/test_cli.py b/tests/test_cli.py index 7ff48c9..88e2590 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -504,6 +504,7 @@ class Args: limit = 5 context = 2 days = 7 + expand = False with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage): cmd_sample_sequences(Args()) diff --git a/tests/test_patterns.py b/tests/test_patterns.py index 3412d3e..d0ee4a2 100644 --- a/tests/test_patterns.py +++ b/tests/test_patterns.py @@ -462,6 +462,121 @@ def test_sample_sequences_marks_match_events(self, pattern_storage): tools = [e["tool"] for e in matched_events] assert tools == ["Read", "Edit"] + def test_sample_sequences_expand_matches_commands(self, storage): + """Test that expand=True matches Bash command names instead of 'Bash'.""" + now = datetime.now() + + events = [ + Event( + id=None, + uuid="exp-1", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test", + entry_type="tool_use", + tool_name="Bash", + command="git", + ), + Event( + id=None, + uuid="exp-2", + timestamp=now - timedelta(hours=1, minutes=-1), + session_id="s1", + project_path="-test", + entry_type="tool_use", + tool_name="Edit", + ), + ] + storage.add_events_batch(events) + + # Without expand, "git → Edit" should NOT match (tool_name is "Bash") + result_unexpanded = sample_sequences(storage, pattern="git → Edit", expand=False, days=7) + assert result_unexpanded["total_occurrences"] == 0 + assert result_unexpanded["expanded"] is False + + # With expand, "git → Edit" SHOULD match + result_expanded = sample_sequences(storage, pattern="git → Edit", expand=True, days=7) + assert result_expanded["total_occurrences"] == 1 + assert result_expanded["expanded"] is True + # Verify the sample includes base_tool for the expanded event + sample_events = result_expanded["samples"][0]["events"] + git_event = next(e for e in sample_events if e.get("tool") == "git") + assert git_event.get("base_tool") == "Bash" + + def test_sample_sequences_expand_matches_skill_names(self, storage): + """Test that expand=True matches Skill names.""" + now = datetime.now() + + events = [ + Event( + id=None, + uuid="skill-1", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test", + entry_type="tool_use", + tool_name="Skill", + skill_name="commit", + ), + Event( + id=None, + uuid="skill-2", + timestamp=now - timedelta(hours=1, minutes=-1), + session_id="s1", + project_path="-test", + entry_type="tool_use", + tool_name="Read", + ), + ] + storage.add_events_batch(events) + + # With expand, "commit → Read" should match + result = sample_sequences(storage, pattern="commit → Read", expand=True, days=7) + assert result["total_occurrences"] == 1 + assert result["expanded"] is True + + def test_sample_sequences_expand_matches_task_subagent(self, storage): + """Test that expand=True matches Task subagent_type.""" + import json + + now = datetime.now() + + events = [ + Event( + id=None, + uuid="task-1", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test", + entry_type="tool_use", + tool_name="Task", + tool_input_json=json.dumps({"subagent_type": "Explore"}), + ), + Event( + id=None, + uuid="task-2", + timestamp=now - timedelta(hours=1, minutes=-1), + session_id="s1", + project_path="-test", + entry_type="tool_use", + tool_name="Read", + ), + ] + storage.add_events_batch(events) + + # With expand, "Explore → Read" should match + result = sample_sequences(storage, pattern="Explore → Read", expand=True, days=7) + assert result["total_occurrences"] == 1 + assert result["expanded"] is True + + def test_sample_sequences_expand_allows_hyphenated_names(self, storage): + """Test that expanded patterns with hyphens are valid.""" + result = sample_sequences(storage, pattern="pr-review → Edit", expand=True, days=7) + # Should not error - pattern is valid + assert "error" not in result or "Invalid tool name" not in result.get("error", "") + assert result["parsed_tools"] == ["pr-review", "Edit"] + assert result["expanded"] is True + class TestAnalyzeFailures: """Tests for the analyze_failures function (Phase 4: Failure Analysis).""" diff --git a/tests/test_server.py b/tests/test_server.py index 286acd2..d0880cc 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -151,6 +151,21 @@ def test_sample_sequences(): assert "total_occurrences" in result assert "samples" in result assert isinstance(result["samples"], list) + assert "expanded" in result + assert result["expanded"] is False + + +def test_sample_sequences_expand(): + """Test that sample_sequences respects expand parameter.""" + # Test with expand=True - should return expanded field set to True + result = sample_sequences.fn( + pattern="git → Edit", limit=5, context_events=2, days=7, expand=True + ) + assert result["status"] == "ok" + assert result["expanded"] is True + # Pattern may or may not match, but structure should be correct + assert "parsed_tools" in result + assert result["parsed_tools"] == ["git", "Edit"] def test_get_session_messages():