From 20f56d90b0614527829e1af77c1bacf13ca3e83b Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Thu, 8 Jan 2026 21:03:26 +0000 Subject: [PATCH 1/3] feat: Add get_error_details() for tool failure analysis (#60) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add endpoint to retrieve detailed error information including the tool parameters that caused failures. This enables drill-down from aggregate error counts to actionable specifics. New MCP tool: - get_error_details(days, tool, limit) - shows which patterns/commands/files failed New CLI command: - session-analytics-cli error-details [--tool NAME] [--limit N] Key insight: No schema changes needed - uses json_extract() on existing tool_input_json column to extract Glob/Grep patterns, Bash commands, etc. Example output: Glob errors by pattern: "*" - 922 errors (rust-genai) "**/*.rs" - 6 errors Closes #60 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- README.md | 5 +- src/session_analytics/cli.py | 20 ++++++ src/session_analytics/guide.md | 8 ++- src/session_analytics/queries.py | 106 +++++++++++++++++++++++++++++++ src/session_analytics/server.py | 20 ++++++ 5 files changed, 156 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 654158e..7c54df6 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ session-analytics-cli agents # Task subagent activity vs main sessi session-analytics-cli signals # Raw session metrics for LLM interpretation session-analytics-cli classify # Categorize sessions (debug/dev/research) session-analytics-cli failures # Error patterns and rework detection +session-analytics-cli error-details # Detailed errors with tool parameters session-analytics-cli trends # Compare usage across time periods session-analytics-cli handoff # Context summary for session handoff @@ -91,7 +92,7 @@ All commands support: ## MCP Tools -30 tools available when running as an MCP server: +31 tools available when running as an MCP server: | Category | Tools | |----------|-------| @@ -100,7 +101,7 @@ All commands support: | **Patterns** | `get_tool_sequences`, `sample_sequences`, `get_permission_gaps`, `get_insights` | | **Files** | `get_file_activity`, `get_languages`, `get_projects`, `get_mcp_usage` | | **Agents** | `get_agent_activity` | -| **Sessions** | `get_session_signals`, `classify_sessions`, `analyze_failures`, `analyze_trends`, `get_handoff_context` | +| **Sessions** | `get_session_signals`, `classify_sessions`, `analyze_failures`, `get_error_details`, `analyze_trends`, `get_handoff_context` | | **Messages** | `get_session_messages`, `search_messages` | | **Relationships** | `detect_parallel_sessions`, `find_related_sessions` | | **Git** | `ingest_git_history`, `correlate_git_with_sessions`, `get_session_commits` | diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py index c729f5e..eb1dbc2 100644 --- a/src/session_analytics/cli.py +++ b/src/session_analytics/cli.py @@ -27,6 +27,7 @@ query_agent_activity, query_bus_events, query_commands, + query_error_details, query_file_activity, query_languages, query_mcp_usage, @@ -806,6 +807,18 @@ def cmd_failures(args): print(format_output(result, args.json)) +def cmd_error_details(args): + """Show detailed error information with tool parameters.""" + storage = SQLiteStorage() + result = query_error_details( + storage, + days=args.days, + tool=args.tool, + limit=args.limit, + ) + print(format_output(result, args.json)) + + def cmd_classify(args): """Show session classifications.""" storage = SQLiteStorage() @@ -1069,6 +1082,13 @@ def main(): ) sub.set_defaults(func=cmd_failures) + # error-details + sub = subparsers.add_parser("error-details", help="Show error details with tool parameters") + sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)") + sub.add_argument("--tool", help="Filter by tool name (e.g., Glob, Bash, Edit)") + sub.add_argument("--limit", type=int, default=50, help="Max errors per tool (default: 50)") + sub.set_defaults(func=cmd_error_details) + # classify sub = subparsers.add_parser("classify", help="Classify sessions by activity type") sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)") diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md index 00bb3db..a60de0f 100644 --- a/src/session_analytics/guide.md +++ b/src/session_analytics/guide.md @@ -46,12 +46,18 @@ identify permission gaps. | Tool | Purpose | |------|---------| | `analyze_failures(days?, project?)` | Failure patterns with drill-down to specific commands | +| `get_error_details(days?, tool?, limit?)` | Detailed errors with tool parameters (patterns, commands, files) | -Returns: +`analyze_failures()` returns: - `errors_by_tool`: Count of errors per tool - `error_examples`: Top failing commands (Bash) or files (Edit/Read/Write) for drill-down - `rework_patterns`: Files edited 3+ times within 10 minutes +`get_error_details()` shows *which specific parameters* caused failures: +- Glob/Grep: The pattern that failed (e.g., `"*"` with 922 errors) +- Bash: The command that failed (e.g., `pwd` with 492 errors) +- Edit/Read/Write: The file path that failed + ### Session Classification | Tool | Purpose | diff --git a/src/session_analytics/queries.py b/src/session_analytics/queries.py index 7e9ca49..81c488c 100644 --- a/src/session_analytics/queries.py +++ b/src/session_analytics/queries.py @@ -1899,3 +1899,109 @@ def query_bus_events( "event_types": type_counts, "events": events, } + + +def query_error_details( + storage: SQLiteStorage, + days: int = 7, + tool: str | None = None, + limit: int = 50, +) -> dict: + """Get detailed error information including tool parameters that caused failures. + + Joins tool_result errors with tool_use events to extract the parameters + (pattern for Glob/Grep, command for Bash, file_path for file operations) + that caused the failure. + + Args: + storage: Storage instance + days: Number of days to analyze (default: 7) + tool: Optional filter by tool name (e.g., "Glob", "Bash") + limit: Maximum errors to return per tool (default: 50) + + Returns: + Dict with error details grouped by tool and parameter + """ + cutoff = get_cutoff(days=days) + + # Build tool filter + tool_filter = "" + params: list = [cutoff] + if tool: + tool_filter = "AND e2.tool_name = ?" + params.append(tool) + + # Query errors with tool parameters + # Uses json_extract to get the relevant parameter based on tool type: + # - Glob/Grep: pattern + # - Bash: command (already extracted to column) + # - Read/Edit/Write: file_path (already extracted to column) + rows = storage.execute_query( + f""" + SELECT + e2.tool_name, + e2.command, + e2.file_path, + json_extract(e2.tool_input_json, '$.pattern') as pattern, + json_extract(e2.tool_input_json, '$.path') as search_path, + e1.project_path, + COUNT(*) as error_count + FROM events e1 + JOIN events e2 ON e1.tool_id = e2.tool_id AND e2.entry_type = 'tool_use' + WHERE e1.timestamp >= ? + AND e1.is_error = 1 + AND e1.entry_type = 'tool_result' + {tool_filter} + GROUP BY e2.tool_name, e2.command, e2.file_path, pattern, search_path, e1.project_path + ORDER BY e2.tool_name, error_count DESC + """, + tuple(params), + ) + + # Organize by tool with the relevant parameter + errors_by_tool: dict[str, list[dict]] = {} + tool_totals: dict[str, int] = {} + + for row in rows: + tool_name = row["tool_name"] + if not tool_name: + continue + + # Determine the key parameter based on tool type + if tool_name in ("Glob", "Grep"): + key_param = row["pattern"] + param_type = "pattern" + elif tool_name == "Bash": + key_param = row["command"] + param_type = "command" + else: + key_param = row["file_path"] + param_type = "file_path" + + if tool_name not in errors_by_tool: + errors_by_tool[tool_name] = [] + tool_totals[tool_name] = 0 + + tool_totals[tool_name] += row["error_count"] + + # Only keep top N per tool + if len(errors_by_tool[tool_name]) < limit: + error_detail = { + "param_type": param_type, + "param_value": key_param, + "error_count": row["error_count"], + "project": row["project_path"], + } + # Add search_path for Glob/Grep if present + if tool_name in ("Glob", "Grep") and row["search_path"]: + error_detail["search_path"] = row["search_path"] + + errors_by_tool[tool_name].append(error_detail) + + return { + "days": days, + "tool_filter": tool, + "errors_by_tool": errors_by_tool, + "tool_totals": tool_totals, + "total_errors": sum(tool_totals.values()), + } diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py index 9f1fd3a..31cabb3 100644 --- a/src/session_analytics/server.py +++ b/src/session_analytics/server.py @@ -458,6 +458,26 @@ def analyze_failures(days: int = 7, rework_window_minutes: int = 10) -> dict: return {"status": "ok", **result} +@mcp.tool() +def get_error_details(days: int = 7, tool: str | None = None, limit: int = 50) -> dict: + """Get detailed error information including tool parameters that caused failures. + + Shows which specific patterns (Glob/Grep), commands (Bash), or files caused errors. + Use this to drill down from analyze_failures() counts to actionable specifics. + + Args: + days: Number of days to analyze (default: 7) + tool: Optional filter by tool name (e.g., "Glob", "Bash", "Edit") + limit: Maximum errors to return per tool (default: 50) + + Returns: + Error details grouped by tool with the failing parameter (pattern/command/file) + """ + queries.ensure_fresh_data(storage, days=days) + result = queries.query_error_details(storage, days=days, tool=tool, limit=limit) + return {"status": "ok", **result} + + @mcp.tool() def classify_sessions(days: int = 7, project: str | None = None) -> dict: """Classify sessions based on their dominant activity patterns. From 71ec2d5aacdba02a4261d02e90957ca94dc2410e Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Thu, 8 Jan 2026 21:10:38 +0000 Subject: [PATCH 2/3] test: Add tests for query_error_details() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses reviewer feedback on PR #61 requesting test coverage for the new error details query function. Tests cover: - Basic error aggregation by tool and parameter - Tool filter parameter - Limit parameter caps errors per tool - File path errors for Edit/Read/Write - Grep/Glob pattern extraction with search_path - Empty database handling - Days filter excludes old errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/test_queries.py | 359 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 359 insertions(+) diff --git a/tests/test_queries.py b/tests/test_queries.py index 9119316..f1cf7bd 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -7,6 +7,7 @@ get_cutoff, query_agent_activity, query_commands, + query_error_details, query_file_activity, query_languages, query_mcp_usage, @@ -1859,3 +1860,361 @@ def test_zero_division_protection(self, storage): # Should not raise ZeroDivisionError assert result["summary"]["agent_token_percentage"] == 0 + + +class TestQueryErrorDetails: + """Tests for query_error_details(). + + RFC #60: Shows which specific parameters (patterns, commands, files) + caused tool errors, enabling drill-down from aggregate error counts. + """ + + def test_basic_error_aggregation(self, storage): + """Test basic error aggregation by tool and parameter.""" + import json + + now = datetime.now() + + # Create tool_use events with tool_input_json + events = [ + # Glob error with pattern + Event( + id=None, + uuid="glob-use-1", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Glob", + tool_id="tool-glob-1", + tool_input_json=json.dumps({"pattern": "*.py", "path": "/src"}), + ), + Event( + id=None, + uuid="glob-result-1", + timestamp=now - timedelta(hours=1, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-glob-1", + is_error=True, + ), + # Another Glob error with same pattern (should aggregate) + Event( + id=None, + uuid="glob-use-2", + timestamp=now - timedelta(hours=2), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Glob", + tool_id="tool-glob-2", + tool_input_json=json.dumps({"pattern": "*.py", "path": "/src"}), + ), + Event( + id=None, + uuid="glob-result-2", + timestamp=now - timedelta(hours=2, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-glob-2", + is_error=True, + ), + # Bash error with command + Event( + id=None, + uuid="bash-use-1", + timestamp=now - timedelta(hours=3), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Bash", + tool_id="tool-bash-1", + command="git", + command_args="status", + ), + Event( + id=None, + uuid="bash-result-1", + timestamp=now - timedelta(hours=3, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-bash-1", + is_error=True, + ), + ] + storage.add_events_batch(events) + + result = query_error_details(storage, days=7) + + assert result["days"] == 7 + assert result["total_errors"] == 3 + assert "Glob" in result["errors_by_tool"] + assert "Bash" in result["errors_by_tool"] + + # Glob should have aggregated the 2 errors with same pattern + glob_errors = result["errors_by_tool"]["Glob"] + assert len(glob_errors) == 1 + assert glob_errors[0]["param_type"] == "pattern" + assert glob_errors[0]["param_value"] == "*.py" + assert glob_errors[0]["error_count"] == 2 + + # Bash should have 1 error + bash_errors = result["errors_by_tool"]["Bash"] + assert len(bash_errors) == 1 + assert bash_errors[0]["param_type"] == "command" + assert bash_errors[0]["param_value"] == "git" + assert bash_errors[0]["error_count"] == 1 + + def test_tool_filter(self, storage): + """Test filtering errors by specific tool.""" + import json + + now = datetime.now() + + events = [ + # Glob error + Event( + id=None, + uuid="glob-use", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Glob", + tool_id="tool-glob", + tool_input_json=json.dumps({"pattern": "*.rs"}), + ), + Event( + id=None, + uuid="glob-result", + timestamp=now - timedelta(hours=1, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-glob", + is_error=True, + ), + # Bash error + Event( + id=None, + uuid="bash-use", + timestamp=now - timedelta(hours=2), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Bash", + tool_id="tool-bash", + command="make", + ), + Event( + id=None, + uuid="bash-result", + timestamp=now - timedelta(hours=2, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-bash", + is_error=True, + ), + ] + storage.add_events_batch(events) + + # Filter to only Glob errors + result = query_error_details(storage, days=7, tool="Glob") + + assert result["tool_filter"] == "Glob" + assert result["total_errors"] == 1 + assert "Glob" in result["errors_by_tool"] + assert "Bash" not in result["errors_by_tool"] + + def test_limit_parameter(self, storage): + """Test that limit parameter caps errors per tool.""" + import json + + now = datetime.now() + + events = [] + # Create 5 different Glob errors with different patterns + for i in range(5): + events.extend( + [ + Event( + id=None, + uuid=f"glob-use-{i}", + timestamp=now - timedelta(hours=i), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Glob", + tool_id=f"tool-glob-{i}", + tool_input_json=json.dumps({"pattern": f"pattern-{i}"}), + ), + Event( + id=None, + uuid=f"glob-result-{i}", + timestamp=now - timedelta(hours=i, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id=f"tool-glob-{i}", + is_error=True, + ), + ] + ) + storage.add_events_batch(events) + + # Limit to 2 per tool + result = query_error_details(storage, days=7, limit=2) + + # Should only have 2 errors in the details, but total should reflect all + assert len(result["errors_by_tool"]["Glob"]) == 2 + assert result["tool_totals"]["Glob"] == 5 + + def test_file_path_errors(self, storage): + """Test that file operation errors show file_path.""" + now = datetime.now() + + events = [ + Event( + id=None, + uuid="edit-use", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Edit", + tool_id="tool-edit", + file_path="/path/to/missing.py", + ), + Event( + id=None, + uuid="edit-result", + timestamp=now - timedelta(hours=1, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-edit", + is_error=True, + ), + ] + storage.add_events_batch(events) + + result = query_error_details(storage, days=7) + + assert "Edit" in result["errors_by_tool"] + edit_errors = result["errors_by_tool"]["Edit"] + assert len(edit_errors) == 1 + assert edit_errors[0]["param_type"] == "file_path" + assert edit_errors[0]["param_value"] == "/path/to/missing.py" + + def test_grep_pattern_with_search_path(self, storage): + """Test that Grep errors include search_path when available.""" + import json + + now = datetime.now() + + events = [ + Event( + id=None, + uuid="grep-use", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Grep", + tool_id="tool-grep", + tool_input_json=json.dumps({"pattern": "TODO", "path": "/src"}), + ), + Event( + id=None, + uuid="grep-result", + timestamp=now - timedelta(hours=1, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-grep", + is_error=True, + ), + ] + storage.add_events_batch(events) + + result = query_error_details(storage, days=7) + + grep_errors = result["errors_by_tool"]["Grep"] + assert len(grep_errors) == 1 + assert grep_errors[0]["param_type"] == "pattern" + assert grep_errors[0]["param_value"] == "TODO" + assert grep_errors[0]["search_path"] == "/src" + + def test_no_errors(self, storage): + """Test with no errors in the database.""" + result = query_error_details(storage, days=7) + + assert result["total_errors"] == 0 + assert result["errors_by_tool"] == {} + + def test_days_filter(self, storage): + """Test that days filter excludes old errors.""" + import json + + now = datetime.now() + + events = [ + # Recent error (1 hour ago) + Event( + id=None, + uuid="recent-use", + timestamp=now - timedelta(hours=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Glob", + tool_id="tool-recent", + tool_input_json=json.dumps({"pattern": "recent"}), + ), + Event( + id=None, + uuid="recent-result", + timestamp=now - timedelta(hours=1, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-recent", + is_error=True, + ), + # Old error (10 days ago) + Event( + id=None, + uuid="old-use", + timestamp=now - timedelta(days=10), + session_id="s1", + project_path="-test-project", + entry_type="tool_use", + tool_name="Glob", + tool_id="tool-old", + tool_input_json=json.dumps({"pattern": "old"}), + ), + Event( + id=None, + uuid="old-result", + timestamp=now - timedelta(days=10, seconds=1), + session_id="s1", + project_path="-test-project", + entry_type="tool_result", + tool_id="tool-old", + is_error=True, + ), + ] + storage.add_events_batch(events) + + # 7 days should only get recent error + result = query_error_details(storage, days=7) + assert result["total_errors"] == 1 + assert result["errors_by_tool"]["Glob"][0]["param_value"] == "recent" + + # 30 days should get both errors + result_30 = query_error_details(storage, days=30) + assert result_30["total_errors"] == 2 From 86a23bfde14eae164ae03df8e8b9a35d662dd651 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Thu, 8 Jan 2026 21:13:58 +0000 Subject: [PATCH 3/3] feat: Add CLI formatter for error-details command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses reviewer suggestion for human-readable output instead of raw JSON. Output now shows errors grouped by tool with counts and context: Error Details (last 7 days) Total errors: 3403 Bash (2053 errors): 'pwd': 621 errors (genai) 'ls': 329 errors (gemicro) ... Glob (943 errors): '*': 922 errors in /path/to/rust-genai ... 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/cli.py | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py index eb1dbc2..40e54e9 100644 --- a/src/session_analytics/cli.py +++ b/src/session_analytics/cli.py @@ -401,6 +401,44 @@ def _format_failures(data: dict) -> list[str]: return lines +@_register_formatter(lambda d: "errors_by_tool" in d and "tool_totals" in d) +def _format_error_details(data: dict) -> list[str]: + lines = [ + f"Error Details (last {data['days']} days)", + f"Total errors: {data['total_errors']}", + ] + if data.get("tool_filter"): + lines.append(f"Filter: {data['tool_filter']}") + lines.append("") + + errors_by_tool = data.get("errors_by_tool", {}) + tool_totals = data.get("tool_totals", {}) + + if not errors_by_tool: + lines.append("No errors found.") + return lines + + for tool_name in sorted(errors_by_tool.keys(), key=lambda t: -tool_totals.get(t, 0)): + total = tool_totals.get(tool_name, 0) + lines.append(f"{tool_name} ({total} errors):") + for err in errors_by_tool[tool_name][:10]: + param = err.get("param_value") or "(unknown)" + count = err.get("error_count", 0) + suffix = "" + if err.get("search_path"): + suffix = f" in {err['search_path']}" + elif err.get("project"): + # Extract repo name from project path + proj = err["project"] + if proj: + proj = proj.split("-")[-1] if "-" in proj else proj + suffix = f" ({proj})" + lines.append(f" {param!r}: {count} errors{suffix}") + lines.append("") + + return lines + + @_register_formatter(lambda d: "category_distribution" in d and "sessions" in d) def _format_classify_sessions(data: dict) -> list[str]: lines = [