From e48656c275d9798a295450efbcd1cde2e8729c68 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Sun, 11 Jan 2026 21:26:33 +0000 Subject: [PATCH 1/3] feat: Reduce MCP tool token overhead (~72% reduction) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements RFC #84 to reduce MCP tool token overhead: - Remove 8 never-used tools: get_languages, get_command_frequency, ingest_bus_events, get_bus_events, get_pre_compaction_events, analyze_pre_compaction_patterns, correlate_git_with_sessions, ingest_git_history_all_projects - Trim all 30 remaining tool docstrings (21,983 → 6,197 chars) - Consolidate git ingestion: add all_projects param to ingest_git_history with auto-correlation - Update CLI with --all-projects flag, preserve backward compatibility - Add MCP docstring authoring guidance to CLAUDE.md Closes #84 Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 26 ++ src/session_analytics/cli.py | 47 ++- src/session_analytics/guide.md | 41 +-- src/session_analytics/server.py | 576 +++++++------------------------- tests/test_server.py | 73 ++-- 5 files changed, 193 insertions(+), 570 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 8131e12..4a5c623 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -77,3 +77,29 @@ make reinstall # pip install -e . + restart (for pyproject.toml) 5. Documentation in `guide.md` 6. Self-play test: can you reach actionable info using only MCP? 7. Run `make check` + +### MCP Tool Docstrings + +Keep docstrings minimal - `guide.md` is the comprehensive reference. Docstrings add token overhead on every session. + +**Include:** +- First-line description (what it does) +- Brief `Args:` section (name + type + purpose) +- Behavioral notes (defaults, special cases) + +**Omit:** +- `Returns:` sections (structure is self-documenting in JSON) +- Usage examples (use guide.md) +- Tips/references to other docs + +Example: +```python +def get_tool_frequency(...): + """Get tool usage frequency counts. 
+ + Args: + days: Days to analyze (default: 7) + project: Optional project path filter (LIKE match) + expand: Include Bash→command, Skill→name, Task→subagent breakdown + """ +``` diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py index 72fb6ec..43ff6e8 100644 --- a/src/session_analytics/cli.py +++ b/src/session_analytics/cli.py @@ -12,6 +12,9 @@ ingest_git_history_all_projects, ingest_logs, ) + +# Note: correlate_git_with_sessions and ingest_git_history_all_projects are kept +# for CLI backward compatibility, though MCP now consolidates them into ingest_git_history from session_analytics.patterns import ( analyze_failures, analyze_trends, @@ -1138,14 +1141,20 @@ def cmd_trends(args): def cmd_git_ingest(args): - """Ingest git history.""" + """Ingest git history and correlate with sessions.""" storage = SQLiteStorage() - result = ingest_git_history( - storage, - repo_path=args.repo_path, - days=args.days, - project_path=args.project, - ) + if getattr(args, "all_projects", False): + result = ingest_git_history_all_projects(storage, days=args.days) + else: + result = ingest_git_history( + storage, + repo_path=args.repo_path, + days=args.days, + project_path=args.project, + ) + # Auto-correlate commits with sessions (matches MCP behavior) + correlation = correlate_git_with_sessions(storage, days=args.days) + result["correlation"] = correlation print(format_output(result, args.json)) @@ -1357,9 +1366,6 @@ def cmd_benchmark(args): from session_analytics.patterns import ( sample_sequences as patterns_sample_sequences, ) - from session_analytics.queries import ( - analyze_pre_compaction_patterns as queries_analyze_pre_compaction_patterns, - ) from session_analytics.queries import ( classify_sessions as queries_classify_sessions, ) @@ -1384,21 +1390,12 @@ def cmd_benchmark(args): from session_analytics.queries import ( query_agent_activity as queries_query_agent_activity, ) - from session_analytics.queries import ( - query_bus_events as 
queries_query_bus_events, - ) - from session_analytics.queries import ( - query_commands as queries_query_commands, - ) from session_analytics.queries import ( query_error_details as queries_query_error_details, ) from session_analytics.queries import ( query_file_activity as queries_query_file_activity, ) - from session_analytics.queries import ( - query_languages as queries_query_languages, - ) from session_analytics.queries import ( query_mcp_usage as queries_query_mcp_usage, ) @@ -1424,11 +1421,12 @@ def cmd_benchmark(args): # Define all MCP tools with their default parameters # These call the underlying query functions directly (not the MCP wrappers) # Skip mutating tools (ingest_*) and tools requiring specific IDs + # Note: Removed tools not in MCP (get_command_frequency, get_languages, get_bus_events, + # analyze_pre_compaction_patterns) - CLI still has them for backward compat tool_functions = { "get_status": lambda: storage.get_db_stats(), "get_tool_frequency": lambda: queries_query_tool_frequency(storage, days=7), "get_session_events": lambda: queries_query_timeline(storage, limit=10), - "get_command_frequency": lambda: queries_query_commands(storage, days=7), "list_sessions": lambda: queries_query_sessions(storage, days=7), "get_token_usage": lambda: queries_query_tokens(storage, days=7), "get_tool_sequences": lambda: patterns_compute_sequence_patterns(storage, days=7), @@ -1456,11 +1454,9 @@ def cmd_benchmark(args): "get_session_signals": lambda: patterns_get_session_signals(storage, days=7), "get_session_commits": lambda: storage.get_session_commits(None), "get_file_activity": lambda: queries_query_file_activity(storage, days=7), - "get_languages": lambda: queries_query_languages(storage, days=7), "get_projects": lambda: queries_query_projects(storage, days=7), "get_mcp_usage": lambda: queries_query_mcp_usage(storage, days=7), "get_agent_activity": lambda: queries_query_agent_activity(storage, days=7), - "get_bus_events": lambda: 
queries_query_bus_events(storage, days=7, limit=10), # Issue #69: Compaction and efficiency tools "get_compaction_events": lambda: queries_get_compaction_events(storage, days=7), "get_compaction_events_agg": lambda: queries_get_compaction_events( @@ -1470,10 +1466,6 @@ def cmd_benchmark(args): storage, days=7, min_size_kb=10, limit=10 ), "get_session_efficiency": lambda: queries_get_session_efficiency(storage, days=7), - # Issue #81: Pre-compaction pattern analysis - "analyze_pre_compaction_patterns": lambda: queries_analyze_pre_compaction_patterns( - storage, days=7 - ), } # Skipped tools (require specific data or modify DB): @@ -1702,10 +1694,11 @@ def main(): sub.set_defaults(func=cmd_trends) # git-ingest - sub = subparsers.add_parser("git-ingest", help="Ingest git commit history") + sub = subparsers.add_parser("git-ingest", help="Ingest git commit history and correlate") sub.add_argument("--repo-path", help="Path to git repository (default: current dir)") sub.add_argument("--days", type=int, default=7, help="Days of history (default: 7)") sub.add_argument("--project", help="Project path to associate commits with") + sub.add_argument("--all-projects", action="store_true", help="Ingest from all known projects") sub.set_defaults(func=cmd_git_ingest) # git-correlate diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md index e6de324..72a26bb 100644 --- a/src/session_analytics/guide.md +++ b/src/session_analytics/guide.md @@ -22,13 +22,11 @@ identify permission gaps. | Tool | Purpose | |------|---------| -| `get_tool_frequency(days?, project?)` | Tool usage counts (Read, Edit, Bash, etc.) 
| -| `get_command_frequency(days?, prefix?, project?)` | Bash command breakdown | +| `get_tool_frequency(days?, project?, expand?)` | Tool usage counts with Bash/Skill/Task breakdown | | `list_sessions(days?, project?)` | Session metadata and token totals | | `get_token_usage(days?, by?, project?)` | Token usage by day, session, or model | | `get_session_events(days?, tool?, session_id?)` | Recent events with filtering | | `get_file_activity(days?, project?, limit?, collapse_worktrees?)` | File reads/edits/writes breakdown | -| `get_languages(days?, project?)` | Language distribution from file extensions | | `get_projects(days?)` | Activity across all projects | | `get_mcp_usage(days?, project?)` | MCP server and tool usage | @@ -116,9 +114,7 @@ Returns both core metrics (`events`, `sessions`, `errors`, `tokens`) and `effici | Tool | Purpose | |------|---------| -| `ingest_git_history(days?, repo_path?)` | Parse and store git commits from current repo | -| `ingest_git_history_all_projects(days?)` | Parse commits from all known projects | -| `correlate_git_with_sessions(days?)` | Link commits to sessions by timing | +| `ingest_git_history(days?, repo_path?, all_projects?)` | Ingest commits and auto-correlate with sessions | | `get_session_commits(session_id?)` | Get commits associated with a session | ### Session Signals @@ -139,8 +135,6 @@ Returns both core metrics (`events`, `sessions`, `errors`, `tokens`) and `effici | Tool | Purpose | |------|---------| | `get_compaction_events(days?, session_id?, limit?, aggregate?)` | List compaction events (context resets) | -| `get_pre_compaction_events(session_id, compaction_timestamp, limit?)` | Events before a compaction for analysis | -| `analyze_pre_compaction_patterns(days?, events_before?, limit?)` | Aggregated patterns before compactions (RFC #81) | | `get_large_tool_results(days?, min_size_kb?, limit?)` | Find tool results consuming context space | | `get_session_efficiency(days?, project?, limit?)` | Session 
efficiency metrics and burn rate | @@ -151,21 +145,6 @@ Returns both core metrics (`events`, `sessions`, `errors`, `tokens`) and `effici - **Read/Edit ratio**: High ratio suggests inefficient exploration (should use Task/Explore) - **Files read multiple times**: Redundant reads indicate opportunity to cache context -### Event-Bus Integration - -| Tool | Purpose | -|------|---------| -| `ingest_bus_events(days?)` | Import events from event-bus for cross-session insights | -| `get_bus_events(days?, event_type?, session_id?, repo?, limit?)` | Query event-bus events (gotchas, patterns, help) | - -Cross-session events include: -- `gotcha_discovered` - Non-obvious issues found during work -- `pattern_found` - Useful patterns identified -- `help_needed` / `help_response` - Cross-session coordination -- `task_completed` / `task_started` - Work progress tracking - -These appear in `get_insights()` under `cross_session_activity` when available. - ## Quick Start ### 1. Check status @@ -200,7 +179,6 @@ use the APIs however best fits your needs. ├─────────────────────────────────────────────────────────────────┤ │ get_status() → Is data fresh? How many events? │ │ get_tool_frequency() → What tools are used most? │ -│ get_command_frequency()→ What commands are common? │ ├─────────────────────────────────────────────────────────────────┤ │ DISCOVER PATTERNS │ ├─────────────────────────────────────────────────────────────────┤ @@ -241,10 +219,8 @@ analyze_trends() → "Usage is increasing/decreasing" ``` get_compaction_events() → "When did context resets happen?" get_compaction_events(aggregate=True) → "Which sessions had most compactions?" -analyze_pre_compaction_patterns() → "What patterns precede compactions?" (RFC #81) get_session_efficiency() → "Which sessions burn context fastest?" get_large_tool_results() → "What operations consume the most space?" -get_pre_compaction_events() → "What led up to a specific reset?" 
``` ## Reference @@ -280,18 +256,17 @@ Add suggestions to `permissions.allow` in your settings. ### Git Integration -Git correlation requires two steps: +Git ingestion automatically correlates commits with sessions: ``` -# Option 1: Ingest from all known projects (recommended) -ingest_git_history_all_projects(days=30) +# Ingest from all known projects (recommended) +ingest_git_history(all_projects=True, days=30) -# Option 2: Ingest from current repo only +# Or from current repo only ingest_git_history(days=30) -# Then correlate and query -correlate_git_with_sessions() # Link to sessions by timing -get_session_commits(session_id="abc") # View results +# Query results +get_session_commits(session_id="abc") ``` ## Tips diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py index 25816c3..2ffb77e 100644 --- a/src/session_analytics/server.py +++ b/src/session_analytics/server.py @@ -1,20 +1,6 @@ """MCP Session Analytics Server. -Provides tools for querying Claude Code session logs: -- ingest_logs: Refresh data from JSONL files -- list_sessions: Session metadata -- get_session_events: Events for a session/time window -- get_session_messages: User messages across sessions -- get_session_signals: Raw session signals for LLM interpretation -- get_session_commits: Session-commit mappings -- get_tool_frequency: Tool usage counts -- get_command_frequency: Bash command breakdown -- get_tool_sequences: Common tool patterns -- get_token_usage: Token usage analysis -- get_permission_gaps: Commands needing settings.json -- get_insights: Pre-computed patterns for /improve-workflow -- get_status: Ingestion status + DB stats -- search_messages: Full-text search on user messages +Provides tools for querying Claude Code session logs. See guide.md for full API reference. """ import logging @@ -63,11 +49,7 @@ def usage_guide() -> str: @mcp.tool() def get_status() -> dict: - """Get ingestion status and database stats. 
- - Returns: - Status info including last ingestion time, event count, and DB size - """ + """Get ingestion status and database stats.""" stats = storage.get_db_stats() last_ingest = storage.get_last_ingestion_time() @@ -84,12 +66,9 @@ def ingest_logs(days: int = 7, project: str | None = None, force: bool = False) """Refresh data from JSONL session log files. Args: - days: Number of days to look back (default: 7) - project: Optional project path filter - force: Force re-ingestion even if data is fresh - - Returns: - Ingestion stats (files processed, entries added, etc.) + days: Days to look back (default: 7) + project: Project path filter + force: Force re-ingestion even if fresh """ result = ingest.ingest_logs(storage, days=days, project=project, force=force) return { @@ -103,13 +82,9 @@ def get_tool_frequency(days: int = 7, project: str | None = None, expand: bool = """Get tool usage frequency counts. Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - expand: Include breakdown for Skill (by skill_name), Task (by subagent_type), - and Bash (by command). Default: True - - Returns: - Tool frequency breakdown with optional nested breakdowns + days: Days to analyze (default: 7) + project: Project path filter + expand: Include Skill/Task/Bash breakdowns (default: True) """ queries.ensure_fresh_data(storage, days=days, project=project) result = queries.query_tool_frequency(storage, days=days, project=project, expand=expand) @@ -128,15 +103,12 @@ def get_session_events( """Get events in a time window or for a specific session. 
Args: - start: Start time (ISO format, default: 24 hours ago) + start: Start time (ISO format, default: 24h ago) end: End time (ISO format, default: now) - tool: Optional tool name filter - project: Optional project path filter - session_id: Optional session ID filter (get full session trace) - limit: Maximum events to return (default: 100) - - Returns: - Timeline of events + tool: Tool name filter + project: Project path filter + session_id: Session ID filter + limit: Max events (default: 100) """ from datetime import datetime @@ -156,35 +128,13 @@ def get_session_events( return {"status": "ok", **result} -@mcp.tool() -def get_command_frequency( - days: int = 7, project: str | None = None, prefix: str | None = None -) -> dict: - """Get Bash command breakdown. - - Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - prefix: Optional command prefix filter (e.g., "git") - - Returns: - Command frequency breakdown - """ - queries.ensure_fresh_data(storage, days=days, project=project) - result = queries.query_commands(storage, days=days, project=project, prefix=prefix) - return {"status": "ok", **result} - - @mcp.tool() def list_sessions(days: int = 7, project: str | None = None) -> dict: """List all sessions with metadata. Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - - Returns: - Session information + days: Days to analyze (default: 7) + project: Project path filter """ queries.ensure_fresh_data(storage, days=days, project=project) result = queries.query_sessions(storage, days=days, project=project) @@ -196,12 +146,9 @@ def get_token_usage(days: int = 7, project: str | None = None, by: str = "day") """Get token usage analysis. 
Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - by: Grouping: 'day', 'session', or 'model' (default: 'day') - - Returns: - Token usage breakdown + days: Days to analyze (default: 7) + project: Project path filter + by: Grouping: 'day', 'session', or 'model' """ queries.ensure_fresh_data(storage, days=days, project=project) result = queries.query_tokens(storage, days=days, project=project, by=by) @@ -219,14 +166,11 @@ def get_tool_sequences( """Get common tool patterns (sequences). Args: - days: Number of days to analyze (default: 7) - min_count: Minimum occurrences to include (default: 3) + days: Days to analyze (default: 7) + min_count: Min occurrences (default: 3) length: Sequence length (default: 2) - expand: Expand Bash→commands, Skill→skill names, Task→subagent types (default: False) - limit: Maximum patterns to return (default: 50) - - Returns: - Common tool sequences + expand: Expand Bash/Skill/Task to specifics + limit: Max patterns (default: 50) """ queries.ensure_fresh_data(storage, days=days) sequence_patterns = patterns.compute_sequence_patterns( @@ -254,21 +198,14 @@ def sample_sequences( days: int = 7, expand: bool = False, ) -> dict: - """Get random samples of a sequence pattern with surrounding context. - - Instead of just counting "Read → Edit" occurrences, returns actual examples - with context for LLM interpretation of workflow patterns. + """Get random samples of a sequence pattern with context. Args: - pattern: Sequence pattern (e.g., "Read → Edit" or "Read,Edit") - limit: Number of random samples to return (default: 5) - context_events: Number of events before/after to include (default: 2) - days: Number of days to analyze (default: 7) - expand: If True, match expanded tool names (Bash→command, Skill→skill_name, - Task→subagent_type). Use with patterns from get_tool_sequences(expand=True). 
- - Returns: - Pattern info, total occurrences, and sampled instances with context + pattern: Sequence pattern (e.g., "Read → Edit") + limit: Samples to return (default: 5) + context_events: Events before/after (default: 2) + days: Days to analyze (default: 7) + expand: Match expanded names from get_tool_sequences(expand=True) """ queries.ensure_fresh_data(storage, days=days) return patterns.sample_sequences( @@ -283,14 +220,11 @@ def sample_sequences( @mcp.tool() def get_permission_gaps(days: int = 7, min_count: int = 5) -> dict: - """Find commands that may need to be added to settings.json. + """Find commands to add to settings.json. Args: - days: Number of days to analyze (default: 7) - min_count: Minimum usage count to suggest (default: 5) - - Returns: - Commands that are frequently used but not in allowed list + days: Days to analyze (default: 7) + min_count: Min usage to suggest (default: 5) """ queries.ensure_fresh_data(storage, days=days) gap_patterns = patterns.compute_permission_gaps(storage, days=days, threshold=min_count) @@ -320,20 +254,13 @@ def get_session_messages( ) -> dict: """Get messages chronologically across sessions. - Shows how the user moved across sessions and projects over time, - revealing task switching, project interleaving, and work patterns. - Includes both user messages and assistant responses for conversation replay. 
- Args: - days: Number of days to look back (default: 1, supports fractions like 0.5 for 12h) - include_projects: Include project info in output (default: True) - session_id: Optional session ID filter (get messages from specific session) - limit: Maximum messages to return (default: 100) - entry_types: Which entry types to include (default: ["user", "assistant"]) - max_message_length: Truncate messages to this length (default: 500, 0=no limit) - - Returns: - Journey events with timestamps, sessions, and messages + days: Days to look back (default: 1, supports 0.5 for 12h) + include_projects: Include project info (default: True) + session_id: Session ID filter + limit: Max messages (default: 100) + entry_types: Types to include (default: ["user", "assistant"]) + max_message_length: Truncate length (default: 500, 0=no limit) """ hours = int(days * 24) queries.ensure_fresh_data(storage, days=max(1, int(days) + 1)) @@ -356,24 +283,13 @@ def search_messages( project: str | None = None, entry_types: list[str] | None = None, ) -> dict: - """Search messages using full-text search. - - Uses FTS5 to efficiently search across all message types (user, assistant, - tool_result, summary). Useful for finding discussions about specific topics, - decisions, or patterns across sessions. + """Search messages using FTS5 full-text search. Args: - query: FTS5 query string. 
Supports: - - Simple terms: "authentication" - - Phrases: '"fix the bug"' - - Boolean: "auth AND error", "skip OR defer" - - Prefix: "implement*" - limit: Maximum results to return (default: 50) - project: Optional project path filter - entry_types: Optional list of entry types to filter (e.g., ["user", "assistant"]) - - Returns: - Matching messages with session context and timestamps + query: FTS5 query (terms, "phrases", AND/OR, prefix*) + limit: Max results (default: 50) + project: Project path filter + entry_types: Types to filter (e.g., ["user", "assistant"]) """ queries.ensure_fresh_data(storage) try: @@ -408,17 +324,11 @@ def search_messages( @mcp.tool() def detect_parallel_sessions(days: float = 1, min_overlap_minutes: int = 5) -> dict: - """Find sessions that were active simultaneously. - - Identifies when multiple sessions were active at the same time, - indicating worktree usage, waiting on CI, or multi-task work. + """Find sessions active simultaneously. Args: - days: Number of days to look back (default: 1, supports fractions like 0.5 for 12h) - min_overlap_minutes: Minimum overlap to consider parallel (default: 5) - - Returns: - Parallel session periods with timing and session details + days: Days to look back (default: 1) + min_overlap_minutes: Min overlap (default: 5) """ hours = int(days * 24) queries.ensure_fresh_data(storage, days=max(1, int(days) + 1)) @@ -434,16 +344,11 @@ def find_related_sessions( ) -> dict: """Find sessions related to a given session. - Identifies sessions that share common files, commands, or temporal proximity. 
- Args: - session_id: The session ID to find related sessions for - method: How to find related: 'files', 'commands', or 'temporal' (default: 'files') - days: Number of days to search (default: 7) - limit: Maximum related sessions to return (default: 10) - - Returns: - Related sessions with their connection details + session_id: Session to find related sessions for + method: 'files', 'commands', or 'temporal' (default: 'files') + days: Days to search (default: 7) + limit: Max related sessions (default: 10) """ queries.ensure_fresh_data(storage, days=days) result = queries.find_related_sessions( @@ -456,16 +361,10 @@ def find_related_sessions( def get_insights(refresh: bool = False, days: int = 7, include_advanced: bool = True) -> dict: """Get pre-computed patterns for /improve-workflow. - Includes traditional pattern analysis plus advanced analytics from RFC #17: - trends, failure analysis, and session classification summaries. - Args: - refresh: Force recomputation of patterns (default: False) - days: Number of days to analyze if refreshing (default: 7) - include_advanced: Include trends, failures, classification (default: True) - - Returns: - Insights organized by type with optional advanced analytics + refresh: Force recomputation (default: False) + days: Days to analyze (default: 7) + include_advanced: Include trends/failures/classification (default: True) """ queries.ensure_fresh_data(storage, days=days) result = patterns.get_insights( @@ -478,15 +377,9 @@ def get_insights(refresh: bool = False, days: int = 7, include_advanced: bool = def analyze_failures(days: int = 7, rework_window_minutes: int = 10) -> dict: """Analyze failure patterns and recovery behavior. - Identifies tool errors, rework patterns (same file edited multiple times), - and error clustering by tool/command. 
- Args: - days: Number of days to analyze (default: 7) - rework_window_minutes: Time window for detecting rework (default: 10) - - Returns: - Failure analysis including error counts, rework patterns, and recovery times + days: Days to analyze (default: 7) + rework_window_minutes: Rework detection window (default: 10) """ queries.ensure_fresh_data(storage, days=days) result = patterns.analyze_failures( @@ -497,18 +390,12 @@ def analyze_failures(days: int = 7, rework_window_minutes: int = 10) -> dict: @mcp.tool() def get_error_details(days: int = 7, tool: str | None = None, limit: int = 50) -> dict: - """Get detailed error information including tool parameters that caused failures. - - Shows which specific patterns (Glob/Grep), commands (Bash), or files caused errors. - Use this to drill down from analyze_failures() counts to actionable specifics. + """Get error details with failing parameters. Drill down from analyze_failures(). Args: - days: Number of days to analyze (default: 7) - tool: Optional filter by tool name (e.g., "Glob", "Bash", "Edit") - limit: Maximum errors to return per tool (default: 50) - - Returns: - Error details grouped by tool with the failing parameter (pattern/command/file) + days: Days to analyze (default: 7) + tool: Filter by tool (e.g., "Glob", "Bash", "Edit") + limit: Max errors per tool (default: 50) """ queries.ensure_fresh_data(storage, days=days) result = queries.query_error_details(storage, days=days, tool=tool, limit=limit) @@ -517,17 +404,11 @@ def get_error_details(days: int = 7, tool: str | None = None, limit: int = 50) - @mcp.tool() def classify_sessions(days: int = 7, project: str | None = None) -> dict: - """Classify sessions based on their dominant activity patterns. - - Categories include: debugging (high error rate), development (edit-heavy), - research (read/search heavy), maintenance (CI/git heavy), mixed. + """Classify sessions by activity pattern (debugging/development/research/maintenance/mixed). 
Args: - days: Number of days to analyze (default: 7) - project: Optional project filter - - Returns: - Session classifications with category distribution + days: Days to analyze (default: 7) + project: Project filter """ queries.ensure_fresh_data(storage, days=days) result = queries.classify_sessions(storage, days=days, project=project) @@ -536,18 +417,12 @@ def classify_sessions(days: int = 7, project: str | None = None) -> dict: @mcp.tool() def get_handoff_context(session_id: str | None = None, days: float = 0.17, limit: int = 10) -> dict: - """Get context for session handoff (useful for /status-report). - - Provides recent activity summary including last user messages, - files modified, commands run, and session duration/activity stats. + """Get context for session handoff (messages, files, commands). Args: - session_id: Specific session ID (default: most recent session) - days: Days to look back if no session specified (default: 0.17 = ~4 hours) - limit: Maximum messages to return (default: 10) - - Returns: - Handoff context including messages, files, commands, and activity summary + session_id: Session ID (default: most recent) + days: Days to look back (default: 0.17 = ~4h) + limit: Max messages (default: 10) """ hours = int(days * 24) queries.ensure_fresh_data(storage, days=max(1, int(days) + 1)) @@ -559,16 +434,11 @@ def get_handoff_context(session_id: str | None = None, days: float = 0.17, limit @mcp.tool() def analyze_trends(days: int = 7, compare_to: str = "previous") -> dict: - """Analyze trends by comparing current period to previous period. - - Compares metrics between two time periods to identify changes in usage patterns. + """Analyze trends by comparing current period to previous. 
Args: - days: Length of current period in days (default: 7) - compare_to: 'previous' (same length before current) or 'same_last_month' (default: previous) - - Returns: - Trend analysis including percentage changes and direction for events, sessions, errors, tokens + days: Current period length (default: 7) + compare_to: 'previous' or 'same_last_month' """ queries.ensure_fresh_data(storage, days=days * 2) result = patterns.analyze_trends(storage, days=days, compare_to=compare_to) @@ -577,80 +447,40 @@ def analyze_trends(days: int = 7, compare_to: str = "previous") -> dict: @mcp.tool() def ingest_git_history( - repo_path: str | None = None, days: int = 7, project_path: str | None = None + repo_path: str | None = None, + days: int = 7, + project_path: str | None = None, + all_projects: bool = False, ) -> dict: - """Ingest git commit history from a repository. - - Parses git log and stores commits for correlation with session activity. - - Args: - repo_path: Path to git repository (default: current directory) - days: Number of days of history to ingest (default: 7) - project_path: Optional project path to associate commits with - - Returns: - Ingestion stats including commits found and added - """ - result = ingest.ingest_git_history( - storage, repo_path=repo_path, days=days, project_path=project_path - ) - return {"status": "ok", **result} - - -@mcp.tool() -def correlate_git_with_sessions(days: int = 7) -> dict: - """Correlate git commits with session activity. - - Associates commits with sessions based on timing. + """Ingest git commit history and correlate with sessions. Args: - days: Number of days to correlate (default: 7) - - Returns: - Correlation stats including commits correlated + repo_path: Git repo path (default: cwd). Ignored if all_projects=True. 
+ days: Days of history (default: 7) + project_path: Project path to associate commits with + all_projects: Ingest from all known projects (default: False) """ - result = ingest.correlate_git_with_sessions(storage, days=days) - return {"status": "ok", **result} - - -@mcp.tool() -def ingest_git_history_all_projects(days: int = 7) -> dict: - """Ingest git commit history from all known projects. - - Scans unique project paths from the events table, decodes them to filesystem - paths, and runs git ingestion on each that has a .git directory. + if all_projects: + result = ingest.ingest_git_history_all_projects(storage, days=days) + else: + result = ingest.ingest_git_history( + storage, repo_path=repo_path, days=days, project_path=project_path + ) - This is more comprehensive than ingest_git_history() which only processes - the current directory. + # Auto-correlate commits with sessions + correlation = ingest.correlate_git_with_sessions(storage, days=days) + result["correlation"] = correlation - Args: - days: Number of days of history to ingest (default: 7) - - Returns: - Aggregate stats across all projects including total commits added - """ - result = ingest.ingest_git_history_all_projects(storage, days=days) return {"status": "ok", **result} @mcp.tool() def get_session_signals(days: int = 7, min_count: int = 1) -> dict: - """Get raw session signals for LLM interpretation. - - RFC #26 (revised per RFC #17 principle): Extracts observable session data - without interpretation. Per RFC #17: "Don't over-distill - raw data with - light structure beats heavily processed summaries. The LLM can handle context." - - Returns raw signals like event counts, error rates, commit counts, and - boolean flags (has_rework, has_pr_activity). The consuming LLM should - interpret these to determine outcomes like success or abandonment. + """Get raw session signals (event counts, error rates, flags) for LLM interpretation. 
Args: - days: Number of days to analyze (default: 7) - min_count: Minimum events for a session to be included (default: 1) - - Returns: - Raw session signals for LLM interpretation + days: Days to analyze (default: 7) + min_count: Min events to include session (default: 1) """ queries.ensure_fresh_data(storage, days=days) result = patterns.get_session_signals(storage, days=days, min_count=min_count) @@ -661,16 +491,9 @@ def get_session_signals(days: int = 7, min_count: int = 1) -> dict: def get_session_commits(session_id: str | None = None, days: int = 7) -> dict: """Get commits associated with sessions. - RFC #26: Returns commits linked to sessions with timing metadata: - - time_to_commit_seconds: Time from session start to commit - - is_first_commit: Whether this was the first commit in the session - Args: - session_id: Specific session ID (optional, returns all if not specified) - days: Number of days to look back (default: 7) - - Returns: - Session-commit mappings with timing metadata + session_id: Session ID (optional, returns all if not specified) + days: Days to look back (default: 7) """ queries.ensure_fresh_data(storage, days=days) @@ -701,16 +524,13 @@ def get_file_activity( limit: int = 20, collapse_worktrees: bool = False, ) -> dict: - """Get file activity (reads, edits, writes) with breakdown. + """Get file activity (reads, edits, writes) breakdown. 
Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - limit: Maximum files to return (default: 20) - collapse_worktrees: If True, consolidate .worktrees// paths - - Returns: - File activity data with read/edit/write breakdown per file + days: Days to analyze (default: 7) + project: Project path filter + limit: Max files (default: 20) + collapse_worktrees: Consolidate .worktrees/ paths """ queries.ensure_fresh_data(storage, days=days, project=project) result = queries.query_file_activity( @@ -723,33 +543,12 @@ def get_file_activity( return {"status": "ok", **result} -@mcp.tool() -def get_languages(days: int = 7, project: str | None = None) -> dict: - """Get language distribution from file extensions. - - Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - - Returns: - Language distribution with counts and percentages - """ - queries.ensure_fresh_data(storage, days=days, project=project) - result = queries.query_languages(storage, days=days, project=project) - return {"status": "ok", **result} - - @mcp.tool() def get_projects(days: int = 7) -> dict: - """Get activity breakdown by project. - - Note: No project filter - this shows activity *across* all projects. + """Get activity breakdown across all projects. Args: - days: Number of days to analyze (default: 7) - - Returns: - Project activity data with event counts and session counts per project + days: Days to analyze (default: 7) """ queries.ensure_fresh_data(storage, days=days) result = queries.query_projects(storage, days=days) @@ -761,11 +560,8 @@ def get_mcp_usage(days: int = 7, project: str | None = None) -> dict: """Get MCP server and tool usage breakdown. 
Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - - Returns: - MCP usage grouped by server with tool breakdown + days: Days to analyze (default: 7) + project: Project path filter """ queries.ensure_fresh_data(storage, days=days, project=project) result = queries.query_mcp_usage(storage, days=days, project=project) @@ -774,79 +570,17 @@ def get_mcp_usage(days: int = 7, project: str | None = None) -> dict: @mcp.tool() def get_agent_activity(days: int = 7, project: str | None = None) -> dict: - """Get activity breakdown by Task subagent. - - RFC #41: Tracks agent activity from Task tool invocations, - distinguishing work done by agents vs main session. + """Get activity breakdown by Task subagent vs main session. Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - - Returns: - Dict with agent activity breakdown including: - - Main session stats (agent_id IS NULL) - - Per-agent stats with token usage and top tools - - Summary with agent vs main session token percentage + days: Days to analyze (default: 7) + project: Project path filter """ queries.ensure_fresh_data(storage, days=days, project=project) result = queries.query_agent_activity(storage, days=days, project=project) return {"status": "ok", **result} -@mcp.tool() -def ingest_bus_events(days: int = 7) -> dict: - """Ingest events from event-bus for cross-session insights. - - Reads from ~/.claude/contrib/event-bus/data.db and stores - events for correlation with session activity. 
- - Args: - days: Number of days to ingest on first run (default: 7) - - Returns: - Ingestion statistics including events_ingested count - """ - from session_analytics.bus_ingest import ingest_bus_events as do_ingest - - result = do_ingest(storage, days=days) - return {"status": "ok", **result} - - -@mcp.tool() -def get_bus_events( - days: int = 7, - event_type: str | None = None, - session_id: str | None = None, - repo: str | None = None, - limit: int = 100, -) -> dict: - """Get event-bus events with optional filters. - - Returns raw events from the event-bus for cross-session insights. - Events include gotcha_discovered, pattern_found, help_needed, etc. - - Args: - days: Number of days to analyze (default: 7) - event_type: Filter by event type (e.g., 'gotcha_discovered') - session_id: Filter by session ID - repo: Filter by repo name - limit: Maximum events to return (default: 100) - - Returns: - Event-bus events with breakdown by type - """ - result = queries.query_bus_events( - storage, - days=days, - event_type=event_type, - session_id=session_id, - repo=repo, - limit=limit, - ) - return {"status": "ok", **result} - - # Issue #69: Compaction detection and context efficiency tools @@ -857,20 +591,13 @@ def get_compaction_events( limit: int = 50, aggregate: bool = False, ) -> dict: - """List compaction events (context resets) across sessions. - - Compactions occur when Claude's context window fills and is summarized. - This helps identify sessions that hit context limits. + """List compaction events (context resets). 
Args: - days: Number of days to analyze (default: 7) - session_id: Filter to specific session - limit: Maximum events to return (default: 50) - aggregate: If True, group by session with counts instead of individual events - - Returns: - List of compaction events with timestamps and session info - (or session aggregates if aggregate=True) + days: Days to analyze (default: 7) + session_id: Filter to session + limit: Max events (default: 50) + aggregate: Group by session with counts """ queries.ensure_fresh_data(storage, days=days) result = queries.get_compaction_events( @@ -879,82 +606,18 @@ def get_compaction_events( return {"status": "ok", **result} -@mcp.tool() -def get_pre_compaction_events( - session_id: str, - compaction_timestamp: str, - limit: int = 50, -) -> dict: - """Get events that occurred before a compaction event. - - Use this to understand what was happening in the session - leading up to a context reset. - - Args: - session_id: The session to analyze - compaction_timestamp: ISO timestamp of the compaction event - limit: Maximum events to return (default: 50) - - Returns: - Events before the compaction, ordered by timestamp descending (most recent first) - """ - queries.ensure_fresh_data(storage, days=7) - result = queries.get_pre_compaction_events( - storage, - session_id=session_id, - compaction_timestamp=compaction_timestamp, - limit=limit, - ) - return {"status": "ok", **result} - - -@mcp.tool() -def analyze_pre_compaction_patterns( - days: int = 7, - events_before: int = 50, - limit: int = 20, -) -> dict: - """Analyze patterns in events leading up to compactions. 
- - RFC #81: Identifies antipatterns that accelerate context exhaustion: - - Consecutive reads without edits (exploration without action) - - Files read multiple times before compaction - - Large tool results that bloated context - - Tool distribution before compaction - - Args: - days: Number of days to analyze (default: 7) - events_before: Events to analyze before each compaction (default: 50) - limit: Max compactions to analyze (default: 20) - - Returns: - Dict with aggregated patterns and recommendations - """ - queries.ensure_fresh_data(storage, days=days) - result = queries.analyze_pre_compaction_patterns( - storage, days=days, events_before=events_before, limit=limit - ) - return {"status": "ok", **result} - - @mcp.tool() def get_large_tool_results( days: int = 7, min_size_kb: int = 10, limit: int = 50, ) -> dict: - """Find tool results that consumed significant context space. - - Helps identify bloat patterns - large file reads, verbose command - outputs, or other operations that accelerate context exhaustion. + """Find tool results consuming significant context space. Args: - days: Number of days to analyze (default: 7) - min_size_kb: Minimum result size in KB to include (default: 10) - limit: Maximum results to return (default: 50) - - Returns: - Large tool results with size, tool name, and parameters + days: Days to analyze (default: 7) + min_size_kb: Min size in KB (default: 10) + limit: Max results (default: 50) """ queries.ensure_fresh_data(storage, days=days) result = queries.get_large_tool_results( @@ -971,19 +634,10 @@ def get_session_efficiency( ) -> dict: """Analyze context efficiency and burn rate across sessions. 
- Calculates metrics like: - - Total context bytes consumed per session - - Average result size - - Compaction count (context resets) - - Efficiency ratio (output/input bytes) - Args: - days: Number of days to analyze (default: 7) - project: Optional project path filter - limit: Maximum sessions to return (default: 50) - - Returns: - Session efficiency metrics sorted by total bytes consumed + days: Days to analyze (default: 7) + project: Project path filter + limit: Max sessions (default: 50) """ queries.ensure_fresh_data(storage, days=days) result = queries.get_session_efficiency(storage, days=days, project=project, limit=limit) diff --git a/tests/test_server.py b/tests/test_server.py index d0880cc..2ac0017 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -2,18 +2,16 @@ from session_analytics.server import ( analyze_failures, - analyze_pre_compaction_patterns, analyze_trends, classify_sessions, - correlate_git_with_sessions, detect_parallel_sessions, find_related_sessions, - get_command_frequency, get_compaction_events, + get_error_details, get_file_activity, get_handoff_context, get_insights, - get_languages, + get_large_tool_results, get_mcp_usage, get_permission_gaps, get_projects, @@ -73,16 +71,6 @@ def test_get_session_events(): assert isinstance(result["events"], list) -def test_get_command_frequency(): - """Test that get_command_frequency returns command counts.""" - result = get_command_frequency.fn(days=7) - assert result["status"] == "ok" - assert "days" in result - assert "total_commands" in result - assert "commands" in result - assert isinstance(result["commands"], list) - - def test_list_sessions(): """Test that list_sessions returns session info.""" result = list_sessions.fn(days=7) @@ -233,19 +221,14 @@ def test_analyze_trends(): def test_ingest_git_history(): - """Test that ingest_git_history ingests git commits.""" + """Test that ingest_git_history ingests git commits and auto-correlates.""" result = 
ingest_git_history.fn(repo_path=None, days=7) assert result["status"] == "ok" assert "commits_found" in result assert "commits_added" in result - - -def test_correlate_git_with_sessions(): - """Test that correlate_git_with_sessions links commits to sessions.""" - result = correlate_git_with_sessions.fn(days=7) - assert result["status"] == "ok" - assert "days" in result - assert "commits_correlated" in result + # Verify auto-correlation is included + assert "correlation" in result + assert "commits_correlated" in result["correlation"] def test_get_session_signals(): @@ -279,16 +262,6 @@ def test_get_file_activity(): assert isinstance(result["files"], list) -def test_get_languages(): - """Test that get_languages returns language distribution.""" - result = get_languages.fn(days=7) - assert result["status"] == "ok" - assert "days" in result - assert "total_operations" in result - assert "languages" in result - assert isinstance(result["languages"], list) - - def test_get_projects(): """Test that get_projects returns project activity.""" result = get_projects.fn(days=7) @@ -405,19 +378,21 @@ def test_get_compaction_events_aggregate(): assert "total_summary_kb" in session -def test_analyze_pre_compaction_patterns(): - """Test that analyze_pre_compaction_patterns returns pattern data.""" - result = analyze_pre_compaction_patterns.fn(days=7, events_before=50, limit=20) - assert result["status"] == "ok" - assert "compactions_analyzed" in result - assert "patterns" in result - assert "recommendations" in result - assert isinstance(result["recommendations"], list) - # If patterns exist, verify structure - if result.get("compactions_analyzed", 0) > 0: - patterns = result["patterns"] - assert "avg_consecutive_reads" in patterns - assert "avg_files_read_multiple_times" in patterns - assert "avg_large_results" in patterns - assert "tool_distribution" in patterns - assert isinstance(patterns["tool_distribution"], list) +def test_get_error_details(): + """Test that 
get_error_details returns detailed error information.""" + result = get_error_details.fn(days=7, limit=50) + assert result["status"] == "ok" + assert "days" in result + assert "total_errors" in result + assert "errors_by_tool" in result + assert isinstance(result["errors_by_tool"], dict) + + +def test_get_large_tool_results(): + """Test that get_large_tool_results returns large result information.""" + result = get_large_tool_results.fn(days=7, min_size_kb=10, limit=50) + assert result["status"] == "ok" + assert "days" in result + assert "min_size_kb" in result + assert "large_results" in result + assert isinstance(result["large_results"], list) From 044f6c749f456b3b98a8478a18d8756eea9a7d1d Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Sun, 11 Jan 2026 21:26:49 +0000 Subject: [PATCH 2/3] docs: Clarify guide.md as canonical reference for MCP tools Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 4a5c623..b305b7d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -80,7 +80,7 @@ make reinstall # pip install -e . + restart (for pyproject.toml) ### MCP Tool Docstrings -Keep docstrings minimal - `guide.md` is the comprehensive reference. Docstrings add token overhead on every session. +Keep docstrings minimal - `guide.md` is the **canonical reference** and should contain verbose explanations, usage examples, and tips. Docstrings add token overhead on every session, so they should only provide quick context. **Include:** - First-line description (what it does) From ee8ecab01d0c1998e91f02d2bed9aaf27d2766df Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Sun, 11 Jan 2026 22:49:06 +0000 Subject: [PATCH 3/3] docs: Simplify ingest_git_history signature in guide.md Show only commonly-used parameters (days, all_projects) in table. 
Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md index 72a26bb..a1492ae 100644 --- a/src/session_analytics/guide.md +++ b/src/session_analytics/guide.md @@ -114,7 +114,7 @@ Returns both core metrics (`events`, `sessions`, `errors`, `tokens`) and `effici | Tool | Purpose | |------|---------| -| `ingest_git_history(days?, repo_path?, all_projects?)` | Ingest commits and auto-correlate with sessions | +| `ingest_git_history(days?, all_projects?)` | Ingest commits and auto-correlate with sessions | | `get_session_commits(session_id?)` | Get commits associated with a session | ### Session Signals