Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 35 additions & 9 deletions src/session_analytics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,18 @@ def _format_sequences(data: dict) -> list[str]:
desc = "Detailed sequences (Bash→commands, Skill→skills, Task→agents)"
else:
desc = "Tool chains showing workflow patterns (Read → Edit, etc.)"
lines = [
desc,
"",
"Sequences:",
]

total = data.get("total_patterns", len(data.get("sequences", [])))
shown = len(data.get("sequences", []))

lines = [desc, ""]

# Show truncation info if results are limited
if total > shown:
lines.append(f"Showing {shown} of {total} total patterns")
lines.append("")

lines.append("Sequences:")
for seq in data.get("sequences", []):
lines.append(f" {seq['pattern']}: {seq['count']}")
return lines
Expand Down Expand Up @@ -622,13 +629,22 @@ def format_metric(name: str, metric: dict) -> str:
def _format_compactions(data: dict) -> list[str]:
# Count unique sessions
unique_sessions = len({c["session_id"] for c in data.get("compactions", [])})
total_count = data.get("total_compaction_count", data["compaction_count"])
shown_count = data["compaction_count"]

lines = [
f"Compaction events (context resets) - last {data.get('days', 7)} days",
"",
f"Total compactions: {data['compaction_count']}",
f"Sessions affected: {unique_sessions}",
"",
]

# Show truncation info if results are limited
if total_count > shown_count:
lines.append(f"Showing {shown_count} of {total_count} total compactions")
else:
lines.append(f"Total compactions: {shown_count}")
lines.append(f"Sessions affected: {unique_sessions}")
lines.append("")

if data.get("compactions"):
lines.append("Recent compactions:")
for c in data["compactions"][:10]:
Expand Down Expand Up @@ -785,10 +801,15 @@ def cmd_sequences(args):
min_count=args.min_count,
expand=args.expand,
)
# Apply limit to match MCP behavior
limit = getattr(args, "limit", 50)
limited_patterns = sequence_patterns[:limit] if limit > 0 else sequence_patterns
result = {
"days": args.days,
"expanded": args.expand,
"sequences": [{"pattern": p.pattern_key, "count": p.count} for p in sequence_patterns],
"limit": limit,
"total_patterns": len(sequence_patterns),
"sequences": [{"pattern": p.pattern_key, "count": p.count} for p in limited_patterns],
}
print(format_output(result, args.json))

Expand Down Expand Up @@ -1141,6 +1162,7 @@ def cmd_compactions(args):
storage,
days=args.days,
session_id=getattr(args, "session_id", None),
limit=getattr(args, "limit", 50),
)
print(format_output(result, args.json))

Expand Down Expand Up @@ -1176,6 +1198,7 @@ def cmd_efficiency(args):
storage,
days=args.days,
project=getattr(args, "project", None),
limit=getattr(args, "limit", 50),
)
print(format_output(result, args.json))

Expand Down Expand Up @@ -1459,6 +1482,7 @@ def main():
action="store_true",
help="Expand Bash→commands, Skill→skills, Task→agents",
)
sub.add_argument("--limit", type=int, default=50, help="Max patterns to return (default: 50)")
sub.set_defaults(func=cmd_sequences)

# permissions
Expand Down Expand Up @@ -1657,6 +1681,7 @@ def main():
sub = subparsers.add_parser("compactions", help="Show compaction events (context resets)")
sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
sub.add_argument("--session-id", help="Filter to specific session ID")
sub.add_argument("--limit", type=int, default=50, help="Max events to return (default: 50)")
sub.set_defaults(func=cmd_compactions)

# pre-compaction
Expand All @@ -1679,6 +1704,7 @@ def main():
sub = subparsers.add_parser("efficiency", help="Show session context efficiency metrics")
sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
sub.add_argument("--project", help="Project path filter")
sub.add_argument("--limit", type=int, default=50, help="Max sessions to return (default: 50)")
sub.set_defaults(func=cmd_efficiency)

# benchmark (Issue #63)
Expand Down
26 changes: 20 additions & 6 deletions src/session_analytics/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ identify permission gaps.

| Tool | Purpose |
|------|---------|
| `get_tool_sequences(days?, min_count?, length?)` | Common tool chains (e.g., Read → Edit → Bash) |
| `get_tool_sequences(days?, min_count?, length?, limit?)` | Common tool chains (e.g., Read → Edit → Bash) |
| `sample_sequences(pattern, limit?, context_events?)` | Random samples of a pattern with surrounding context |
| `get_permission_gaps(days?, min_count?)` | Commands not covered by settings.json (supports glob patterns) |
| `get_insights(days?, refresh?)` | Pre-computed patterns for /improve-workflow |
Expand Down Expand Up @@ -68,11 +68,22 @@ Each session includes `classification_factors` explaining WHY it was categorized
- `trigger`: The threshold that was exceeded (e.g., "error_rate > 15%")
- Relevant metrics (error_rate, edit_rate, etc.)

Each session also includes `efficiency` metrics:
- `compaction_count`: Number of context resets
- `total_result_mb`: Total tool result size
- `files_read_multiple_times`: Indicator of rework
- `burn_rate`: "high", "medium", or "low" based on compactions/hour

### Trend Analysis

| Tool | Purpose |
|------|---------|
| `analyze_trends(days?, compare_to?)` | Token/event trends with growth rates |
| `analyze_trends(days?, compare_to?)` | Token/event trends with efficiency metrics |

Returns both core metrics (`events`, `sessions`, `errors`, `tokens`) and `efficiency` metrics:
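- `avg_compactions_per_session`: Average number of context resets (compactions) per session (lower is better)
- `avg_result_mb_per_session`: Average total tool result size per session, in MB (context consumption)
- `files_read_multiple_times`: Number of files read more than once within the same session (rework indicator)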

### Session Messages

Expand Down Expand Up @@ -120,10 +131,10 @@ Each session includes `classification_factors` explaining WHY it was categorized

| Tool | Purpose |
|------|---------|
| `get_compaction_events(days?, session_id?)` | List compaction events (context resets) |
| `get_compaction_events(days?, session_id?, limit?)` | List compaction events (context resets) |
| `get_pre_compaction_events(session_id, compaction_timestamp, limit?)` | Events before a compaction for analysis |
| `get_large_tool_results(days?, min_size_kb?, limit?)` | Find tool results consuming context space |
| `get_session_efficiency(days?, project?)` | Session efficiency metrics and burn rate |
| `get_session_efficiency(days?, project?, limit?)` | Session efficiency metrics and burn rate |

**Context efficiency** helps identify why sessions hit context limits:
- **Compactions**: Context resets when Claude summarizes conversation
Expand Down Expand Up @@ -251,8 +262,11 @@ get_permission_gaps(min_count=5)

Add suggestions to `permissions.allow` in your settings.

**Note:** Supports glob pattern matching. Patterns like `Bash(make*)` will correctly
match commands `make`, `make-test`, etc. using fnmatch.
**Notes:**
- Supports glob pattern matching. Patterns like `Bash(make*)` will correctly
match commands `make`, `make-test`, etc. using fnmatch.
- Automatically filters non-actionable commands (shell builtins like `pwd`, `cd`, `echo`,
control flow like `for`, `if`, and info commands like `hostname`, `whoami`) to reduce noise.

### Git Integration

Expand Down
115 changes: 115 additions & 0 deletions src/session_analytics/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,55 @@ def _command_matches_patterns(cmd: str, base_commands: set[str], glob_patterns:
return False


# Command names that permission-gap analysis should ignore: allowlisting them
# is not needed, and reporting them only adds noise. Grouped by the reason each
# family is considered non-actionable.
NON_ACTIONABLE_COMMANDS = frozenset().union(
    # Shell builtins / context commands
    ("pwd", "cd", "echo", "true", "false", "exit", "return", "export", "source", "."),
    # Comment prefixes (left over from multi-line commands)
    ("#",),
    # Control-flow keywords (left over from multi-line commands)
    ("for", "while", "if", "then", "else", "fi", "do", "done", "case", "esac"),
    # System info commands (informational, not dangerous)
    ("hostname", "whoami", "id", "uname", "date", "uptime"),
    # File viewers (read-only, typically safe)
    ("bat", "less", "more"),
    # Variable assignment/declaration builtins (captured as commands)
    ("set", "unset", "local", "declare", "readonly"),
)


def compute_permission_gaps(
storage: SQLiteStorage,
days: int = 7,
Expand All @@ -712,6 +761,9 @@ def compute_permission_gaps(
Uses fnmatch for glob pattern matching, so patterns like Bash(make*)
will correctly match commands like 'make', 'make-test', etc.

Filters out non-actionable commands (shell builtins, context commands)
that would create noise in the results.

Args:
storage: Storage instance
days: Number of days to analyze
Expand Down Expand Up @@ -741,6 +793,9 @@ def compute_permission_gaps(
patterns = []
for row in rows:
cmd = row["command"]
# Skip non-actionable commands (builtins, context commands)
if cmd in NON_ACTIONABLE_COMMANDS:
continue
if not _command_matches_patterns(cmd, base_commands, glob_patterns):
patterns.append(
Pattern(
Expand Down Expand Up @@ -1179,6 +1234,39 @@ def get_period_metrics(start: datetime, end: datetime) -> dict:
)
tokens = token_usage[0] if token_usage else {"input_tokens": 0, "output_tokens": 0}

# Efficiency metrics: compactions and result bytes
efficiency = storage.execute_query(
"""
SELECT
SUM(CASE WHEN entry_type = 'compaction' THEN 1 ELSE 0 END) as compaction_count,
COALESCE(SUM(result_size_bytes), 0) as total_result_bytes
FROM events
WHERE timestamp >= ? AND timestamp < ?
""",
(start, end),
)
eff = efficiency[0] if efficiency else {"compaction_count": 0, "total_result_bytes": 0}
compaction_count = eff["compaction_count"] or 0
total_result_bytes = eff["total_result_bytes"] or 0

# Files read multiple times (rework indicator)
multi_read = storage.execute_query(
"""
SELECT COUNT(*) as multi_read_files
FROM (
SELECT file_path
FROM events
WHERE timestamp >= ? AND timestamp < ?
AND tool_name = 'Read'
AND file_path IS NOT NULL
GROUP BY session_id, file_path
HAVING COUNT(*) > 1
)
""",
(start, end),
)
files_read_multiple = multi_read[0]["multi_read_files"] if multi_read else 0

return {
"total_events": total_events,
"sessions": sessions,
Expand All @@ -1187,6 +1275,15 @@ def get_period_metrics(start: datetime, end: datetime) -> dict:
"top_tools": top_tools,
"input_tokens": tokens["input_tokens"] or 0,
"output_tokens": tokens["output_tokens"] or 0,
# Efficiency metrics
"compaction_count": compaction_count,
"total_result_bytes": total_result_bytes,
"files_read_multiple_times": files_read_multiple,
# Per-session averages
"avg_compactions_per_session": compaction_count / sessions if sessions > 0 else 0,
"avg_result_mb_per_session": (total_result_bytes / 1024 / 1024) / sessions
if sessions > 0
else 0,
}

def calculate_change(current: float, previous: float) -> dict:
Expand Down Expand Up @@ -1257,5 +1354,23 @@ def calculate_change(current: float, previous: float) -> dict:
current_metrics["output_tokens"], previous_metrics["output_tokens"]
),
},
# Issue #78: Efficiency metrics for workflow improvement tracking
"efficiency": {
"compactions": calculate_change(
current_metrics["compaction_count"], previous_metrics["compaction_count"]
),
"avg_compactions_per_session": calculate_change(
current_metrics["avg_compactions_per_session"],
previous_metrics["avg_compactions_per_session"],
),
"files_read_multiple_times": calculate_change(
current_metrics["files_read_multiple_times"],
previous_metrics["files_read_multiple_times"],
),
"avg_result_mb_per_session": calculate_change(
current_metrics["avg_result_mb_per_session"],
previous_metrics["avg_result_mb_per_session"],
),
},
"tool_changes": tool_changes[:10],
}
Loading