Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ Queryable analytics for Claude Code session logs, exposed as an MCP server and C
cp ~/.claude/contrib/analytics/data.db ~/.claude/contrib/analytics/data.db.backup-$(date +%Y%m%d-%H%M%S)
```

### When adding new columns:
1. **Always backup first** (see above)
2. **Backfill existing data** when possible - new columns should be populated for historical records, not just future ingestion
3. For backfill: either re-ingest from JSONL (`ingest_logs(force=True)` after clearing) or write UPDATE queries in the migration

---

## Design Philosophy
Expand Down
6 changes: 6 additions & 0 deletions docs/SCHEMA.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ CREATE TABLE events (
is_sidechain INTEGER DEFAULT 0,
version TEXT, -- Claude Code version

-- Context efficiency (Issue #69)
result_size_bytes INTEGER, -- UTF-8 byte size of message_text (NULL when message_text is NULL), used for context analysis

UNIQUE(session_id, uuid) -- UUID unique within each session
)
```
Expand All @@ -89,6 +92,8 @@ CREATE TABLE events (
- Token columns only populated on `entry_type='assistant'` to avoid double-counting
- `message_text` enables FTS via `events_fts` virtual table for all entry types
- `tool_input_json` preserves full parameters for drill-down queries
- `entry_type='compaction'` marks context resets: a summary entry whose text contains "continued from a previous conversation" (matched case-insensitively) is stored as `compaction` instead of `summary`
- `result_size_bytes` enables context burn rate analysis

### sessions

Expand Down Expand Up @@ -253,6 +258,7 @@ Sync triggers maintain index consistency:
| 6 | add_event_bus_integration | bus_events table |
| 7 | add_tool_id_index | Performance index for self-joins |
| 8 | add_unified_message_text | Unified message_text column, rebuilt FTS on all entry types (Issue #68) |
| 9 | add_result_size_bytes | result_size_bytes column for context efficiency tracking (Issue #69) |

---

Expand Down
190 changes: 190 additions & 0 deletions src/session_analytics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
classify_sessions,
detect_parallel_sessions,
find_related_sessions,
get_compaction_events,
get_handoff_context,
get_large_tool_results,
get_pre_compaction_events,
get_session_efficiency,
get_user_journey,
query_agent_activity,
query_bus_events,
Expand Down Expand Up @@ -610,6 +614,98 @@ def format_metric(name: str, metric: dict) -> str:
return lines


# Issue #69: Compaction and efficiency formatters


@_register_formatter(lambda d: "compaction_count" in d and "compactions" in d)
def _format_compactions(data: dict) -> list[str]:
# Count unique sessions
unique_sessions = len({c["session_id"] for c in data.get("compactions", [])})
lines = [
f"Compaction events (context resets) - last {data.get('days', 7)} days",
"",
f"Total compactions: {data['compaction_count']}",
f"Sessions affected: {unique_sessions}",
"",
]
if data.get("compactions"):
lines.append("Recent compactions:")
for c in data["compactions"][:10]:
lines.append(f" {c['timestamp']} - session {c['session_id'][:8]}...")
return lines


@_register_formatter(lambda d: "compaction_timestamp" in d and "events" in d and "event_count" in d)
def _format_pre_compaction(data: dict) -> list[str]:
lines = [
f"Events before compaction at {data['compaction_timestamp']}",
f"Session: {data['session_id']}",
"",
f"Events found: {data['event_count']}",
"",
]
if data.get("events"):
lines.append("Events (most recent first):")
for e in data["events"]:
tool = e.get("tool") or e.get("type", "unknown")
size_info = ""
if e.get("size_bytes"):
size_kb = e["size_bytes"] / 1024
size_info = f" ({size_kb:.1f}KB)"
identifier = e.get("file") or e.get("command") or ""
if identifier:
identifier = f" - {identifier[:40]}"
error_mark = " [ERR]" if e.get("error") else ""
lines.append(f" {e['timestamp']} {tool}{size_info}{identifier}{error_mark}")
return lines


@_register_formatter(lambda d: "large_results" in d and "result_count" in d)
def _format_large_results(data: dict) -> list[str]:
# Calculate total from tool_breakdown
total_mb = sum(t.get("total_mb", 0) for t in data.get("tool_breakdown", []))
lines = [
f"Large tool results (>= {data.get('min_size_kb', 10)}KB) - last {data.get('days', 7)} days",
"",
f"Total large results: {data['result_count']}",
f"Total size: {total_mb:.2f}MB",
"",
]
if data.get("large_results"):
lines.append("Top results by size:")
for r in data["large_results"][:10]:
identifier = r.get("file") or r.get("command") or "N/A"
lines.append(f" {r['tool']}: {r['size_kb']:.1f}KB - {identifier[:50]}")
return lines


@_register_formatter(
lambda d: "sessions" in d
and "session_count" in d
and any("efficiency_signals" in s for s in d.get("sessions", []))
)
def _format_efficiency(data: dict) -> list[str]:
lines = [
f"Session efficiency - last {data.get('days', 7)} days",
"",
f"Sessions analyzed: {data.get('session_count', 0)}",
"",
"Sessions by context usage:",
]
for s in data.get("sessions", [])[:10]:
signals = s.get("efficiency_signals", {})
total_mb = signals.get("total_result_mb", 0)
compactions = signals.get("compaction_count", 0)
burn_rate = signals.get("burn_rate_tokens_per_event", 0)
read_edit = signals.get("read_to_edit_ratio", 0)
multi_read = signals.get("files_read_multiple_times", 0)
lines.append(
f" {s['session_id'][:8]}...: {total_mb:.2f}MB, {compactions} compactions, "
f"{burn_rate:.0f} tok/ev, R/E:{read_edit:.1f}, multi-read:{multi_read}"
)
return lines


def format_output(data: dict, json_output: bool = False) -> str:
"""Format output as JSON or human-readable."""
if json_output:
Expand Down Expand Up @@ -1024,6 +1120,55 @@ def cmd_session_commits(args):
print(format_output(result, args.json))


# Issue #69: Compaction and efficiency commands


def cmd_compactions(args):
    """CLI handler: query compaction events (context resets) and print them.

    Reads ``args.days`` and ``args.json``; ``args.session_id`` is optional
    and treated as ``None`` when the attribute is absent.
    """
    db = SQLiteStorage()
    session_filter = getattr(args, "session_id", None)
    report = get_compaction_events(db, days=args.days, session_id=session_filter)
    rendered = format_output(report, args.json)
    print(rendered)


def cmd_pre_compaction(args):
    """CLI handler: print the events that preceded a given compaction.

    Reads ``args.session_id``, ``args.timestamp``, ``args.limit`` and
    ``args.json``.
    """
    db = SQLiteStorage()
    query_kwargs = {
        "session_id": args.session_id,
        "compaction_timestamp": args.timestamp,
        "limit": args.limit,
    }
    report = get_pre_compaction_events(db, **query_kwargs)
    rendered = format_output(report, args.json)
    print(rendered)


def cmd_large_results(args):
    """CLI handler: print large tool results that consume context space.

    Reads ``args.days``, ``args.min_size`` (passed on as ``min_size_kb``),
    ``args.limit`` and ``args.json``.
    """
    db = SQLiteStorage()
    query_kwargs = {
        "days": args.days,
        "min_size_kb": args.min_size,
        "limit": args.limit,
    }
    report = get_large_tool_results(db, **query_kwargs)
    rendered = format_output(report, args.json)
    print(rendered)


def cmd_efficiency(args):
    """CLI handler: print session context-efficiency metrics.

    Reads ``args.days`` and ``args.json``; ``args.project`` is optional and
    treated as ``None`` when the attribute is absent.
    """
    db = SQLiteStorage()
    project_filter = getattr(args, "project", None)
    report = get_session_efficiency(db, days=args.days, project=project_filter)
    rendered = format_output(report, args.json)
    print(rendered)


def _benchmark_tool(tool_name: str, tool_func: callable, iterations: int = 3) -> dict:
"""Benchmark a single MCP tool with multiple iterations.

Expand Down Expand Up @@ -1097,9 +1242,18 @@ def cmd_benchmark(args):
from session_analytics.queries import (
detect_parallel_sessions as queries_detect_parallel_sessions,
)
from session_analytics.queries import (
get_compaction_events as queries_get_compaction_events,
)
from session_analytics.queries import (
get_handoff_context as queries_get_handoff_context,
)
from session_analytics.queries import (
get_large_tool_results as queries_get_large_tool_results,
)
from session_analytics.queries import (
get_session_efficiency as queries_get_session_efficiency,
)
from session_analytics.queries import (
get_user_journey as queries_get_user_journey,
)
Expand Down Expand Up @@ -1183,6 +1337,12 @@ def cmd_benchmark(args):
"get_mcp_usage": lambda: queries_query_mcp_usage(storage, days=7),
"get_agent_activity": lambda: queries_query_agent_activity(storage, days=7),
"get_bus_events": lambda: queries_query_bus_events(storage, days=7, limit=10),
# Issue #69: Compaction and efficiency tools
"get_compaction_events": lambda: queries_get_compaction_events(storage, days=7),
"get_large_tool_results": lambda: queries_get_large_tool_results(
storage, days=7, min_size_kb=10, limit=10
),
"get_session_efficiency": lambda: queries_get_session_efficiency(storage, days=7),
}

# Skipped tools (require specific data or modify DB):
Expand Down Expand Up @@ -1474,6 +1634,36 @@ def main():
sub.add_argument("--limit", type=int, default=100, help="Max events to return (default: 100)")
sub.set_defaults(func=cmd_bus_events)

# Issue #69: Compaction and efficiency commands

# compactions
sub = subparsers.add_parser("compactions", help="Show compaction events (context resets)")
sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
sub.add_argument("--session-id", help="Filter to specific session ID")
sub.set_defaults(func=cmd_compactions)

# pre-compaction
sub = subparsers.add_parser("pre-compaction", help="Show events before a compaction event")
sub.add_argument("session_id", help="Session ID to analyze")
sub.add_argument("timestamp", help="ISO timestamp of the compaction event")
sub.add_argument("--limit", type=int, default=50, help="Max events to return (default: 50)")
sub.set_defaults(func=cmd_pre_compaction)

# large-results
sub = subparsers.add_parser(
"large-results", help="Show large tool results consuming context space"
)
sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
sub.add_argument("--min-size", type=int, default=10, help="Minimum size in KB (default: 10)")
sub.add_argument("--limit", type=int, default=50, help="Max results to return (default: 50)")
sub.set_defaults(func=cmd_large_results)

# efficiency
sub = subparsers.add_parser("efficiency", help="Show session context efficiency metrics")
sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
sub.add_argument("--project", help="Project path filter")
sub.set_defaults(func=cmd_efficiency)

# benchmark (Issue #63)
sub = subparsers.add_parser("benchmark", help="Benchmark all MCP tool response times")
sub.add_argument(
Expand Down
25 changes: 25 additions & 0 deletions src/session_analytics/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ Each session includes `classification_factors` explaining WHY it was categorized
|------|---------|
| `get_agent_activity(days?, project?)` | Task subagent activity vs main session (RFC #41) |

### Context Efficiency Analysis

| Tool | Purpose |
|------|---------|
| `get_compaction_events(days?, session_id?)` | List compaction events (context resets) |
| `get_pre_compaction_events(session_id, compaction_timestamp, limit?)` | Events before a compaction for analysis |
| `get_large_tool_results(days?, min_size_kb?, limit?)` | Find tool results consuming context space |
| `get_session_efficiency(days?, project?)` | Session efficiency metrics and burn rate |

**Context efficiency** helps identify why sessions hit context limits:
- **Compactions**: Context resets that occur when Claude summarizes the conversation
- **Large results**: Tool outputs consuming significant context space
- **Burn rate**: How fast sessions consume their context budget
- **Read/Edit ratio**: High ratio suggests inefficient exploration (should use Task/Explore)
- **Files read multiple times**: Redundant reads indicate opportunity to cache context

### Event-Bus Integration

| Tool | Purpose |
Expand Down Expand Up @@ -200,6 +216,15 @@ analyze_failures() → "These commands tend to fail"
analyze_trends() → "Usage is increasing/decreasing"
```

### Workflow: Context Efficiency

```
get_compaction_events() → "When did context resets happen?"
get_session_efficiency() → "Which sessions burn context fastest?"
get_large_tool_results() → "What operations consume the most space?"
get_pre_compaction_events() → "What led up to a specific reset?"
```

## Reference

### Session Categories
Expand Down
32 changes: 31 additions & 1 deletion src/session_analytics/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,28 @@ def extract_tool_result_content(tool_result: dict) -> str | None:
return None


def calculate_result_size(text: str | None) -> int | None:
"""Calculate the byte size of text content.

Issue #69: Tracks context window consumption for efficiency analysis.
Uses UTF-8 encoding to get actual byte size (not character count).
"""
if text is None:
return None
return len(text.encode("utf-8"))


def detect_compaction(text: str | None) -> bool:
"""Detect if summary content indicates a compaction event.

Issue #69: Compaction occurs when Claude Code truncates conversation history.
The summary message contains the marker phrase indicating context was compressed.
"""
if not text:
return False
return "continued from a previous conversation" in text.lower()


def find_log_files(
logs_dir: Path = DEFAULT_LOGS_DIR,
days: int = 7,
Expand Down Expand Up @@ -298,6 +320,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
git_branch=git_branch,
cwd=cwd,
message_text=assistant_text, # Issue #68: unified message text
result_size_bytes=calculate_result_size(assistant_text), # Issue #69
# RFC #41: Agent tracking fields
parent_uuid=None, # Assistant events have no parent
agent_id=agent_id,
Expand Down Expand Up @@ -392,6 +415,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
git_branch=git_branch,
cwd=cwd,
message_text=tool_result_text, # Issue #68: full tool result
result_size_bytes=calculate_result_size(tool_result_text), # Issue #69
# RFC #41: Agent tracking fields
agent_id=agent_id,
is_sidechain=is_sidechain,
Expand All @@ -411,6 +435,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
skill_name=command_name, # Reuse skill_name for command tracking
user_message_text=user_message_text,
message_text=message_text, # Issue #68: unified message text
result_size_bytes=calculate_result_size(message_text), # Issue #69
git_branch=git_branch,
cwd=cwd,
# RFC #41: Agent tracking fields
Expand All @@ -432,6 +457,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
skill_name=command_name, # Reuse skill_name for command tracking
user_message_text=user_message_text,
message_text=message_text, # Issue #68: unified message text
result_size_bytes=calculate_result_size(message_text), # Issue #69
git_branch=git_branch,
cwd=cwd,
# RFC #41: Agent tracking fields
Expand All @@ -447,15 +473,19 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
summary_content = message.get("content", "") if message else raw.get("summary", "")
summary_text = extract_text_from_content(summary_content)

# Issue #69: Detect compaction events
is_compaction = detect_compaction(summary_text)

events.append(
Event(
id=None,
uuid=uuid if uuid else f"summary:{raw.get('leafUuid', 'unknown')}",
timestamp=timestamp if timestamp else datetime.now(),
session_id=session_id if session_id else "unknown",
project_path=project_path,
entry_type="summary",
entry_type="compaction" if is_compaction else "summary", # Issue #69
message_text=summary_text, # Issue #68: unified message text
result_size_bytes=calculate_result_size(summary_text), # Issue #69
# RFC #41: Agent tracking fields
agent_id=agent_id,
is_sidechain=is_sidechain,
Expand Down
Loading