evansenter · evansenter · Jan 10, 2026 · Jan 10, 2026 · Jan 10, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -22,6 +22,11 @@ Queryable analytics for Claude Code session logs, exposed as an MCP server and C
 cp ~/.claude/contrib/analytics/data.db ~/.claude/contrib/analytics/data.db.backup-$(date +%Y%m%d-%H%M%S)
 ```
 
+### When adding new columns:
+1. **Always backup first** (see above)
+2. **Backfill existing data** when possible - new columns should be populated for historical records, not just future ingestion
+3. For backfill: either re-ingest from JSONL (`ingest_logs(force=True)` after clearing) or write UPDATE queries in the migration
+
 ---
 
 ## Design Philosophy

diff --git a/docs/SCHEMA.md b/docs/SCHEMA.md
@@ -80,6 +80,9 @@ CREATE TABLE events (
     is_sidechain INTEGER DEFAULT 0,
     version TEXT,              -- Claude Code version
 
+    -- Context efficiency (Issue #69)
+    result_size_bytes INTEGER, -- UTF-8 byte size of message_text, used for context analysis
+
     UNIQUE(session_id, uuid)   -- UUID unique within each session
 )
 ```
@@ -89,6 +92,8 @@ CREATE TABLE events (
 - Token columns only populated on `entry_type='assistant'` to avoid double-counting
 - `message_text` enables FTS via `events_fts` virtual table for all entry types
 - `tool_input_json` preserves full parameters for drill-down queries
+- `entry_type='compaction'` marks context resets (detected when the summary text contains "continued from a previous conversation", matched case-insensitively)
+- `result_size_bytes` enables context burn rate analysis
 
 ### sessions
 
@@ -253,6 +258,7 @@ Sync triggers maintain index consistency:
 | 6 | add_event_bus_integration | bus_events table |
 | 7 | add_tool_id_index | Performance index for self-joins |
 | 8 | add_unified_message_text | Unified message_text column, rebuilt FTS on all entry types (Issue #68) |
+| 9 | add_result_size_bytes | result_size_bytes column for context efficiency tracking (Issue #69) |
 
 ---
 

diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py
@@ -24,7 +24,11 @@
     classify_sessions,
     detect_parallel_sessions,
     find_related_sessions,
+    get_compaction_events,
     get_handoff_context,
+    get_large_tool_results,
+    get_pre_compaction_events,
+    get_session_efficiency,
     get_user_journey,
     query_agent_activity,
     query_bus_events,
@@ -610,6 +614,98 @@ def format_metric(name: str, metric: dict) -> str:
     return lines
 
 
+# Issue #69: Compaction and efficiency formatters
+
+
+@_register_formatter(lambda d: "compaction_count" in d and "compactions" in d)
+def _format_compactions(data: dict) -> list[str]:
+    # Text lines for a compaction result; sessions affected = distinct IDs in the list.
+    unique_sessions = len({c["session_id"] for c in data.get("compactions", [])})
+    lines = [
+        f"Compaction events (context resets) - last {data.get('days', 7)} days",
+        "",
+        f"Total compactions: {data['compaction_count']}",
+        f"Sessions affected: {unique_sessions}",
+        "",
+    ]
+    if data.get("compactions"):
+        lines.append("Recent compactions:")
+        for c in data["compactions"][:10]:  # show at most 10 entries
+            lines.append(f"  {c['timestamp']} - session {c['session_id'][:8]}...")
+    return lines
+
+
+@_register_formatter(lambda d: "compaction_timestamp" in d and "events" in d and "event_count" in d)
+def _format_pre_compaction(data: dict) -> list[str]:  # text lines for a pre-compaction result
+    lines = [
+        f"Events before compaction at {data['compaction_timestamp']}",
+        f"Session: {data['session_id']}",  # NOTE(review): key not checked by the predicate
+        "",
+        f"Events found: {data['event_count']}",
+        "",
+    ]
+    if data.get("events"):
+        lines.append("Events (most recent first):")
+        for e in data["events"]:
+            tool = e.get("tool") or e.get("type", "unknown")  # tool name, else entry type
+            size_info = ""
+            if e.get("size_bytes"):  # missing or zero sizes are not shown
+                size_kb = e["size_bytes"] / 1024
+                size_info = f" ({size_kb:.1f}KB)"
+            identifier = e.get("file") or e.get("command") or ""  # file preferred over command
+            if identifier:
+                identifier = f" - {identifier[:40]}"  # truncated to 40 characters
+            error_mark = " [ERR]" if e.get("error") else ""
+            lines.append(f"  {e['timestamp']} {tool}{size_info}{identifier}{error_mark}")
+    return lines
+
+
+@_register_formatter(lambda d: "large_results" in d and "result_count" in d)
+def _format_large_results(data: dict) -> list[str]:
+    # Text lines for a large-results result; total size sums total_mb over tool_breakdown.
+    total_mb = sum(t.get("total_mb", 0) for t in data.get("tool_breakdown", []))
+    lines = [
+        f"Large tool results (>= {data.get('min_size_kb', 10)}KB) - last {data.get('days', 7)} days",
+        "",
+        f"Total large results: {data['result_count']}",
+        f"Total size: {total_mb:.2f}MB",
+        "",
+    ]
+    if data.get("large_results"):
+        lines.append("Top results by size:")
+        for r in data["large_results"][:10]:  # show at most 10 entries
+            identifier = r.get("file") or r.get("command") or "N/A"  # file, else command
+            lines.append(f"  {r['tool']}: {r['size_kb']:.1f}KB - {identifier[:50]}")
+    return lines
+
+
+@_register_formatter(
+    lambda d: "sessions" in d
+    and "session_count" in d
+    and any("efficiency_signals" in s for s in d.get("sessions", []))
+)
+def _format_efficiency(data: dict) -> list[str]:  # text lines for a session-efficiency result
+    lines = [
+        f"Session efficiency - last {data.get('days', 7)} days",
+        "",
+        f"Sessions analyzed: {data.get('session_count', 0)}",
+        "",
+        "Sessions by context usage:",
+    ]
+    for s in data.get("sessions", [])[:10]:  # show at most 10 sessions
+        signals = s.get("efficiency_signals", {})  # absent signals fall back to 0 below
+        total_mb = signals.get("total_result_mb", 0)
+        compactions = signals.get("compaction_count", 0)
+        burn_rate = signals.get("burn_rate_tokens_per_event", 0)
+        read_edit = signals.get("read_to_edit_ratio", 0)
+        multi_read = signals.get("files_read_multiple_times", 0)
+        lines.append(
+            f"  {s['session_id'][:8]}...: {total_mb:.2f}MB, {compactions} compactions, "
+            f"{burn_rate:.0f} tok/ev, R/E:{read_edit:.1f}, multi-read:{multi_read}"
+        )
+    return lines
+
+
 def format_output(data: dict, json_output: bool = False) -> str:
     """Format output as JSON or human-readable."""
     if json_output:
@@ -1024,6 +1120,55 @@ def cmd_session_commits(args):
     print(format_output(result, args.json))
 
 
+# Issue #69: Compaction and efficiency commands
+
+
+def cmd_compactions(args):
+    """Query compaction events (context resets) and print them as text or JSON."""
+    storage = SQLiteStorage()
+    result = get_compaction_events(
+        storage,
+        days=args.days,
+        session_id=getattr(args, "session_id", None),  # optional --session-id filter
+    )
+    print(format_output(result, args.json))
+
+
+def cmd_pre_compaction(args):
+    """Query the events before one compaction and print them as text or JSON."""
+    storage = SQLiteStorage()
+    result = get_pre_compaction_events(
+        storage,
+        session_id=args.session_id,
+        compaction_timestamp=args.timestamp,  # ISO timestamp string from the CLI
+        limit=args.limit,
+    )
+    print(format_output(result, args.json))
+
+
+def cmd_large_results(args):
+    """Query large tool results (context-space consumers) and print them as text or JSON."""
+    storage = SQLiteStorage()
+    result = get_large_tool_results(
+        storage,
+        days=args.days,
+        min_size_kb=args.min_size,  # --min-size is given in KB
+        limit=args.limit,
+    )
+    print(format_output(result, args.json))
+
+
+def cmd_efficiency(args):
+    """Query session context efficiency metrics and print them as text or JSON."""
+    storage = SQLiteStorage()
+    result = get_session_efficiency(
+        storage,
+        days=args.days,
+        project=getattr(args, "project", None),  # optional --project filter
+    )
+    print(format_output(result, args.json))
+
+
 def _benchmark_tool(tool_name: str, tool_func: callable, iterations: int = 3) -> dict:
     """Benchmark a single MCP tool with multiple iterations.
 
@@ -1097,9 +1242,18 @@ def cmd_benchmark(args):
     from session_analytics.queries import (
         detect_parallel_sessions as queries_detect_parallel_sessions,
     )
+    from session_analytics.queries import (
+        get_compaction_events as queries_get_compaction_events,
+    )
     from session_analytics.queries import (
         get_handoff_context as queries_get_handoff_context,
     )
+    from session_analytics.queries import (
+        get_large_tool_results as queries_get_large_tool_results,
+    )
+    from session_analytics.queries import (
+        get_session_efficiency as queries_get_session_efficiency,
+    )
     from session_analytics.queries import (
         get_user_journey as queries_get_user_journey,
     )
@@ -1183,6 +1337,12 @@ def cmd_benchmark(args):
         "get_mcp_usage": lambda: queries_query_mcp_usage(storage, days=7),
         "get_agent_activity": lambda: queries_query_agent_activity(storage, days=7),
         "get_bus_events": lambda: queries_query_bus_events(storage, days=7, limit=10),
+        # Issue #69: Compaction and efficiency tools
+        "get_compaction_events": lambda: queries_get_compaction_events(storage, days=7),
+        "get_large_tool_results": lambda: queries_get_large_tool_results(
+            storage, days=7, min_size_kb=10, limit=10
+        ),
+        "get_session_efficiency": lambda: queries_get_session_efficiency(storage, days=7),
     }
 
     # Skipped tools (require specific data or modify DB):
@@ -1474,6 +1634,36 @@ def main():
     sub.add_argument("--limit", type=int, default=100, help="Max events to return (default: 100)")
     sub.set_defaults(func=cmd_bus_events)
 
+    # Issue #69: Compaction and efficiency commands
+
+    # compactions
+    sub = subparsers.add_parser("compactions", help="Show compaction events (context resets)")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--session-id", help="Filter to specific session ID")
+    sub.set_defaults(func=cmd_compactions)
+
+    # pre-compaction
+    sub = subparsers.add_parser("pre-compaction", help="Show events before a compaction event")
+    sub.add_argument("session_id", help="Session ID to analyze")
+    sub.add_argument("timestamp", help="ISO timestamp of the compaction event")
+    sub.add_argument("--limit", type=int, default=50, help="Max events to return (default: 50)")
+    sub.set_defaults(func=cmd_pre_compaction)
+
+    # large-results
+    sub = subparsers.add_parser(
+        "large-results", help="Show large tool results consuming context space"
+    )
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--min-size", type=int, default=10, help="Minimum size in KB (default: 10)")
+    sub.add_argument("--limit", type=int, default=50, help="Max results to return (default: 50)")
+    sub.set_defaults(func=cmd_large_results)
+
+    # efficiency
+    sub = subparsers.add_parser("efficiency", help="Show session context efficiency metrics")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--project", help="Project path filter")
+    sub.set_defaults(func=cmd_efficiency)
+
     # benchmark (Issue #63)
     sub = subparsers.add_parser("benchmark", help="Benchmark all MCP tool response times")
     sub.add_argument(

diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md
@@ -115,6 +115,22 @@ Each session includes `classification_factors` explaining WHY it was categorized
 |------|---------|
 | `get_agent_activity(days?, project?)` | Task subagent activity vs main session (RFC #41) |
 
+### Context Efficiency Analysis
+
+| Tool | Purpose |
+|------|---------|
+| `get_compaction_events(days?, session_id?)` | List compaction events (context resets) |
+| `get_pre_compaction_events(session_id, compaction_timestamp, limit?)` | Events before a compaction for analysis |
+| `get_large_tool_results(days?, min_size_kb?, limit?)` | Find tool results consuming context space |
+| `get_session_efficiency(days?, project?)` | Session efficiency metrics and burn rate |
+
+**Context efficiency** helps identify why sessions hit context limits:
+- **Compactions**: Context resets that occur when Claude summarizes the conversation
+- **Large results**: Tool outputs consuming significant context space
+- **Burn rate**: How fast sessions consume their context budget (reported as tokens per event)
+- **Read/Edit ratio**: High ratio suggests inefficient exploration (should use Task/Explore)
+- **Files read multiple times**: Redundant reads indicate opportunity to cache context
+
 ### Event-Bus Integration
 
 | Tool | Purpose |
@@ -200,6 +216,15 @@ analyze_failures()    → "These commands tend to fail"
 analyze_trends()      → "Usage is increasing/decreasing"
 ```
 
+### Workflow: Context Efficiency
+
+```
+get_compaction_events()     → "When did context resets happen?"
+get_session_efficiency()    → "Which sessions burn context fastest?"
+get_large_tool_results()    → "What operations consume the most space?"
+get_pre_compaction_events() → "What led up to a specific reset?"
+```
+
 ## Reference
 
 ### Session Categories

diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py
@@ -84,6 +84,28 @@ def extract_tool_result_content(tool_result: dict) -> str | None:
     return None
 
 
+def calculate_result_size(text: str | None) -> int | None:
+    """Return the UTF-8 byte size of ``text`` (not its character count), or None for None.
+
+    Issue #69: Tracks context window consumption for efficiency analysis.
+    Lone surrogates (json.loads can yield them) are encoded instead of raising.
+    """
+    if text is None:
+        return None
+    return len(text.encode("utf-8", errors="surrogatepass"))
+
+
+def detect_compaction(text: str | None) -> bool:
+    """Return whether ``text`` is a compaction summary (False for None or empty text).
+
+    Issue #69: A compaction (context reset) summary is recognized by the phrase
+    "continued from a previous conversation", matched case-insensitively.
+    """
+    if not text:
+        return False
+    return "continued from a previous conversation" in text.lower()
+
+
 def find_log_files(
     logs_dir: Path = DEFAULT_LOGS_DIR,
     days: int = 7,
@@ -298,6 +320,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
                 git_branch=git_branch,
                 cwd=cwd,
                 message_text=assistant_text,  # Issue #68: unified message text
+                result_size_bytes=calculate_result_size(assistant_text),  # Issue #69
                 # RFC #41: Agent tracking fields
                 parent_uuid=None,  # Assistant events have no parent
                 agent_id=agent_id,
@@ -392,6 +415,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
                             git_branch=git_branch,
                             cwd=cwd,
                             message_text=tool_result_text,  # Issue #68: full tool result
+                            result_size_bytes=calculate_result_size(tool_result_text),  # Issue #69
                             # RFC #41: Agent tracking fields
                             agent_id=agent_id,
                             is_sidechain=is_sidechain,
@@ -411,6 +435,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
                         skill_name=command_name,  # Reuse skill_name for command tracking
                         user_message_text=user_message_text,
                         message_text=message_text,  # Issue #68: unified message text
+                        result_size_bytes=calculate_result_size(message_text),  # Issue #69
                         git_branch=git_branch,
                         cwd=cwd,
                         # RFC #41: Agent tracking fields
@@ -432,6 +457,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
                     skill_name=command_name,  # Reuse skill_name for command tracking
                     user_message_text=user_message_text,
                     message_text=message_text,  # Issue #68: unified message text
+                    result_size_bytes=calculate_result_size(message_text),  # Issue #69
                     git_branch=git_branch,
                     cwd=cwd,
                     # RFC #41: Agent tracking fields
@@ -447,15 +473,19 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
         summary_content = message.get("content", "") if message else raw.get("summary", "")
         summary_text = extract_text_from_content(summary_content)
 
+        # Issue #69: Detect compaction events
+        is_compaction = detect_compaction(summary_text)
+
         events.append(
             Event(
                 id=None,
                 uuid=uuid if uuid else f"summary:{raw.get('leafUuid', 'unknown')}",
                 timestamp=timestamp if timestamp else datetime.now(),
                 session_id=session_id if session_id else "unknown",
                 project_path=project_path,
-                entry_type="summary",
+                entry_type="compaction" if is_compaction else "summary",  # Issue #69
                 message_text=summary_text,  # Issue #68: unified message text
+                result_size_bytes=calculate_result_size(summary_text),  # Issue #69
                 # RFC #41: Agent tracking fields
                 agent_id=agent_id,
                 is_sidechain=is_sidechain,