Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/SCHEMA.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ Sync triggers maintain index consistency:
| 7 | add_tool_id_index | Performance index for self-joins |
| 8 | add_unified_message_text | Unified message_text column, rebuilt FTS on all entry types (Issue #68) |
| 9 | add_result_size_bytes | result_size_bytes column for context efficiency tracking (Issue #69) |
| 10 | backfill_compaction_and_result_size | Backfill compaction detection and result_size_bytes for existing data |
| 11 | fix_compaction_detection_user_entries | Fix compaction detection to look at user entries (not just summary) |
| 12 | fix_warmup_not_errors | Fix warmup events incorrectly marked as errors (Issue #75) |

---

Expand Down
19 changes: 18 additions & 1 deletion src/session_analytics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from session_analytics.ingest import (
correlate_git_with_sessions,
ingest_git_history,
ingest_git_history_all_projects,
ingest_logs,
)
from session_analytics.patterns import (
Expand Down Expand Up @@ -1063,6 +1064,16 @@ def cmd_git_correlate(args):
print(format_output(result, args.json))


def cmd_git_ingest_all(args):
    """CLI handler for `git-ingest-all`: ingest git history for every known project.

    Reads `args.days` for the history window and `args.json` to select the
    output format, then prints the aggregate ingestion stats.
    """
    stats = ingest_git_history_all_projects(SQLiteStorage(), days=args.days)
    rendered = format_output(stats, args.json)
    print(rendered)


def cmd_signals(args):
"""Show raw session signals for LLM interpretation (RFC #26, revised per RFC #17)."""
storage = SQLiteStorage()
Expand Down Expand Up @@ -1346,7 +1357,8 @@ def cmd_benchmark(args):
}

# Skipped tools (require specific data or modify DB):
# - ingest_logs, ingest_git_history, correlate_git_with_sessions, ingest_bus_events
# - ingest_logs, ingest_git_history, ingest_git_history_all_projects
# - correlate_git_with_sessions, ingest_bus_events
# - find_related_sessions (requires valid session_id)

benchmarks = []
Expand Down Expand Up @@ -1575,6 +1587,11 @@ def main():
sub.add_argument("--days", type=int, default=7, help="Days to correlate (default: 7)")
sub.set_defaults(func=cmd_git_correlate)

# git-ingest-all
sub = subparsers.add_parser("git-ingest-all", help="Ingest git history from all known projects")
sub.add_argument("--days", type=int, default=7, help="Days of history (default: 7)")
sub.set_defaults(func=cmd_git_ingest_all)

# signals (RFC #26, revised per RFC #17 - raw data, no interpretation)
sub = subparsers.add_parser("signals", help="Show raw session signals for LLM interpretation")
sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
Expand Down
11 changes: 9 additions & 2 deletions src/session_analytics/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ Each session includes `classification_factors` explaining WHY it was categorized

| Tool | Purpose |
|------|---------|
| `ingest_git_history(days?, repo_path?)` | Parse and store git commits |
| `ingest_git_history(days?, repo_path?)` | Parse and store git commits from a single repo (`repo_path`; the current repo when omitted) |
| `ingest_git_history_all_projects(days?)` | Parse commits from all known projects |
| `correlate_git_with_sessions(days?)` | Link commits to sessions by timing |
| `get_session_commits(session_id?)` | Get commits associated with a session |

Expand Down Expand Up @@ -258,7 +259,13 @@ match commands `make`, `make-test`, etc. using fnmatch.
Git correlation requires two steps:

```
ingest_git_history(days=30) # Parse commits from repo
# Option 1: Ingest from all known projects (recommended)
ingest_git_history_all_projects(days=30)

# Option 2: Ingest from current repo only
ingest_git_history(days=30)

# Then correlate and query
correlate_git_with_sessions() # Link to sessions by timing
get_session_commits(session_id="abc") # View results
```
Expand Down
152 changes: 150 additions & 2 deletions src/session_analytics/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,50 @@
USER_MESSAGE_MAX_LENGTH = 2000


def decode_project_path(encoded: str) -> Path | None:
"""Decode an encoded project path back to a filesystem path.

Claude Code encodes project paths by replacing '/' with '-' in directory names.
e.g., "-Users-evansenter-Documents-projects-dotfiles" -> "/Users/evansenter/Documents/projects/dotfiles"

Handles paths with hyphens in directory names by trying to find valid paths.
e.g., "-Users-foo-my-project" could be "/Users/foo/my-project" or "/Users/foo-my/project"

Returns None if the decoded path doesn't exist or isn't a directory.
"""
if not encoded:
return None

# Split on '-' and skip empty first part (from leading '-')
parts = encoded.split("-")
if parts and parts[0] == "":
parts = parts[1:]

if not parts:
return None

def find_path(remaining_parts: list[str], current_path: Path) -> Path | None:
"""Recursively find valid path by trying different segment combinations."""
if not remaining_parts:
return current_path if current_path.is_dir() else None

# Try combining 1, 2, 3... segments with hyphens
for num_parts in range(1, len(remaining_parts) + 1):
segment = "-".join(remaining_parts[:num_parts])
candidate = current_path / segment

if candidate.exists():
# This segment exists, try to continue with remaining parts
result = find_path(remaining_parts[num_parts:], candidate)
if result is not None:
return result

return None

# Start from root
return find_path(parts, Path("/"))


def extract_text_from_content(content) -> str | None:
"""Extract text content from various message content formats.

Expand Down Expand Up @@ -402,6 +446,9 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
is_error = tr.get("is_error", False)
# Issue #68: Extract tool result content
tool_result_text = extract_tool_result_content(tr)
# Issue #75: Warmup exits are not real errors
if is_error and tool_result_text == "Warmup":
is_error = False
events.append(
Event(
id=None,
Expand All @@ -424,14 +471,18 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
)
else:
# User message with other content types
# Issue #69: Detect compaction markers in user messages
user_entry_type = "command" if command_name else "user"
if detect_compaction(message_text):
user_entry_type = "compaction"
events.append(
Event(
id=None,
uuid=uuid,
timestamp=timestamp,
session_id=session_id,
project_path=project_path,
entry_type="command" if command_name else "user",
entry_type=user_entry_type,
skill_name=command_name, # Reuse skill_name for command tracking
user_message_text=user_message_text,
message_text=message_text, # Issue #68: unified message text
Expand All @@ -446,14 +497,18 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
)
else:
# Plain text user message
# Issue #69: Detect compaction markers in user messages
user_entry_type = "command" if command_name else "user"
if detect_compaction(message_text):
user_entry_type = "compaction"
events.append(
Event(
id=None,
uuid=uuid,
timestamp=timestamp,
session_id=session_id,
project_path=project_path,
entry_type="command" if command_name else "user",
entry_type=user_entry_type,
skill_name=command_name, # Reuse skill_name for command tracking
user_message_text=user_message_text,
message_text=message_text, # Issue #68: unified message text
Expand Down Expand Up @@ -787,6 +842,99 @@ def ingest_git_history(
}


def ingest_git_history_all_projects(
    storage: SQLiteStorage,
    days: int = 7,
) -> dict:
    """Run git ingestion for every project recorded in the events table.

    Each distinct, non-null `project_path` in `events` is decoded with
    `decode_project_path`. Projects whose path cannot be resolved, or whose
    directory has no `.git` entry, are counted as skipped; the rest are
    passed to `ingest_git_history`.

    Args:
        storage: Storage instance
        days: Number of days of history to ingest (default: 7)

    Returns:
        Dict with aggregate stats:
        - days: The requested history window
        - projects_found: Total unique project paths in events table
        - projects_with_git: Projects that have a .git entry
        - projects_ingested: Projects processed without an "error" result
        - projects_skipped: Projects without valid path or .git entry
        - projects_failed: Projects whose ingestion result contains "error"
        - total_commits_added: Sum of new commits across ingested projects
        - per_project: One entry per project with a .git entry (skipped
          projects are not listed - use projects_skipped for those)
    """
    project_rows = storage.execute_query(
        """
        SELECT DISTINCT project_path
        FROM events
        WHERE project_path IS NOT NULL
        """
    )

    skipped = 0
    failed = 0
    commits_total = 0
    per_project = []

    for row in project_rows:
        encoded = row["project_path"]
        repo_dir = decode_project_path(encoded)

        if repo_dir is None:
            skipped += 1
            logger.debug(f"Could not decode or find path: {encoded}")
            continue

        # `.git` is a directory for a normal repo and a file for a worktree,
        # so only existence is checked.
        if not (repo_dir / ".git").exists():
            skipped += 1
            continue

        outcome = ingest_git_history(
            storage=storage,
            repo_path=repo_dir,
            days=days,
            project_path=encoded,
        )

        if "error" in outcome:
            failed += 1
            logger.warning(f"Git ingestion failed for {repo_dir}: {outcome['error']}")
        else:
            commits_total += outcome.get("commits_added", 0)

        entry = {
            "project": str(repo_dir),
            "commits_added": outcome.get("commits_added", 0),
            "error": outcome.get("error"),
        }
        per_project.append(entry)

    # Every project with a .git entry yields exactly one per_project entry,
    # and each of those either failed or was ingested.
    with_git = len(per_project)
    return {
        "days": days,
        "projects_found": len(project_rows),
        "projects_with_git": with_git,
        "projects_ingested": with_git - failed,
        "projects_skipped": skipped,
        "projects_failed": failed,
        "total_commits_added": commits_total,
        "per_project": per_project,
    }


def correlate_git_with_sessions(
storage: SQLiteStorage,
days: int = 7,
Expand Down
20 changes: 20 additions & 0 deletions src/session_analytics/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,26 @@ def correlate_git_with_sessions(days: int = 7) -> dict:
return {"status": "ok", **result}


@mcp.tool()
def ingest_git_history_all_projects(days: int = 7) -> dict:
    """Ingest git commit history from all known projects.

    Delegates to ingest.ingest_git_history_all_projects, which scans the
    unique project paths in the events table, decodes them to filesystem
    paths, and runs git ingestion on each one that has a .git entry.

    Use this instead of ingest_git_history() when commits from every known
    project are wanted rather than from a single repository.

    Args:
        days: Number of days of history to ingest (default: 7)

    Returns:
        {"status": "ok"} merged with the aggregate stats across all projects
        (including total commits added)
    """
    stats = ingest.ingest_git_history_all_projects(storage, days=days)
    response = {"status": "ok"}
    response.update(stats)
    return response


@mcp.tool()
def get_session_signals(days: int = 7, min_count: int = 1) -> dict:
"""Get raw session signals for LLM interpretation.
Expand Down
Loading