Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions docs/SCHEMA.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ This document describes the SQLite database schema for agent-session-analytics.
| `bus_events` | Cross-session events from event-bus | ~2K |
| `events_fts` | FTS5 virtual table for user message search | N/A |
| `raw_entries` | Unparsed JSONL entries for future re-parsing | 100K+ |
| `project_aliases` | Alias mappings for renamed projects | ~10 |

---

Expand Down Expand Up @@ -211,6 +212,26 @@ CREATE TABLE raw_entries (

**Design note**: The UNIQUE constraint on `entry_json` ensures exact deduplication. While this means large JSON values are compared, SQLite handles this efficiently and it avoids hash collision edge cases.

### project_aliases

Maps alias names to target patterns for flexible project filtering. When filtering by an alias, queries automatically expand to match both the alias and all its targets.

```sql
CREATE TABLE project_aliases (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    alias TEXT NOT NULL COLLATE NOCASE,   -- The filter name (e.g., "genai-rs")
    target TEXT NOT NULL COLLATE NOCASE,  -- Pattern to also match (e.g., "rust-genai")
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(alias, target)
);
CREATE INDEX idx_project_aliases_alias ON project_aliases(alias COLLATE NOCASE);
```

**Key patterns**:
- One alias can have multiple targets (for projects renamed multiple times)
- `COLLATE NOCASE` ensures case-insensitive matching
- Query expansion: `WHERE project_path LIKE '%alias%' OR project_path LIKE '%target%'`

---

## Indexes
Expand Down Expand Up @@ -245,6 +266,7 @@ Performance-critical indexes on the `events` table:
| `bus_events` | `idx_bus_events_repo` | `repo` |
| `raw_entries` | `idx_raw_entries_session` | `session_id` |
| `raw_entries` | `idx_raw_entries_timestamp` | `timestamp` |
| `project_aliases` | `idx_project_aliases_alias` | `alias COLLATE NOCASE` |

---

Expand Down Expand Up @@ -284,6 +306,7 @@ Sync triggers maintain index consistency:
| 11 | fix_compaction_detection_user_entries | Fix compaction detection to look at user entries (not just summary) |
| 12 | fix_warmup_not_errors | Fix warmup events incorrectly marked as errors (Issue #75) |
| 13 | add_raw_entries_table | Raw JSONL storage for future re-parsing (Issue #93) |
| 14 | add_project_aliases | Project alias table for renamed project matching (Issue #71) |

---

Expand Down
82 changes: 82 additions & 0 deletions src/agent_session_analytics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,27 @@ def _format_efficiency(data: dict) -> list[str]:
return lines


@_register_formatter(
    lambda d: "aliases" in d
    and all(
        isinstance(entry, dict) and "alias" in entry and "target" in entry
        for entry in d.get("aliases", [])
    )
)
def _format_aliases(data: dict) -> list[str]:
    """Render project alias mappings as human-readable output lines.

    The registered predicate selects result dicts that have an "aliases"
    key whose entries are all dicts carrying both "alias" and "target".

    Args:
        data: Result dict; its "aliases" value is read as a list of
            mappings with "alias" and "target" keys.

    Returns:
        A single notice line when no aliases are present; otherwise a
        heading, a blank line, and one "alias → target" line per mapping.
    """
    mappings = data.get("aliases", [])
    if not mappings:
        return ["No project aliases configured."]

    heading = ["Project aliases:", ""]
    return heading + [f" {m['alias']} → {m['target']}" for m in mappings]


def format_output(data: dict, json_output: bool = False) -> str:
"""Format output as JSON or human-readable."""
if json_output:
Expand Down Expand Up @@ -1471,13 +1492,16 @@ def cmd_benchmark(args):
storage, days=7, min_size_kb=10, limit=10
),
"get_session_efficiency": lambda: queries_get_session_efficiency(storage, days=7),
# Issue #71: Project aliases
"list_project_aliases": lambda: storage.get_project_aliases(),
}

# Skipped tools (require specific data or modify DB):
# - ingest_logs, ingest_git_history, ingest_git_history_all_projects
# - correlate_git_with_sessions, ingest_bus_events
# - find_related_sessions (requires valid session_id)
# - upload_entries, get_sync_status, finalize_sync (remote sync tools - modify DB or require client context)
# - add_project_alias, remove_project_alias (modify DB)

benchmarks = []
for tool_name, tool_func in tool_functions.items():
Expand Down Expand Up @@ -1740,6 +1764,41 @@ def mcp_call(method_name: str, arguments: dict) -> dict | None:
print(format_output(output, args.json))


# --- Alias Commands ---


def cmd_alias_add(args):
    """Add a project alias and print the outcome.

    Reads ``args.alias``, ``args.target`` and ``args.json``. When storage
    raises ``sqlite3.IntegrityError`` the command still reports status
    "ok", with an extra "Alias already exists" message, so re-adding an
    existing pair is treated as a no-op rather than a failure.
    """
    storage = SQLiteStorage()
    outcome = {"status": "ok"}
    try:
        storage.add_project_alias(args.alias, args.target)
    except sqlite3.IntegrityError:
        # Inserted before alias/target so the key order matches the
        # duplicate-case payload exactly.
        outcome["message"] = "Alias already exists"
    outcome["alias"] = args.alias
    outcome["target"] = args.target
    print(format_output(outcome, args.json))


def cmd_alias_remove(args):
    """Remove project alias mapping(s) and print how many were removed.

    ``args.target`` is optional on the command line and is passed to
    storage unchanged (``None`` when omitted). The value returned by
    ``remove_project_alias`` is reported as ``removed_count``.
    """
    removed_count = SQLiteStorage().remove_project_alias(args.alias, args.target)
    payload = {
        "status": "ok",
        "alias": args.alias,
        "removed_count": removed_count,
    }
    print(format_output(payload, args.json))


def cmd_alias_list(args):
    """Print configured project aliases.

    ``args.alias`` (the optional ``--alias`` flag) is forwarded to
    ``get_project_aliases``; the result is wrapped under the "aliases"
    key and rendered as JSON or text according to ``args.json``.
    """
    payload = {"aliases": SQLiteStorage().get_project_aliases(args.alias)}
    print(format_output(payload, args.json))


def main():
"""CLI entry point."""
epilog = """
Expand Down Expand Up @@ -2094,6 +2153,29 @@ def main():
)
sub.set_defaults(func=cmd_push)

# alias (Issue #71 - project aliases for renamed projects)
alias_parser = subparsers.add_parser(
"alias", help="Manage project aliases for flexible filtering"
)
alias_subparsers = alias_parser.add_subparsers(dest="alias_command", required=True)

# alias add
sub = alias_subparsers.add_parser("add", help="Add a project alias")
sub.add_argument("alias", help="The alias name (e.g., 'genai-rs')")
sub.add_argument("target", help="The target to match (e.g., 'rust-genai')")
sub.set_defaults(func=cmd_alias_add)

# alias remove
sub = alias_subparsers.add_parser("remove", help="Remove project alias(es)")
sub.add_argument("alias", help="The alias to remove")
sub.add_argument("target", nargs="?", help="Target to remove (all if omitted)")
sub.set_defaults(func=cmd_alias_remove)

# alias list
sub = alias_subparsers.add_parser("list", help="List project aliases")
sub.add_argument("--alias", help="Filter to specific alias")
sub.set_defaults(func=cmd_alias_list)

args = parser.parse_args()
args.func(args)

Expand Down
37 changes: 37 additions & 0 deletions src/agent_session_analytics/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,43 @@ The `push` command queries `get_sync_status()` first to determine what the serve

**Raw entry storage:** All uploaded entries are stored in both parsed form (events table) and raw form (raw_entries table). This allows re-parsing historical data when the parser improves.

### Project Aliases

When projects are renamed, historical data doesn't match new filters. Project aliases solve this:

| Tool | Purpose |
|------|---------|
| `add_project_alias(alias, target)` | Link an alias to a target pattern |
| `remove_project_alias(alias, target?)` | Remove alias (all targets if target omitted) |
| `list_project_aliases(alias?)` | List configured aliases |

**Example:** Your project was renamed from `rust-genai` to `genai-rs`:
```
add_project_alias("genai-rs", "rust-genai")
```

Now `--project genai-rs` will match both `genai-rs` AND `rust-genai` in all queries.

**CLI usage:**
```bash
# Add alias
agent-session-analytics-cli alias add genai-rs rust-genai

# List all aliases
agent-session-analytics-cli alias list

# Remove specific alias-target pair
agent-session-analytics-cli alias remove genai-rs rust-genai

# Remove all targets for an alias
agent-session-analytics-cli alias remove genai-rs
```

**Notes:**
- Matching is case-insensitive (`GenAI-RS` matches `genai-rs`)
- Aliases expand to OR clauses: `WHERE project_path LIKE '%genai-rs%' OR project_path LIKE '%rust-genai%'`
- Multiple targets can be added per alias (e.g., for projects renamed multiple times)

### Core Queries

| Tool | Purpose |
Expand Down
47 changes: 33 additions & 14 deletions src/agent_session_analytics/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@ def build_where_clause(
cutoff_column: str = "timestamp",
project: str | None = None,
extra_conditions: list[str] | None = None,
storage: SQLiteStorage | None = None,
) -> tuple[str, list]:
"""Build a WHERE clause with common query filters.

Args:
cutoff: Datetime for cutoff filter (>= comparison)
cutoff_column: Column name for cutoff (default: "timestamp")
project: Optional project path filter (LIKE %project%)
project: Optional project path filter (LIKE match, supports aliases)
extra_conditions: Additional WHERE conditions to include
storage: Storage instance for alias resolution (optional)

Returns:
Tuple of (where_clause_string, params_list)
Expand All @@ -45,8 +47,21 @@ def build_where_clause(
params.append(cutoff)

if project:
conditions.append("project_path LIKE ?")
params.append(f"%{project}%")
# Resolve aliases if storage provided
if storage:
patterns = storage.resolve_project_aliases(project)
else:
patterns = [project]

# Build condition with COLLATE NOCASE for case-insensitive matching
if len(patterns) == 1:
conditions.append("project_path LIKE ? COLLATE NOCASE")
params.append(f"%{patterns[0]}%")
else:
# Multiple patterns: (project_path LIKE ? OR project_path LIKE ? ...)
pattern_conditions = " OR ".join(["project_path LIKE ? COLLATE NOCASE"] * len(patterns))
conditions.append(f"({pattern_conditions})")
params.extend(f"%{p}%" for p in patterns)

if extra_conditions:
conditions.extend(extra_conditions)
Expand Down Expand Up @@ -147,6 +162,7 @@ def query_tool_frequency(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name IS NOT NULL"],
storage=storage,
)

# Get tool frequency counts
Expand All @@ -169,6 +185,7 @@ def query_tool_frequency(
cutoff=cutoff,
project=project,
extra_conditions=["entry_type = 'command'"],
storage=storage,
)
cmd_rows = storage.execute_query(
f"SELECT COUNT(*) as count FROM events WHERE {cmd_where}",
Expand Down Expand Up @@ -226,6 +243,7 @@ def _get_skill_breakdown(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name = 'Skill'", "skill_name IS NOT NULL"],
storage=storage,
)

rows = storage.execute_query(
Expand All @@ -252,6 +270,7 @@ def _get_command_breakdown(
cutoff=cutoff,
project=project,
extra_conditions=["entry_type = 'command'", "skill_name IS NOT NULL"],
storage=storage,
)

rows = storage.execute_query(
Expand All @@ -278,6 +297,7 @@ def _get_task_breakdown(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name = 'Task'", "tool_input_json IS NOT NULL"],
storage=storage,
)

rows = storage.execute_query(
Expand Down Expand Up @@ -308,6 +328,7 @@ def _get_bash_breakdown(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name = 'Bash'", "command IS NOT NULL"],
storage=storage,
)

rows = storage.execute_query(
Expand Down Expand Up @@ -407,6 +428,7 @@ def query_commands(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name = 'Bash'", "command IS NOT NULL"],
storage=storage,
)

# Add prefix filter if specified
Expand Down Expand Up @@ -459,6 +481,7 @@ def query_sessions(
cutoff=cutoff,
cutoff_column="last_seen",
project=project,
storage=storage,
)

rows = storage.execute_query(
Expand Down Expand Up @@ -531,6 +554,7 @@ def query_tokens(
where_clause, params = build_where_clause(
cutoff=cutoff,
project=project,
storage=storage,
)

if by == "day":
Expand Down Expand Up @@ -1095,16 +1119,7 @@ def classify_sessions(
- category_distribution: Count of sessions per category
"""
cutoff = get_cutoff(days=days)

# Build where clause
where_parts = ["timestamp >= ?"]
params: list = [cutoff]

if project:
where_parts.append("project_path LIKE ?")
params.append(f"%{project}%")

where_clause = " AND ".join(where_parts)
where_clause, params = build_where_clause(cutoff=cutoff, project=project, storage=storage)

# Get activity stats per session (including efficiency metrics for #79)
# Safe: where_clause comes from build_where_clause(); cutoff and project values are bound as parameters
Expand Down Expand Up @@ -1494,6 +1509,7 @@ def query_file_activity(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name IN ('Read', 'Edit', 'Write')", "file_path IS NOT NULL"],
storage=storage,
)

rows = storage.execute_query(
Expand Down Expand Up @@ -1572,6 +1588,7 @@ def query_languages(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name IN ('Read', 'Edit', 'Write')", "file_path IS NOT NULL"],
storage=storage,
)

rows = storage.execute_query(
Expand Down Expand Up @@ -1712,6 +1729,7 @@ def query_mcp_usage(
cutoff=cutoff,
project=project,
extra_conditions=["tool_name LIKE 'mcp__%'"],
storage=storage,
)

rows = storage.execute_query(
Expand Down Expand Up @@ -1796,6 +1814,7 @@ def query_agent_activity(
where_clause, params = build_where_clause(
cutoff=cutoff,
project=project,
storage=storage,
)

# Query aggregated stats per agent_id (NULL = main session)
Expand Down Expand Up @@ -2601,7 +2620,7 @@ def get_session_efficiency(
Dict with efficiency metrics per session
"""
cutoff = get_cutoff(days=days)
where_clause, params = build_where_clause(cutoff=cutoff, project=project)
where_clause, params = build_where_clause(cutoff=cutoff, project=project, storage=storage)

# Get session-level efficiency metrics
query_params = list(params)
Expand Down
Loading