From a600dc127b65251a4cbc7d12780561c2b5c3d346 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 04:14:03 +0000
Subject: [PATCH 01/10] Add Phase 1: Project setup and FastMCP server skeleton
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- pyproject.toml with FastMCP, uvicorn, and dev dependencies
- Makefile with check, fmt, lint, test, install, uninstall targets
- LaunchAgent plist and install/uninstall scripts for auto-start
- dev.sh script for development mode with auto-reload
- Basic FastMCP server with placeholder tools:
  - get_status: Returns server status
  - ingest_logs: Placeholder for log ingestion
  - query_tool_frequency: Placeholder for frequency queries
- Usage guide as MCP resource at session-analytics://guide
- Tests for the placeholder tools
- README with installation and usage instructions

Server runs on port 8081 (to not conflict with event-bus on 8080).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .claude/settings.local.json                   |  18 +++
 Makefile                                      |  73 ++++++++++
 README.md                                     |  59 ++++++++
 pyproject.toml                                |  54 ++++++++
 ....evansenter.claude-session-analytics.plist |  41 ++++++
 scripts/dev.sh                                |  37 +++++
 scripts/install-launchagent.sh                |  55 ++++++++
 scripts/uninstall-launchagent.sh              |  24 ++++
 src/session_analytics/__init__.py             |   3 +
 src/session_analytics/guide.md                |  68 ++++++++++
 src/session_analytics/server.py               | 126 ++++++++++++++++++
 tests/__init__.py                             |   1 +
 tests/conftest.py                             |  25 ++++
 tests/test_server.py                          |  27 ++++
 14 files changed, 611 insertions(+)
 create mode 100644 .claude/settings.local.json
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100644 pyproject.toml
 create mode 100644 scripts/com.evansenter.claude-session-analytics.plist
 create mode 100755 scripts/dev.sh
 create mode 100755 scripts/install-launchagent.sh
 create mode 100755 scripts/uninstall-launchagent.sh
 create mode 100644 src/session_analytics/guide.md
 create mode 100644 src/session_analytics/server.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_server.py

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..65a0653
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,18 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(chmod:*)",
+      "Bash(python3 -m venv:*)",
+      "Bash(.venv/bin/pip install:*)",
+      "Bash(brew list:*)",
+      "Bash(/opt/homebrew/bin/python3.12:*)",
+      "Bash(.venv/bin/ruff format:*)",
+      "Bash(.venv/bin/ruff check .)",
+      "Bash(.venv/bin/pytest tests/ -v)",
+      "Bash(./scripts/install-launchagent.sh:*)",
+      "Bash(claude mcp add:*)",
+      "Bash(curl:*)",
+      "Bash(cat:*)"
+    ]
+  }
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a191f7c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,73 @@
+.PHONY: check fmt lint test clean install uninstall dev venv
+
+# Run all quality gates (format check, lint, tests)
+check: fmt lint test
+
+# Check/fix formatting with ruff
+fmt:
+	ruff format --check .
+
+# Run linter with ruff
+lint:
+	ruff check .
+
+# Run tests
+test:
+	pytest tests/ -v
+
+# Clean build artifacts
+clean:
+	rm -rf build/ dist/ *.egg-info .pytest_cache .ruff_cache
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+
+# Create virtual environment (requires Python 3.10+)
+venv:
+	@if [ ! -d .venv ]; then \
+		echo "Creating virtual environment..."; \
+		PYTHON=$$(command -v python3.12 || command -v python3.11 || command -v python3.10 || echo "python3"); \
+		$$PYTHON -m venv .venv && .venv/bin/pip install --upgrade pip; \
+	fi
+
+# Install with dev dependencies (for development)
+dev: venv
+	.venv/bin/pip install -e ".[dev]"
+
+# Full installation: venv + deps + LaunchAgent + CLI + MCP
+install: venv
+	@echo "Installing dependencies..."
+	.venv/bin/pip install -e .
+	@echo ""
+	@echo "Installing LaunchAgent..."
+	./scripts/install-launchagent.sh
+	@echo ""
+	@echo "Adding to Claude Code..."
+	@CLAUDE_CMD=$$(command -v claude || echo "$$HOME/.local/bin/claude"); \
+	if [ -x "$$CLAUDE_CMD" ]; then \
+		$$CLAUDE_CMD mcp add --transport http --scope user session-analytics http://localhost:8081/mcp 2>/dev/null && \
+			echo "Added session-analytics to Claude Code" || \
+			echo "session-analytics already configured in Claude Code"; \
+	else \
+		echo "Note: claude not found. Run manually:"; \
+		echo "  claude mcp add --transport http --scope user session-analytics http://localhost:8081/mcp"; \
+	fi
+	@echo ""
+	@echo "Installation complete!"
+	@echo ""
+	@echo "Make sure ~/.local/bin is in your PATH:"
+	@echo '  export PATH="$$HOME/.local/bin:$$PATH"'
+
+# Uninstall: LaunchAgent + CLI + MCP config
+uninstall:
+	@echo "Uninstalling..."
+	./scripts/uninstall-launchagent.sh
+	@echo ""
+	@echo "Removing from Claude Code..."
+	@CLAUDE_CMD=$$(command -v claude || echo "$$HOME/.local/bin/claude"); \
+	if [ -x "$$CLAUDE_CMD" ]; then \
+		$$CLAUDE_CMD mcp remove --scope user session-analytics 2>/dev/null && \
+			echo "Removed session-analytics from Claude Code" || \
+			echo "session-analytics not found in Claude Code"; \
+	fi
+	@echo ""
+	@echo "Uninstall complete!"
+	@echo "Note: venv and source code remain in place."
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b724c57
--- /dev/null
+++ b/README.md
@@ -0,0 +1,59 @@
+# Claude Session Analytics
+
+MCP server for queryable analytics on Claude Code session logs.
+
+## Overview
+
+Replaces `parse-session-logs.sh` with a persistent, queryable analytics layer. Parses JSONL session logs from `~/.claude/projects/` and provides:
+
+- **User-centric timeline**: Events across conversations, organized by timestamp
+- **Rich querying**: Tool frequency, command breakdown, sequences, permission gaps
+- **Persistent storage**: SQLite at `~/.claude/contrib/analytics/data.db`
+- **Auto-refresh**: Queries automatically refresh stale data (>5 min old)
+- **CLI access**: Full CLI for shell scripts and hooks
+
+## Installation
+
+```bash
+make install
+```
+
+This will:
+1. Create a virtual environment
+2. Install dependencies
+3. Set up a LaunchAgent for auto-start
+4. Add the MCP server to Claude Code
+
+## Development
+
+```bash
+make dev        # Install dev dependencies
+./scripts/dev.sh  # Run in dev mode with auto-reload
+```
+
+## Commands
+
+```bash
+make check      # Run fmt, lint, test
+make install    # Install LaunchAgent + CLI
+make uninstall  # Remove LaunchAgent + CLI
+```
+
+## MCP Tools
+
+| Tool | Purpose |
+|------|---------|
+| `ingest_logs` | Refresh data from JSONL files |
+| `query_timeline` | Events in time window |
+| `query_tool_frequency` | Tool usage counts |
+| `query_commands` | Bash command breakdown |
+| `query_sequences` | Common tool patterns |
+| `query_permission_gaps` | Commands needing settings.json |
+| `query_sessions` | Session metadata |
+| `query_tokens` | Token usage analysis |
+| `get_insights` | Pre-computed patterns for /improve-workflow |
+| `get_status` | Ingestion status + DB stats |
+
+## License
+
+MIT
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..3194b72
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,54 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "claude-session-analytics"
+version = "0.1.0"
+description = "MCP server for queryable analytics on Claude Code session logs"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+authors = [
+    { name = "Evan Senter" }
+]
+keywords = ["mcp", "claude", "analytics", "session-logs"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "fastmcp>=0.1.0",
+    "uvicorn>=0.30.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "pytest-asyncio>=0.23.0",
+    "ruff>=0.8.0",
+]
+
+[project.scripts]
+session-analytics = "session_analytics.server:main"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/session_analytics"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+asyncio_mode = "auto"
+
+[tool.ruff]
+target-version = "py310"
+line-length = 100
+src = ["src", "tests"]
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "N", "W", "UP"]
+ignore = ["E501"]  # Line length handled by formatter
diff --git a/scripts/com.evansenter.claude-session-analytics.plist b/scripts/com.evansenter.claude-session-analytics.plist
new file mode 100644
index 0000000..d8421b0
--- /dev/null
+++ b/scripts/com.evansenter.claude-session-analytics.plist
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>com.evansenter.claude-session-analytics</string>
+
+    <key>ProgramArguments</key>
+    <array>
+        <string>__VENV_PYTHON__</string>
+        <string>-m</string>
+        <string>session_analytics.server</string>
+    </array>
+
+    <key>WorkingDirectory</key>
+    <string>__PROJECT_DIR__</string>
+
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin</string>
+        <key>PYTHONPATH</key>
+        <string>__PROJECT_DIR__/src</string>
+    </dict>
+
+    <key>RunAtLoad</key>
+    <true/>
+
+    <key>KeepAlive</key>
+    <true/>
+
+    <key>StandardOutPath</key>
+    <string>__HOME__/.claude/session-analytics.log</string>
+
+    <key>StandardErrorPath</key>
+    <string>__HOME__/.claude/session-analytics.err</string>
+
+    <key>ProcessType</key>
+    <string>Background</string>
+</dict>
+</plist>
diff --git a/scripts/dev.sh b/scripts/dev.sh
new file mode 100755
index 0000000..c86e1f3
--- /dev/null
+++ b/scripts/dev.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Run session analytics in development mode (foreground, auto-reload, verbose logging)
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+LABEL="com.evansenter.claude-session-analytics"
+PLIST="$HOME/Library/LaunchAgents/$LABEL.plist"
+
+cd "$PROJECT_DIR"
+source .venv/bin/activate
+
+# Stop LaunchAgent if running (to free port 8081)
+LAUNCHAGENT_WAS_RUNNING=false
+if launchctl list 2>/dev/null | grep -q "$LABEL"; then
+    echo "Stopping LaunchAgent for dev mode..."
+    launchctl unload "$PLIST" 2>/dev/null
+    LAUNCHAGENT_WAS_RUNNING=true
+    osascript -e 'display notification "Stopped for dev mode" with title "Session Analytics"' 2>/dev/null
+fi
+
+# Restart LaunchAgent on exit
+cleanup() {
+    if [[ "$LAUNCHAGENT_WAS_RUNNING" == "true" && -f "$PLIST" ]]; then
+        echo ""
+        echo "Restarting LaunchAgent..."
+        launchctl load "$PLIST"
+        osascript -e 'display notification "LaunchAgent restarted" with title "Session Analytics"' 2>/dev/null
+    fi
+}
+trap cleanup EXIT
+
+echo "Starting session analytics in dev mode (Ctrl+C to stop)..."
+echo "Add to Claude Code: claude mcp add --transport http --scope user session-analytics http://127.0.0.1:8081/mcp"
+echo ""
+
+# DEV_MODE enables verbose logging
+DEV_MODE=1 uvicorn session_analytics.server:create_app --host 127.0.0.1 --port 8081 --reload --factory
diff --git a/scripts/install-launchagent.sh b/scripts/install-launchagent.sh
new file mode 100755
index 0000000..40c9398
--- /dev/null
+++ b/scripts/install-launchagent.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Install the session analytics server as a macOS LaunchAgent (auto-starts on login)
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+VENV_PYTHON="$PROJECT_DIR/.venv/bin/python"
+PLIST_TEMPLATE="$SCRIPT_DIR/com.evansenter.claude-session-analytics.plist"
+PLIST_DEST="$HOME/Library/LaunchAgents/com.evansenter.claude-session-analytics.plist"
+LABEL="com.evansenter.claude-session-analytics"
+
+# Check venv exists
+if [[ ! -f "$VENV_PYTHON" ]]; then
+    echo "Error: Virtual environment not found at $PROJECT_DIR/.venv"
+    echo "Run: python3 -m venv .venv && source .venv/bin/activate && pip install -e ."
+    exit 1
+fi
+
+# Create LaunchAgents directory if needed
+mkdir -p "$HOME/Library/LaunchAgents"
+mkdir -p "$HOME/.claude"
+
+# Stop existing service if running
+if launchctl list | grep -q "$LABEL"; then
+    echo "Stopping existing service..."
+    launchctl unload "$PLIST_DEST" 2>/dev/null || true
+fi
+
+# Generate plist with correct paths
+echo "Installing LaunchAgent..."
+sed -e "s|__VENV_PYTHON__|$VENV_PYTHON|g" \
+    -e "s|__PROJECT_DIR__|$PROJECT_DIR|g" \
+    -e "s|__HOME__|$HOME|g" \
+    "$PLIST_TEMPLATE" > "$PLIST_DEST"
+
+# Load the service
+echo "Starting service..."
+launchctl load "$PLIST_DEST"
+
+# Verify it's running
+sleep 1
+if launchctl list | grep -q "$LABEL"; then
+    echo ""
+    echo "Session analytics installed and running!"
+    echo "  Logs: ~/.claude/session-analytics.log"
+    echo "  Errors: ~/.claude/session-analytics.err"
+    echo ""
+    echo "To uninstall: $SCRIPT_DIR/uninstall-launchagent.sh"
+    osascript -e 'display notification "LaunchAgent installed and running" with title "Session Analytics"' 2>/dev/null
+else
+    echo "Error: Service failed to start. Check ~/.claude/session-analytics.err"
+    osascript -e 'display notification "Failed to start - check logs" with title "Session Analytics" sound name "Basso"' 2>/dev/null
+    exit 1
+fi
diff --git a/scripts/uninstall-launchagent.sh b/scripts/uninstall-launchagent.sh
new file mode 100755
index 0000000..9e556ed
--- /dev/null
+++ b/scripts/uninstall-launchagent.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Uninstall the session analytics LaunchAgent
+
+set -e
+
+PLIST_DEST="$HOME/Library/LaunchAgents/com.evansenter.claude-session-analytics.plist"
+LABEL="com.evansenter.claude-session-analytics"
+
+if [[ ! -f "$PLIST_DEST" ]]; then
+    echo "LaunchAgent not installed."
+    exit 0
+fi
+
+echo "Stopping service..."
+launchctl unload "$PLIST_DEST" 2>/dev/null || true
+
+echo "Removing plist..."
+rm -f "$PLIST_DEST"
+
+echo "Session analytics LaunchAgent uninstalled."
+
+echo ""
+echo "Note: Logs remain at ~/.claude/session-analytics.log"
+osascript -e 'display notification "LaunchAgent uninstalled" with title "Session Analytics"' 2>/dev/null
diff --git a/src/session_analytics/__init__.py b/src/session_analytics/__init__.py
index e69de29..345cbea 100644
--- a/src/session_analytics/__init__.py
+++ b/src/session_analytics/__init__.py
@@ -0,0 +1,3 @@
+"""Claude Session Analytics - MCP server for queryable session log analytics."""
+
+__version__ = "0.1.0"
diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md
new file mode 100644
index 0000000..1fe271b
--- /dev/null
+++ b/src/session_analytics/guide.md
@@ -0,0 +1,68 @@
+# Session Analytics Usage Guide
+
+This MCP server provides queryable analytics on Claude Code session logs.
+
+## Quick Start
+
+The server auto-refreshes data when queries detect stale data (>5 min old).
+You can also manually trigger ingestion:
+
+```
+ingest_logs(days=7)  # Process last 7 days of logs
+```
+
+## Available Tools
+
+### Ingestion
+
+| Tool | Purpose |
+|------|---------|
+| `ingest_logs` | Refresh data from JSONL files |
+| `get_status` | Ingestion status + DB stats |
+
+### Queries
+
+| Tool | Purpose |
+|------|---------|
+| `query_timeline` | Events in time window |
+| `query_tool_frequency` | Tool usage counts |
+| `query_commands` | Bash command breakdown |
+| `query_sequences` | Common tool patterns |
+| `query_permission_gaps` | Commands needing settings.json |
+| `query_sessions` | Session metadata |
+| `query_tokens` | Token usage analysis |
+| `get_insights` | Pre-computed patterns |
+
+## Common Patterns
+
+### Understanding tool usage
+
+```
+query_tool_frequency(days=30)
+```
+
+### Finding permission gaps
+
+```
+query_permission_gaps(threshold=5)  # Commands used 5+ times that need permission
+```
+
+### Analyzing workflows
+
+```
+query_sequences(min_count=3, length=3)  # Common 3-tool sequences
+```
+
+## Integration with /improve-workflow
+
+The `get_insights` tool returns pre-computed patterns specifically for
+the `/improve-workflow` command:
+
+```
+get_insights(refresh=True)  # Force fresh analysis
+```
+
+## Data Location
+
+- Database: `~/.claude/contrib/analytics/data.db`
+- Logs parsed from: `~/.claude/projects/**/*.jsonl`
diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py
new file mode 100644
index 0000000..5ce5ea2
--- /dev/null
+++ b/src/session_analytics/server.py
@@ -0,0 +1,126 @@
+"""MCP Session Analytics Server.
+
+Provides tools for querying Claude Code session logs:
+- ingest_logs: Refresh data from JSONL files
+- query_timeline: Events in time window
+- query_tool_frequency: Tool usage counts
+- query_commands: Bash command breakdown
+- query_sequences: Common tool patterns
+- query_permission_gaps: Commands needing settings.json
+- query_sessions: Session metadata
+- query_tokens: Token usage analysis
+- get_insights: Pre-computed patterns for /improve-workflow
+- get_status: Ingestion status + DB stats
+"""
+
+import logging
+import os
+from pathlib import Path
+
+from fastmcp import FastMCP
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    datefmt="%H:%M:%S",
+)
+logger = logging.getLogger("session-analytics")
+if os.environ.get("DEV_MODE"):
+    logger.setLevel(logging.DEBUG)
+
+# Initialize MCP server
+mcp = FastMCP("session-analytics")
+
+
+@mcp.resource("session-analytics://guide", description="Usage guide and best practices")
+def usage_guide() -> str:
+    """Return the session analytics usage guide from external markdown file."""
+    guide_path = Path(__file__).parent / "guide.md"
+    try:
+        return guide_path.read_text()
+    except FileNotFoundError:
+        return "# Session Analytics Usage Guide\n\nGuide file not found. See CLAUDE.md for usage."
+
+
+@mcp.tool()
+def get_status() -> dict:
+    """Get ingestion status and database stats.
+
+    Returns:
+        Status info including last ingestion time, event count, and DB size
+    """
+    # Placeholder - will be implemented in Phase 2
+    return {
+        "status": "ok",
+        "version": "0.1.0",
+        "message": "Session analytics server is running. Storage layer not yet implemented.",
+        "db_path": str(Path.home() / ".claude" / "contrib" / "analytics" / "data.db"),
+    }
+
+
+@mcp.tool()
+def ingest_logs(days: int = 7, project: str | None = None, force: bool = False) -> dict:
+    """Refresh data from JSONL session log files.
+
+    Args:
+        days: Number of days to look back (default: 7)
+        project: Optional project path filter
+        force: Force re-ingestion even if data is fresh
+
+    Returns:
+        Ingestion stats (files processed, entries added, etc.)
+    """
+    # Placeholder - will be implemented in Phase 3
+    return {
+        "status": "not_implemented",
+        "message": "Ingestion will be implemented in Phase 3",
+        "days": days,
+        "project": project,
+        "force": force,
+    }
+
+
+@mcp.tool()
+def query_tool_frequency(days: int = 7, project: str | None = None) -> dict:
+    """Get tool usage frequency counts.
+
+    Args:
+        days: Number of days to analyze (default: 7)
+        project: Optional project path filter
+
+    Returns:
+        Tool frequency breakdown
+    """
+    # Placeholder - will be implemented in Phase 4
+    return {
+        "status": "not_implemented",
+        "message": "Query will be implemented in Phase 4",
+        "days": days,
+        "project": project,
+    }
+
+
+def create_app():
+    """Create the ASGI app for uvicorn."""
+    # stateless_http=True allows resilience to server restarts
+    return mcp.http_app(stateless_http=True)
+
+
+def main():
+    """Run the MCP server."""
+    import uvicorn
+
+    port = int(os.environ.get("PORT", 8081))
+    host = os.environ.get("HOST", "127.0.0.1")
+
+    print(f"Starting Claude Session Analytics on {host}:{port}")
+    print(
+        f"Add to Claude Code: claude mcp add --transport http --scope user session-analytics http://{host}:{port}/mcp"
+    )
+
+    uvicorn.run(create_app(), host=host, port=port)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29..b76b24c 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for claude-session-analytics."""
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..83cca08
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,25 @@
+"""Pytest configuration and fixtures."""
+
+import pytest
+
+
+@pytest.fixture
+def sample_session_log_entry():
+    """Sample JSONL entry from a Claude Code session log."""
+    return {
+        "uuid": "test-uuid-12345",
+        "timestamp": "2025-01-01T12:00:00.000Z",
+        "sessionId": "session-abc123",
+        "type": "assistant",
+        "message": {
+            "role": "assistant",
+            "content": [
+                {
+                    "type": "tool_use",
+                    "id": "tool-123",
+                    "name": "Bash",
+                    "input": {"command": "git status", "description": "Check git status"},
+                }
+            ],
+        },
+    }
diff --git a/tests/test_server.py b/tests/test_server.py
new file mode 100644
index 0000000..cc43083
--- /dev/null
+++ b/tests/test_server.py
@@ -0,0 +1,27 @@
+"""Tests for the MCP server."""
+
+from session_analytics.server import get_status, ingest_logs, query_tool_frequency
+
+
+def test_get_status():
+    """Test that get_status returns expected fields."""
+    # FastMCP wraps functions - access the underlying fn
+    result = get_status.fn()
+    assert result["status"] == "ok"
+    assert "version" in result
+    assert "db_path" in result
+
+
+def test_ingest_logs_placeholder():
+    """Test that ingest_logs returns placeholder response."""
+    result = ingest_logs.fn(days=7)
+    assert result["status"] == "not_implemented"
+    assert result["days"] == 7
+
+
+def test_query_tool_frequency_placeholder():
+    """Test that query_tool_frequency returns placeholder response."""
+    result = query_tool_frequency.fn(days=14, project="/some/path")
+    assert result["status"] == "not_implemented"
+    assert result["days"] == 14
+    assert result["project"] == "/some/path"

From feb498e3decdb58ab910a251109703aebd0289e9 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 04:30:26 +0000
Subject: [PATCH 02/10] Add Phase 2: SQLite storage layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- storage.py with SQLiteStorage class:
  - Events table with denormalized fields for fast queries
  - Sessions table for session metadata
  - Ingestion state tracking for incremental updates
  - Patterns table for pre-computed insights
  - Indexes on timestamp, session_id, tool_name, project_path
- Data classes: Event, Session, IngestionState, Pattern
- CRUD operations for all entities with batch insert support
- get_db_stats() for monitoring database health
- Updated server.py to use storage for get_status()
- Comprehensive test suite (16 tests)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/session_analytics/server.py  |  13 +-
 src/session_analytics/storage.py | 588 +++++++++++++++++++++++++++++++
 tests/test_server.py             |   2 +
 tests/test_storage.py            | 267 ++++++++++++++
 4 files changed, 867 insertions(+), 3 deletions(-)
 create mode 100644 src/session_analytics/storage.py
 create mode 100644 tests/test_storage.py

diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py
index 5ce5ea2..987dfeb 100644
--- a/src/session_analytics/server.py
+++ b/src/session_analytics/server.py
@@ -19,6 +19,8 @@
 
 from fastmcp import FastMCP
 
+from session_analytics.storage import SQLiteStorage
+
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -32,6 +34,9 @@
 # Initialize MCP server
 mcp = FastMCP("session-analytics")
 
+# Initialize storage
+storage = SQLiteStorage()
+
 
 @mcp.resource("session-analytics://guide", description="Usage guide and best practices")
 def usage_guide() -> str:
@@ -50,12 +55,14 @@ def get_status() -> dict:
     Returns:
         Status info including last ingestion time, event count, and DB size
     """
-    # Placeholder - will be implemented in Phase 2
+    stats = storage.get_db_stats()
+    last_ingest = storage.get_last_ingestion_time()
+
     return {
         "status": "ok",
         "version": "0.1.0",
-        "message": "Session analytics server is running. Storage layer not yet implemented.",
-        "db_path": str(Path.home() / ".claude" / "contrib" / "analytics" / "data.db"),
+        "last_ingestion": last_ingest.isoformat() if last_ingest else None,
+        **stats,
     }
 
 
diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py
new file mode 100644
index 0000000..0fa58e3
--- /dev/null
+++ b/src/session_analytics/storage.py
@@ -0,0 +1,588 @@
+"""SQLite storage backend for session analytics."""
+
+import json
+import logging
+import os
+import sqlite3
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+
+logger = logging.getLogger("session-analytics")
+
+# Register datetime adapters/converters (required for Python 3.12+)
+
+
+def _adapt_datetime(dt: datetime) -> str:
+    """Convert datetime to ISO format string for SQLite storage."""
+    return dt.isoformat()
+
+
+def _convert_datetime(data: bytes) -> datetime:
+    """Convert ISO format string from SQLite to datetime."""
+    return datetime.fromisoformat(data.decode())
+
+
+sqlite3.register_adapter(datetime, _adapt_datetime)
+sqlite3.register_converter("TIMESTAMP", _convert_datetime)
+
+
+@dataclass
+class Event:
+    """A parsed event from a Claude Code session log."""
+
+    id: int | None
+    uuid: str
+    timestamp: datetime
+    session_id: str
+    project_path: str | None = None
+    entry_type: str | None = None  # 'user', 'assistant', 'summary'
+
+    # Tool-specific (null if not a tool call)
+    tool_name: str | None = None
+    tool_input_json: str | None = None
+    tool_id: str | None = None
+    is_error: bool = False
+
+    # Denormalized for common filters
+    command: str | None = None  # Bash: first word
+    command_args: str | None = None  # Bash: remaining args
+    file_path: str | None = None  # Read/Edit/Write target
+    skill_name: str | None = None  # Skill invocation
+
+    # Token tracking
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cache_read_tokens: int | None = None
+    cache_creation_tokens: int | None = None
+    model: str | None = None
+
+    # Context
+    git_branch: str | None = None
+    cwd: str | None = None
+
+
+@dataclass
+class Session:
+    """Metadata about a Claude Code session."""
+
+    id: str
+    project_path: str | None = None
+    first_seen: datetime | None = None
+    last_seen: datetime | None = None
+    entry_count: int = 0
+    tool_use_count: int = 0
+    total_input_tokens: int = 0
+    total_output_tokens: int = 0
+    primary_branch: str | None = None
+    slug: str | None = None
+
+
+@dataclass
+class IngestionState:
+    """Tracks the ingestion state of a JSONL file."""
+
+    file_path: str
+    file_size: int
+    last_modified: datetime
+    entries_processed: int
+    last_processed: datetime
+
+
+@dataclass
+class Pattern:
+    """A pre-computed pattern for fast querying."""
+
+    id: int | None
+    pattern_type: str  # 'tool_frequency', 'sequence', 'permission_gap', etc.
+    pattern_key: str  # e.g., "Bash" or "Read → Edit"
+    count: int = 0
+    last_seen: datetime | None = None
+    metadata: dict = field(default_factory=dict)
+    computed_at: datetime | None = None
+
+
+# Default database path
+DEFAULT_DB_PATH = Path.home() / ".claude" / "contrib" / "analytics" / "data.db"
+
+# Schema version for migrations
+SCHEMA_VERSION = 1
+
+
+class SQLiteStorage:
+    """SQLite-backed storage for session analytics."""
+
+    def __init__(self, db_path: str | Path | None = None):
+        """Initialize storage with optional custom DB path."""
+        if db_path is None:
+            db_path = os.environ.get("SESSION_ANALYTICS_DB", str(DEFAULT_DB_PATH))
+
+        self.db_path = Path(db_path)
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+
+        self._init_db()
+
+    @contextmanager
+    def _connect(self):
+        """Context manager for database connections."""
+        conn = sqlite3.connect(
+            self.db_path,
+            detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES,
+        )
+        conn.row_factory = sqlite3.Row
+        try:
+            yield conn
+            conn.commit()
+        finally:
+            conn.close()
+
+    def _init_db(self):
+        """Create tables if they don't exist."""
+        with self._connect() as conn:
+            # Schema version tracking
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS schema_version (
+                    version INTEGER PRIMARY KEY
+                )
+            """)
+
+            # Core events table (denormalized for fast queries)
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS events (
+                    id INTEGER PRIMARY KEY,
+                    uuid TEXT NOT NULL,
+                    timestamp TIMESTAMP NOT NULL,
+                    session_id TEXT NOT NULL,
+                    project_path TEXT,
+                    entry_type TEXT,
+
+                    -- Tool-specific
+                    tool_name TEXT,
+                    tool_input_json TEXT,
+                    tool_id TEXT,
+                    is_error INTEGER DEFAULT 0,
+
+                    -- Denormalized for common filters
+                    command TEXT,
+                    command_args TEXT,
+                    file_path TEXT,
+                    skill_name TEXT,
+
+                    -- Token tracking
+                    input_tokens INTEGER,
+                    output_tokens INTEGER,
+                    cache_read_tokens INTEGER,
+                    cache_creation_tokens INTEGER,
+                    model TEXT,
+
+                    -- Context
+                    git_branch TEXT,
+                    cwd TEXT,
+
+                    UNIQUE(session_id, uuid)
+                )
+            """)
+
+            # Indexes for common queries
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_events_timestamp ON events(timestamp)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_events_session ON events(session_id)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_events_tool ON events(tool_name)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_events_project ON events(project_path)")
+
+            # Sessions metadata
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS sessions (
+                    id TEXT PRIMARY KEY,
+                    project_path TEXT,
+                    first_seen TIMESTAMP,
+                    last_seen TIMESTAMP,
+                    entry_count INTEGER DEFAULT 0,
+                    tool_use_count INTEGER DEFAULT 0,
+                    total_input_tokens INTEGER DEFAULT 0,
+                    total_output_tokens INTEGER DEFAULT 0,
+                    primary_branch TEXT,
+                    slug TEXT
+                )
+            """)
+
+            # Ingestion tracking (incremental updates)
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS ingestion_state (
+                    file_path TEXT PRIMARY KEY,
+                    file_size INTEGER,
+                    last_modified TIMESTAMP,
+                    entries_processed INTEGER,
+                    last_processed TIMESTAMP
+                )
+            """)
+
+            # Pre-computed patterns
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS patterns (
+                    id INTEGER PRIMARY KEY,
+                    pattern_type TEXT NOT NULL,
+                    pattern_key TEXT NOT NULL,
+                    count INTEGER DEFAULT 0,
+                    last_seen TIMESTAMP,
+                    metadata_json TEXT,
+                    computed_at TIMESTAMP,
+                    UNIQUE(pattern_type, pattern_key)
+                )
+            """)
+
+            # Set schema version
+            conn.execute(
+                "INSERT OR REPLACE INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,)
+            )
+
+    # Event operations
+
+    def add_event(self, event: Event) -> Event:
+        """Add a new event and return it with assigned ID."""
+        with self._connect() as conn:
+            cursor = conn.execute(
+                """
+                INSERT OR IGNORE INTO events (
+                    uuid, timestamp, session_id, project_path, entry_type,
+                    tool_name, tool_input_json, tool_id, is_error,
+                    command, command_args, file_path, skill_name,
+                    input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model,
+                    git_branch, cwd
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    event.uuid,
+                    event.timestamp,
+                    event.session_id,
+                    event.project_path,
+                    event.entry_type,
+                    event.tool_name,
+                    event.tool_input_json,
+                    event.tool_id,
+                    1 if event.is_error else 0,
+                    event.command,
+                    event.command_args,
+                    event.file_path,
+                    event.skill_name,
+                    event.input_tokens,
+                    event.output_tokens,
+                    event.cache_read_tokens,
+                    event.cache_creation_tokens,
+                    event.model,
+                    event.git_branch,
+                    event.cwd,
+                ),
+            )
+            event.id = cursor.lastrowid
+            return event
+
+    def add_events_batch(self, events: list[Event]) -> int:
+        """Add multiple events in a single transaction. Returns count added."""
+        with self._connect() as conn:
+            cursor = conn.executemany(
+                """
+                INSERT OR IGNORE INTO events (
+                    uuid, timestamp, session_id, project_path, entry_type,
+                    tool_name, tool_input_json, tool_id, is_error,
+                    command, command_args, file_path, skill_name,
+                    input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model,
+                    git_branch, cwd
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                [
+                    (
+                        e.uuid,
+                        e.timestamp,
+                        e.session_id,
+                        e.project_path,
+                        e.entry_type,
+                        e.tool_name,
+                        e.tool_input_json,
+                        e.tool_id,
+                        1 if e.is_error else 0,
+                        e.command,
+                        e.command_args,
+                        e.file_path,
+                        e.skill_name,
+                        e.input_tokens,
+                        e.output_tokens,
+                        e.cache_read_tokens,
+                        e.cache_creation_tokens,
+                        e.model,
+                        e.git_branch,
+                        e.cwd,
+                    )
+                    for e in events
+                ],
+            )
+            return cursor.rowcount
+
+    def get_event_count(self) -> int:
+        """Get total number of events."""
+        with self._connect() as conn:
+            row = conn.execute("SELECT COUNT(*) as count FROM events").fetchone()
+            return row["count"]
+
+    def get_events_in_range(
+        self,
+        start: datetime | None = None,
+        end: datetime | None = None,
+        tool_name: str | None = None,
+        project_path: str | None = None,
+        limit: int = 100,
+    ) -> list[Event]:
+        """Get events within a time range with optional filters."""
+        with self._connect() as conn:
+            conditions = []
+            params: list = []
+
+            if start:
+                conditions.append("timestamp >= ?")
+                params.append(start)
+            if end:
+                conditions.append("timestamp <= ?")
+                params.append(end)
+            if tool_name:
+                conditions.append("tool_name = ?")
+                params.append(tool_name)
+            if project_path:
+                conditions.append("project_path = ?")
+                params.append(project_path)
+
+            where_clause = " AND ".join(conditions) if conditions else "1=1"
+            params.append(limit)
+
+            rows = conn.execute(
+                f"""
+                SELECT * FROM events
+                WHERE {where_clause}
+                ORDER BY timestamp DESC
+                LIMIT ?
+                """,
+                params,
+            ).fetchall()
+
+            return [self._row_to_event(row) for row in rows]
+
+    def _row_to_event(self, row: sqlite3.Row) -> Event:
+        """Convert a database row to an Event object."""
+        return Event(
+            id=row["id"],
+            uuid=row["uuid"],
+            timestamp=row["timestamp"],
+            session_id=row["session_id"],
+            project_path=row["project_path"],
+            entry_type=row["entry_type"],
+            tool_name=row["tool_name"],
+            tool_input_json=row["tool_input_json"],
+            tool_id=row["tool_id"],
+            is_error=bool(row["is_error"]),
+            command=row["command"],
+            command_args=row["command_args"],
+            file_path=row["file_path"],
+            skill_name=row["skill_name"],
+            input_tokens=row["input_tokens"],
+            output_tokens=row["output_tokens"],
+            cache_read_tokens=row["cache_read_tokens"],
+            cache_creation_tokens=row["cache_creation_tokens"],
+            model=row["model"],
+            git_branch=row["git_branch"],
+            cwd=row["cwd"],
+        )
+
+    # Session operations
+
+    def upsert_session(self, session: Session) -> None:
+        """Add or update a session."""
+        with self._connect() as conn:
+            conn.execute(
+                """
+                INSERT OR REPLACE INTO sessions (
+                    id, project_path, first_seen, last_seen,
+                    entry_count, tool_use_count,
+                    total_input_tokens, total_output_tokens,
+                    primary_branch, slug
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    session.id,
+                    session.project_path,
+                    session.first_seen,
+                    session.last_seen,
+                    session.entry_count,
+                    session.tool_use_count,
+                    session.total_input_tokens,
+                    session.total_output_tokens,
+                    session.primary_branch,
+                    session.slug,
+                ),
+            )
+
+    def get_session(self, session_id: str) -> Session | None:
+        """Get a session by ID."""
+        with self._connect() as conn:
+            row = conn.execute("SELECT * FROM sessions WHERE id = ?", (session_id,)).fetchone()
+            if row:
+                return self._row_to_session(row)
+            return None
+
+    def get_session_count(self) -> int:
+        """Get total number of sessions."""
+        with self._connect() as conn:
+            row = conn.execute("SELECT COUNT(*) as count FROM sessions").fetchone()
+            return row["count"]
+
+    def _row_to_session(self, row: sqlite3.Row) -> Session:
+        """Convert a database row to a Session object."""
+        return Session(
+            id=row["id"],
+            project_path=row["project_path"],
+            first_seen=row["first_seen"],
+            last_seen=row["last_seen"],
+            entry_count=row["entry_count"],
+            tool_use_count=row["tool_use_count"],
+            total_input_tokens=row["total_input_tokens"],
+            total_output_tokens=row["total_output_tokens"],
+            primary_branch=row["primary_branch"],
+            slug=row["slug"],
+        )
+
+    # Ingestion state operations
+
+    def get_ingestion_state(self, file_path: str) -> IngestionState | None:
+        """Get ingestion state for a file."""
+        with self._connect() as conn:
+            row = conn.execute(
+                "SELECT * FROM ingestion_state WHERE file_path = ?", (file_path,)
+            ).fetchone()
+            if row:
+                return IngestionState(
+                    file_path=row["file_path"],
+                    file_size=row["file_size"],
+                    last_modified=row["last_modified"],
+                    entries_processed=row["entries_processed"],
+                    last_processed=row["last_processed"],
+                )
+            return None
+
+    def update_ingestion_state(self, state: IngestionState) -> None:
+        """Update ingestion state for a file."""
+        with self._connect() as conn:
+            conn.execute(
+                """
+                INSERT OR REPLACE INTO ingestion_state (
+                    file_path, file_size, last_modified, entries_processed, last_processed
+                ) VALUES (?, ?, ?, ?, ?)
+                """,
+                (
+                    state.file_path,
+                    state.file_size,
+                    state.last_modified,
+                    state.entries_processed,
+                    state.last_processed,
+                ),
+            )
+
+    def get_last_ingestion_time(self) -> datetime | None:
+        """Get the most recent ingestion time across all files."""
+        with self._connect() as conn:
+            row = conn.execute("SELECT MAX(last_processed) as last FROM ingestion_state").fetchone()
+            if not row or not row["last"]:
+                return None
+            # Handle both datetime objects and ISO strings (SQLite aggregates return strings)
+            val = row["last"]
+            return datetime.fromisoformat(val) if isinstance(val, str) else val
+
+    # Pattern operations
+
+    def upsert_pattern(self, pattern: Pattern) -> None:
+        """Add or update a pattern."""
+        with self._connect() as conn:
+            conn.execute(
+                """
+                INSERT OR REPLACE INTO patterns (
+                    pattern_type, pattern_key, count, last_seen, metadata_json, computed_at
+                ) VALUES (?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    pattern.pattern_type,
+                    pattern.pattern_key,
+                    pattern.count,
+                    pattern.last_seen,
+                    json.dumps(pattern.metadata) if pattern.metadata else None,
+                    pattern.computed_at,
+                ),
+            )
+
+    def get_patterns(self, pattern_type: str | None = None) -> list[Pattern]:
+        """Get patterns, optionally filtered by type."""
+        with self._connect() as conn:
+            if pattern_type:
+                rows = conn.execute(
+                    "SELECT * FROM patterns WHERE pattern_type = ? ORDER BY count DESC",
+                    (pattern_type,),
+                ).fetchall()
+            else:
+                rows = conn.execute(
+                    "SELECT * FROM patterns ORDER BY pattern_type, count DESC"
+                ).fetchall()
+
+            return [
+                Pattern(
+                    id=row["id"],
+                    pattern_type=row["pattern_type"],
+                    pattern_key=row["pattern_key"],
+                    count=row["count"],
+                    last_seen=row["last_seen"],
+                    metadata=json.loads(row["metadata_json"]) if row["metadata_json"] else {},
+                    computed_at=row["computed_at"],
+                )
+                for row in rows
+            ]
+
+    def clear_patterns(self, pattern_type: str | None = None) -> int:
+        """Clear patterns, optionally filtered by type. Returns count deleted."""
+        with self._connect() as conn:
+            if pattern_type:
+                cursor = conn.execute(
+                    "DELETE FROM patterns WHERE pattern_type = ?", (pattern_type,)
+                )
+            else:
+                cursor = conn.execute("DELETE FROM patterns")
+            return cursor.rowcount
+
+    # Utility operations
+
+    def get_db_stats(self) -> dict:
+        """Get database statistics."""
+        with self._connect() as conn:
+            event_count = conn.execute("SELECT COUNT(*) FROM events").fetchone()[0]
+            session_count = conn.execute("SELECT COUNT(*) FROM sessions").fetchone()[0]
+            pattern_count = conn.execute("SELECT COUNT(*) FROM patterns").fetchone()[0]
+            file_count = conn.execute("SELECT COUNT(*) FROM ingestion_state").fetchone()[0]
+
+            # Get date range
+            date_range = conn.execute(
+                "SELECT MIN(timestamp) as min_ts, MAX(timestamp) as max_ts FROM events"
+            ).fetchone()
+
+            # Get DB file size
+            db_size = self.db_path.stat().st_size if self.db_path.exists() else 0
+
+            # Helper to convert datetime or string to ISO string
+            def to_iso(val):
+                if val is None:
+                    return None
+                return val if isinstance(val, str) else val.isoformat()
+
+            return {
+                "event_count": event_count,
+                "session_count": session_count,
+                "pattern_count": pattern_count,
+                "files_processed": file_count,
+                "earliest_event": to_iso(date_range["min_ts"]),
+                "latest_event": to_iso(date_range["max_ts"]),
+                "db_size_bytes": db_size,
+                "db_path": str(self.db_path),
+            }
diff --git a/tests/test_server.py b/tests/test_server.py
index cc43083..d05e030 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -10,6 +10,8 @@ def test_get_status():
     assert result["status"] == "ok"
     assert "version" in result
     assert "db_path" in result
+    assert "event_count" in result
+    assert "session_count" in result
 
 
 def test_ingest_logs_placeholder():
diff --git a/tests/test_storage.py b/tests/test_storage.py
new file mode 100644
index 0000000..9c8519f
--- /dev/null
+++ b/tests/test_storage.py
@@ -0,0 +1,267 @@
+"""Tests for the SQLite storage layer."""
+
+import tempfile
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+
+from session_analytics.storage import (
+    Event,
+    IngestionState,
+    Pattern,
+    Session,
+    SQLiteStorage,
+)
+
+
+@pytest.fixture
+def storage():
+    """Create a temporary storage instance for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "test.db"
+        yield SQLiteStorage(db_path)
+
+
+@pytest.fixture
+def sample_event():
+    """Create a sample event for testing."""
+    return Event(
+        id=None,
+        uuid="test-uuid-12345",
+        timestamp=datetime(2025, 1, 1, 12, 0, 0),
+        session_id="session-abc123",
+        project_path="/encoded/project/path",
+        entry_type="assistant",
+        tool_name="Bash",
+        tool_input_json='{"command": "git status"}',
+        tool_id="tool-123",
+        is_error=False,
+        command="git",
+        command_args="status",
+    )
+
+
+class TestEventOperations:
+    """Tests for event CRUD operations."""
+
+    def test_add_event(self, storage, sample_event):
+        """Test adding a single event."""
+        result = storage.add_event(sample_event)
+        assert result.id is not None
+        assert result.uuid == sample_event.uuid
+
+    def test_add_event_dedup(self, storage, sample_event):
+        """Test that duplicate events are ignored."""
+        storage.add_event(sample_event)
+        storage.add_event(sample_event)  # Same uuid + session_id
+        assert storage.get_event_count() == 1
+
+    def test_add_events_batch(self, storage):
+        """Test adding multiple events in batch."""
+        events = [
+            Event(
+                id=None,
+                uuid=f"uuid-{i}",
+                timestamp=datetime(2025, 1, 1, 12, i, 0),
+                session_id="session-1",
+            )
+            for i in range(5)
+        ]
+        count = storage.add_events_batch(events)
+        assert count == 5
+        assert storage.get_event_count() == 5
+
+    def test_get_events_in_range(self, storage):
+        """Test filtering events by time range."""
+        # Add events across different times
+        for i in range(5):
+            storage.add_event(
+                Event(
+                    id=None,
+                    uuid=f"uuid-{i}",
+                    timestamp=datetime(2025, 1, i + 1, 12, 0, 0),
+                    session_id="session-1",
+                )
+            )
+
+        # Query a subset (start/end are inclusive, events are at 12:00)
+        events = storage.get_events_in_range(
+            start=datetime(2025, 1, 2, 0, 0, 0),
+            end=datetime(2025, 1, 4, 23, 59, 59),
+        )
+        assert len(events) == 3
+
+    def test_get_events_by_tool(self, storage):
+        """Test filtering events by tool name."""
+        storage.add_event(
+            Event(
+                id=None,
+                uuid="uuid-1",
+                timestamp=datetime.now(),
+                session_id="s1",
+                tool_name="Bash",
+            )
+        )
+        storage.add_event(
+            Event(
+                id=None,
+                uuid="uuid-2",
+                timestamp=datetime.now(),
+                session_id="s1",
+                tool_name="Read",
+            )
+        )
+
+        bash_events = storage.get_events_in_range(tool_name="Bash")
+        assert len(bash_events) == 1
+        assert bash_events[0].tool_name == "Bash"
+
+
+class TestSessionOperations:
+    """Tests for session CRUD operations."""
+
+    def test_upsert_session(self, storage):
+        """Test adding and updating a session."""
+        session = Session(
+            id="session-1",
+            project_path="/test/project",
+            first_seen=datetime(2025, 1, 1),
+            last_seen=datetime(2025, 1, 1),
+            entry_count=10,
+        )
+        storage.upsert_session(session)
+
+        retrieved = storage.get_session("session-1")
+        assert retrieved is not None
+        assert retrieved.entry_count == 10
+
+        # Update
+        session.entry_count = 20
+        storage.upsert_session(session)
+
+        retrieved = storage.get_session("session-1")
+        assert retrieved.entry_count == 20
+
+    def test_get_session_count(self, storage):
+        """Test counting sessions."""
+        for i in range(3):
+            storage.upsert_session(Session(id=f"session-{i}"))
+        assert storage.get_session_count() == 3
+
+
+class TestIngestionState:
+    """Tests for ingestion state tracking."""
+
+    def test_update_and_get_ingestion_state(self, storage):
+        """Test tracking file ingestion state."""
+        state = IngestionState(
+            file_path="/path/to/file.jsonl",
+            file_size=1024,
+            last_modified=datetime(2025, 1, 1),
+            entries_processed=100,
+            last_processed=datetime(2025, 1, 1, 12, 0),
+        )
+        storage.update_ingestion_state(state)
+
+        retrieved = storage.get_ingestion_state("/path/to/file.jsonl")
+        assert retrieved is not None
+        assert retrieved.file_size == 1024
+        assert retrieved.entries_processed == 100
+
+    def test_get_last_ingestion_time(self, storage):
+        """Test getting most recent ingestion time."""
+        storage.update_ingestion_state(
+            IngestionState(
+                file_path="/file1.jsonl",
+                file_size=100,
+                last_modified=datetime(2025, 1, 1),
+                entries_processed=10,
+                last_processed=datetime(2025, 1, 1, 10, 0),
+            )
+        )
+        storage.update_ingestion_state(
+            IngestionState(
+                file_path="/file2.jsonl",
+                file_size=200,
+                last_modified=datetime(2025, 1, 2),
+                entries_processed=20,
+                last_processed=datetime(2025, 1, 2, 10, 0),  # More recent
+            )
+        )
+
+        last_time = storage.get_last_ingestion_time()
+        assert last_time == datetime(2025, 1, 2, 10, 0)
+
+
+class TestPatternOperations:
+    """Tests for pattern CRUD operations."""
+
+    def test_upsert_pattern(self, storage):
+        """Test adding and updating patterns."""
+        pattern = Pattern(
+            id=None,
+            pattern_type="tool_frequency",
+            pattern_key="Bash",
+            count=100,
+            last_seen=datetime(2025, 1, 1),
+            metadata={"avg_duration": 1.5},
+        )
+        storage.upsert_pattern(pattern)
+
+        patterns = storage.get_patterns("tool_frequency")
+        assert len(patterns) == 1
+        assert patterns[0].count == 100
+        assert patterns[0].metadata["avg_duration"] == 1.5
+
+    def test_get_patterns_by_type(self, storage):
+        """Test filtering patterns by type."""
+        storage.upsert_pattern(
+            Pattern(id=None, pattern_type="tool_frequency", pattern_key="Bash", count=50)
+        )
+        storage.upsert_pattern(
+            Pattern(id=None, pattern_type="sequence", pattern_key="Read→Edit", count=30)
+        )
+
+        tool_patterns = storage.get_patterns("tool_frequency")
+        assert len(tool_patterns) == 1
+
+        all_patterns = storage.get_patterns()
+        assert len(all_patterns) == 2
+
+    def test_clear_patterns(self, storage):
+        """Test clearing patterns."""
+        storage.upsert_pattern(
+            Pattern(id=None, pattern_type="tool_frequency", pattern_key="Bash", count=50)
+        )
+        storage.upsert_pattern(
+            Pattern(id=None, pattern_type="sequence", pattern_key="Read→Edit", count=30)
+        )
+
+        # Clear just one type
+        deleted = storage.clear_patterns("tool_frequency")
+        assert deleted == 1
+        assert len(storage.get_patterns()) == 1
+
+        # Clear all
+        storage.upsert_pattern(
+            Pattern(id=None, pattern_type="tool_frequency", pattern_key="Read", count=40)
+        )
+        deleted = storage.clear_patterns()
+        assert deleted == 2
+
+
+class TestDbStats:
+    """Tests for database statistics."""
+
+    def test_get_db_stats(self, storage, sample_event):
+        """Test getting database statistics."""
+        storage.add_event(sample_event)
+        storage.upsert_session(Session(id="session-1"))
+        storage.upsert_pattern(Pattern(id=None, pattern_type="test", pattern_key="key", count=1))
+
+        stats = storage.get_db_stats()
+        assert stats["event_count"] == 1
+        assert stats["session_count"] == 1
+        assert stats["pattern_count"] == 1
+        assert stats["db_path"] is not None

From 39e935eb45a9dab21234084ebb7e0101afde5c6c Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 04:37:00 +0000
Subject: [PATCH 03/10] Add Phase 3: JSONL ingestion module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements log file discovery and parsing:
- find_log_files(): Discovers JSONL files within date range
- parse_tool_use(): Extracts tool info (command, file_path, skill_name)
- parse_entry(): Parses entries into Event objects
- ingest_file(): Incremental ingestion with mtime/size tracking
- ingest_logs(): Full ingestion orchestration
- update_session_stats(): Aggregates session statistics

Integrates with server.py to provide real data for ingest_logs tool.

Closes #3

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/session_analytics/ingest.py | 452 ++++++++++++++++++++++++++++++++
 src/session_analytics/server.py |  10 +-
 tests/test_ingest.py            | 316 ++++++++++++++++++++++
 tests/test_server.py            |  11 +-
 4 files changed, 778 insertions(+), 11 deletions(-)
 create mode 100644 src/session_analytics/ingest.py
 create mode 100644 tests/test_ingest.py

diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py
new file mode 100644
index 0000000..6ab5ad8
--- /dev/null
+++ b/src/session_analytics/ingest.py
@@ -0,0 +1,452 @@
+"""JSONL log ingestion for Claude Code session analytics."""
+
+import json
+import logging
+from datetime import datetime, timedelta
+from pathlib import Path
+
+from session_analytics.storage import Event, IngestionState, Session, SQLiteStorage
+
+logger = logging.getLogger("session-analytics")
+
+# Default location for Claude Code session logs
+DEFAULT_LOGS_DIR = Path.home() / ".claude" / "projects"
+
+
+def find_log_files(
+    logs_dir: Path = DEFAULT_LOGS_DIR,
+    days: int = 7,
+    project_filter: str | None = None,
+) -> list[Path]:
+    """Find JSONL log files within the specified time range.
+
+    Args:
+        logs_dir: Directory containing project subdirectories
+        days: Only include files modified within this many days
+        project_filter: Optional project path to filter (encoded form)
+
+    Returns:
+        List of JSONL file paths, sorted by modification time (newest first)
+    """
+    if not logs_dir.exists():
+        logger.warning(f"Logs directory does not exist: {logs_dir}")
+        return []
+
+    cutoff = datetime.now() - timedelta(days=days)
+    files = []
+
+    for project_dir in logs_dir.iterdir():
+        if not project_dir.is_dir():
+            continue
+
+        # Apply project filter if specified
+        if project_filter and project_filter not in project_dir.name:
+            continue
+
+        for jsonl_file in project_dir.glob("*.jsonl"):
+            try:
+                mtime = datetime.fromtimestamp(jsonl_file.stat().st_mtime)
+                if mtime >= cutoff:
+                    files.append((jsonl_file, mtime))
+            except OSError as e:
+                logger.warning(f"Could not stat {jsonl_file}: {e}")
+
+    # Sort by modification time, newest first
+    files.sort(key=lambda x: x[1], reverse=True)
+    return [f for f, _ in files]
+
+
+def parse_tool_use(tool_use: dict) -> dict:
+    """Extract normalized fields from a tool_use block.
+
+    Returns dict with: tool_name, tool_id, tool_input_json, command, command_args,
+    file_path, skill_name
+    """
+    result = {
+        "tool_name": tool_use.get("name"),
+        "tool_id": tool_use.get("id"),
+        "tool_input_json": json.dumps(tool_use.get("input", {})),
+        "command": None,
+        "command_args": None,
+        "file_path": None,
+        "skill_name": None,
+    }
+
+    tool_input = tool_use.get("input", {})
+    tool_name = result["tool_name"]
+
+    # Extract Bash command info
+    if tool_name == "Bash":
+        cmd = tool_input.get("command", "")
+        if cmd:
+            parts = cmd.split(None, 1)
+            result["command"] = parts[0] if parts else None
+            result["command_args"] = parts[1] if len(parts) > 1 else None
+
+    # Extract file path for file operations
+    elif tool_name in ("Read", "Edit", "Write", "Glob", "Grep"):
+        result["file_path"] = tool_input.get("file_path") or tool_input.get("path")
+
+    # Extract skill name
+    elif tool_name == "Skill":
+        result["skill_name"] = tool_input.get("skill")
+
+    # Handle MCP tools (e.g., mcp__event-bus__register_session)
+    elif tool_name and tool_name.startswith("mcp__"):
+        # Keep the full name for MCP tools
+        pass
+
+    return result
+
+
+def parse_entry(raw: dict, project_path: str) -> list[Event]:
+    """Parse a single JSONL entry into Event objects.
+
+    An entry may produce multiple events (e.g., assistant with multiple tool_use blocks).
+
+    Args:
+        raw: Parsed JSON object from JSONL
+        project_path: Encoded project path from directory name
+
+    Returns:
+        List of Event objects (may be empty for skipped entries)
+    """
+    entry_type = raw.get("type")
+
+    # Skip certain entry types that don't contain useful analytics data
+    if entry_type in ("file-history-snapshot", "queue-operation", "create"):
+        return []
+
+    # Skip thinking/text blocks that are nested content
+    if entry_type in ("thinking", "text", "tool_use", "tool_result", "message"):
+        return []
+
+    uuid = raw.get("uuid")
+    session_id = raw.get("sessionId")
+    timestamp_str = raw.get("timestamp")
+
+    # Skip entries without required fields
+    if not uuid or not session_id or not timestamp_str:
+        return []
+
+    try:
+        timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
+        # Convert to naive datetime (remove timezone for SQLite compatibility)
+        timestamp = timestamp.replace(tzinfo=None)
+    except (ValueError, AttributeError):
+        logger.debug(f"Could not parse timestamp: {timestamp_str}")
+        return []
+
+    # Extract common fields
+    cwd = raw.get("cwd")
+    git_branch = raw.get("gitBranch")
+
+    # Extract token usage from assistant messages
+    message = raw.get("message", {})
+    usage = message.get("usage", {})
+    input_tokens = usage.get("input_tokens")
+    output_tokens = usage.get("output_tokens")
+    cache_read_tokens = usage.get("cache_read_input_tokens")
+    cache_creation_tokens = usage.get("cache_creation_input_tokens")
+    model = message.get("model")
+
+    events = []
+
+    # Handle assistant entries with tool_use blocks
+    if entry_type == "assistant":
+        content = message.get("content", [])
+        tool_uses = [c for c in content if isinstance(c, dict) and c.get("type") == "tool_use"]
+
+        if tool_uses:
+            # Create an event for each tool_use
+            for tool_use in tool_uses:
+                parsed = parse_tool_use(tool_use)
+                events.append(
+                    Event(
+                        id=None,
+                        uuid=f"{uuid}:{parsed['tool_id']}",  # Unique per tool_use
+                        timestamp=timestamp,
+                        session_id=session_id,
+                        project_path=project_path,
+                        entry_type="tool_use",
+                        tool_name=parsed["tool_name"],
+                        tool_input_json=parsed["tool_input_json"],
+                        tool_id=parsed["tool_id"],
+                        is_error=False,
+                        command=parsed["command"],
+                        command_args=parsed["command_args"],
+                        file_path=parsed["file_path"],
+                        skill_name=parsed["skill_name"],
+                        input_tokens=input_tokens,
+                        output_tokens=output_tokens,
+                        cache_read_tokens=cache_read_tokens,
+                        cache_creation_tokens=cache_creation_tokens,
+                        model=model,
+                        git_branch=git_branch,
+                        cwd=cwd,
+                    )
+                )
+        else:
+            # Assistant message without tools
+            events.append(
+                Event(
+                    id=None,
+                    uuid=uuid,
+                    timestamp=timestamp,
+                    session_id=session_id,
+                    project_path=project_path,
+                    entry_type="assistant",
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    cache_read_tokens=cache_read_tokens,
+                    cache_creation_tokens=cache_creation_tokens,
+                    model=model,
+                    git_branch=git_branch,
+                    cwd=cwd,
+                )
+            )
+
+    # Handle user entries (may contain tool_result)
+    elif entry_type == "user":
+        content = message.get("content", "")
+
+        # Check if content is a list with tool_result blocks
+        if isinstance(content, list):
+            tool_results = [
+                c for c in content if isinstance(c, dict) and c.get("type") == "tool_result"
+            ]
+            if tool_results:
+                for tr in tool_results:
+                    # Check for error
+                    is_error = tr.get("is_error", False)
+                    events.append(
+                        Event(
+                            id=None,
+                            uuid=f"{uuid}:{tr.get('tool_use_id', 'result')}",
+                            timestamp=timestamp,
+                            session_id=session_id,
+                            project_path=project_path,
+                            entry_type="tool_result",
+                            tool_id=tr.get("tool_use_id"),
+                            is_error=is_error,
+                            git_branch=git_branch,
+                            cwd=cwd,
+                        )
+                    )
+            else:
+                # User message with other content types
+                events.append(
+                    Event(
+                        id=None,
+                        uuid=uuid,
+                        timestamp=timestamp,
+                        session_id=session_id,
+                        project_path=project_path,
+                        entry_type="user",
+                        git_branch=git_branch,
+                        cwd=cwd,
+                    )
+                )
+        else:
+            # Plain text user message
+            events.append(
+                Event(
+                    id=None,
+                    uuid=uuid,
+                    timestamp=timestamp,
+                    session_id=session_id,
+                    project_path=project_path,
+                    entry_type="user",
+                    git_branch=git_branch,
+                    cwd=cwd,
+                )
+            )
+
+    # Handle summary entries
+    elif entry_type == "summary":
+        events.append(
+            Event(
+                id=None,
+                uuid=uuid if uuid else f"summary:{raw.get('leafUuid', 'unknown')}",
+                timestamp=timestamp if timestamp else datetime.now(),
+                session_id=session_id if session_id else "unknown",
+                project_path=project_path,
+                entry_type="summary",
+            )
+        )
+
+    return events
+
+
+def ingest_file(
+    file_path: Path,
+    storage: SQLiteStorage,
+    force: bool = False,
+) -> dict:
+    """Ingest a single JSONL file.
+
+    Uses incremental ingestion - only processes new entries if file has changed.
+
+    Args:
+        file_path: Path to JSONL file
+        storage: Storage instance
+        force: Force re-ingestion even if file hasn't changed
+
+    Returns:
+        Stats dict with entries_processed, events_added, skipped
+    """
+    file_str = str(file_path)
+    stat = file_path.stat()
+    file_size = stat.st_size
+    file_mtime = datetime.fromtimestamp(stat.st_mtime)
+
+    # Check if we've already processed this file
+    state = storage.get_ingestion_state(file_str)
+    if state and not force:
+        # Skip if file hasn't changed
+        if state.file_size == file_size and state.last_modified >= file_mtime:
+            return {"entries_processed": 0, "events_added": 0, "skipped": True}
+
+    # Extract project path from directory name
+    project_path = file_path.parent.name
+
+    # Parse and collect events
+    events = []
+    entries_processed = 0
+    errors = 0
+
+    with open(file_path, encoding="utf-8") as f:
+        for line_num, line in enumerate(f, 1):
+            line = line.strip()
+            if not line:
+                continue
+
+            try:
+                raw = json.loads(line)
+                parsed_events = parse_entry(raw, project_path)
+                events.extend(parsed_events)
+                entries_processed += 1
+            except json.JSONDecodeError as e:
+                logger.debug(f"JSON parse error in {file_path}:{line_num}: {e}")
+                errors += 1
+            except Exception as e:
+                logger.warning(f"Error processing {file_path}:{line_num}: {e}")
+                errors += 1
+
+    # Batch insert events
+    events_added = storage.add_events_batch(events) if events else 0
+
+    # Update ingestion state
+    storage.update_ingestion_state(
+        IngestionState(
+            file_path=file_str,
+            file_size=file_size,
+            last_modified=file_mtime,
+            entries_processed=entries_processed,
+            last_processed=datetime.now(),
+        )
+    )
+
+    return {
+        "entries_processed": entries_processed,
+        "events_added": events_added,
+        "skipped": False,
+        "errors": errors,
+    }
+
+
+def update_session_stats(storage: SQLiteStorage) -> int:
+    """Update session statistics from ingested events.
+
+    Returns number of sessions updated.
+    """
+    # Query distinct sessions from events
+    with storage._connect() as conn:
+        rows = conn.execute("""
+            SELECT
+                session_id,
+                project_path,
+                MIN(timestamp) as first_seen,
+                MAX(timestamp) as last_seen,
+                COUNT(*) as entry_count,
+                SUM(CASE WHEN tool_name IS NOT NULL THEN 1 ELSE 0 END) as tool_use_count,
+                SUM(COALESCE(input_tokens, 0)) as total_input_tokens,
+                SUM(COALESCE(output_tokens, 0)) as total_output_tokens,
+                (SELECT git_branch FROM events e2
+                 WHERE e2.session_id = events.session_id
+                 ORDER BY timestamp DESC LIMIT 1) as primary_branch
+            FROM events
+            GROUP BY session_id
+        """).fetchall()
+
+        count = 0
+        for row in rows:
+            storage.upsert_session(
+                Session(
+                    id=row["session_id"],
+                    project_path=row["project_path"],
+                    first_seen=row["first_seen"],
+                    last_seen=row["last_seen"],
+                    entry_count=row["entry_count"],
+                    tool_use_count=row["tool_use_count"],
+                    total_input_tokens=row["total_input_tokens"],
+                    total_output_tokens=row["total_output_tokens"],
+                    primary_branch=row["primary_branch"],
+                )
+            )
+            count += 1
+
+    return count
+
+
+def ingest_logs(
+    storage: SQLiteStorage,
+    days: int = 7,
+    project: str | None = None,
+    force: bool = False,
+) -> dict:
+    """Ingest all JSONL log files.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to look back
+        project: Optional project filter
+        force: Force re-ingestion
+
+    Returns:
+        Stats dict with totals
+    """
+    files = find_log_files(days=days, project_filter=project)
+
+    total_entries = 0
+    total_events = 0
+    files_processed = 0
+    files_skipped = 0
+    total_errors = 0
+
+    for file_path in files:
+        try:
+            result = ingest_file(file_path, storage, force=force)
+            if result["skipped"]:
+                files_skipped += 1
+            else:
+                files_processed += 1
+                total_entries += result["entries_processed"]
+                total_events += result["events_added"]
+                total_errors += result.get("errors", 0)
+        except Exception as e:
+            logger.error(f"Failed to ingest {file_path}: {e}")
+            total_errors += 1
+
+    # Update session statistics
+    sessions_updated = update_session_stats(storage)
+
+    return {
+        "files_found": len(files),
+        "files_processed": files_processed,
+        "files_skipped": files_skipped,
+        "entries_processed": total_entries,
+        "events_added": total_events,
+        "sessions_updated": sessions_updated,
+        "errors": total_errors,
+    }
diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py
index 987dfeb..5abd6d9 100644
--- a/src/session_analytics/server.py
+++ b/src/session_analytics/server.py
@@ -19,6 +19,7 @@
 
 from fastmcp import FastMCP
 
+from session_analytics.ingest import ingest_logs as do_ingest_logs
 from session_analytics.storage import SQLiteStorage
 
 # Configure logging
@@ -78,13 +79,10 @@ def ingest_logs(days: int = 7, project: str | None = None, force: bool = False)
     Returns:
         Ingestion stats (files processed, entries added, etc.)
     """
-    # Placeholder - will be implemented in Phase 3
+    result = do_ingest_logs(storage, days=days, project=project, force=force)
     return {
-        "status": "not_implemented",
-        "message": "Ingestion will be implemented in Phase 3",
-        "days": days,
-        "project": project,
-        "force": force,
+        "status": "ok",
+        **result,
     }
 
 
diff --git a/tests/test_ingest.py b/tests/test_ingest.py
new file mode 100644
index 0000000..b2503b5
--- /dev/null
+++ b/tests/test_ingest.py
@@ -0,0 +1,316 @@
+"""Tests for the JSONL ingestion module."""
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from session_analytics.ingest import (
+    find_log_files,
+    ingest_file,
+    parse_entry,
+    parse_tool_use,
+)
+from session_analytics.storage import SQLiteStorage
+
+
+@pytest.fixture
+def storage():
+    """Create a temporary storage instance for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "test.db"
+        yield SQLiteStorage(db_path)
+
+
+@pytest.fixture
+def sample_logs_dir():
+    """Create a temporary directory with sample JSONL files."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        logs_dir = Path(tmpdir)
+        project_dir = logs_dir / "-test-project"
+        project_dir.mkdir()
+
+        # Create a sample JSONL file
+        jsonl_file = project_dir / "test-session.jsonl"
+        entries = [
+            {
+                "type": "user",
+                "uuid": "user-1",
+                "sessionId": "session-1",
+                "timestamp": "2025-01-01T12:00:00.000Z",
+                "cwd": "/test/project",
+                "gitBranch": "main",
+                "message": {"role": "user", "content": "Hello"},
+            },
+            {
+                "type": "assistant",
+                "uuid": "assistant-1",
+                "sessionId": "session-1",
+                "timestamp": "2025-01-01T12:00:05.000Z",
+                "cwd": "/test/project",
+                "gitBranch": "main",
+                "message": {
+                    "role": "assistant",
+                    "model": "claude-opus-4-5-20251101",
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "tool-1",
+                            "name": "Bash",
+                            "input": {"command": "git status"},
+                        }
+                    ],
+                    "usage": {
+                        "input_tokens": 100,
+                        "output_tokens": 50,
+                        "cache_read_input_tokens": 1000,
+                    },
+                },
+            },
+            {
+                "type": "user",
+                "uuid": "result-1",
+                "sessionId": "session-1",
+                "timestamp": "2025-01-01T12:00:10.000Z",
+                "cwd": "/test/project",
+                "gitBranch": "main",
+                "message": {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "tool-1",
+                            "content": "On branch main",
+                        }
+                    ],
+                },
+            },
+        ]
+
+        with open(jsonl_file, "w") as f:
+            for entry in entries:
+                f.write(json.dumps(entry) + "\n")
+
+        yield logs_dir
+
+
+class TestParseToolUse:
+    """Tests for tool_use parsing."""
+
+    def test_parse_bash_command(self):
+        """Test extracting command from Bash tool."""
+        tool_use = {
+            "name": "Bash",
+            "id": "tool-1",
+            "input": {"command": "git status --short"},
+        }
+        result = parse_tool_use(tool_use)
+        assert result["tool_name"] == "Bash"
+        assert result["command"] == "git"
+        assert result["command_args"] == "status --short"
+
+    def test_parse_read_file(self):
+        """Test extracting file_path from Read tool."""
+        tool_use = {
+            "name": "Read",
+            "id": "tool-2",
+            "input": {"file_path": "/path/to/file.py"},
+        }
+        result = parse_tool_use(tool_use)
+        assert result["tool_name"] == "Read"
+        assert result["file_path"] == "/path/to/file.py"
+
+    def test_parse_skill(self):
+        """Test extracting skill_name from Skill tool."""
+        tool_use = {
+            "name": "Skill",
+            "id": "tool-3",
+            "input": {"skill": "commit"},
+        }
+        result = parse_tool_use(tool_use)
+        assert result["tool_name"] == "Skill"
+        assert result["skill_name"] == "commit"
+
+    def test_parse_mcp_tool(self):
+        """Test parsing MCP tool names."""
+        tool_use = {
+            "name": "mcp__event-bus__register_session",
+            "id": "tool-4",
+            "input": {"name": "test"},
+        }
+        result = parse_tool_use(tool_use)
+        assert result["tool_name"] == "mcp__event-bus__register_session"
+
+
+class TestParseEntry:
+    """Tests for entry parsing."""
+
+    def test_parse_user_message(self):
+        """Test parsing a user message."""
+        entry = {
+            "type": "user",
+            "uuid": "user-1",
+            "sessionId": "session-1",
+            "timestamp": "2025-01-01T12:00:00.000Z",
+            "cwd": "/test",
+            "gitBranch": "main",
+            "message": {"role": "user", "content": "Hello"},
+        }
+        events = parse_entry(entry, "test-project")
+        assert len(events) == 1
+        assert events[0].entry_type == "user"
+        assert events[0].session_id == "session-1"
+
+    def test_parse_assistant_with_tool(self):
+        """Test parsing an assistant message with tool_use."""
+        entry = {
+            "type": "assistant",
+            "uuid": "assistant-1",
+            "sessionId": "session-1",
+            "timestamp": "2025-01-01T12:00:00.000Z",
+            "message": {
+                "model": "claude-opus-4-5",
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool-1",
+                        "name": "Bash",
+                        "input": {"command": "ls -la"},
+                    }
+                ],
+                "usage": {"input_tokens": 100, "output_tokens": 50},
+            },
+        }
+        events = parse_entry(entry, "test-project")
+        assert len(events) == 1
+        assert events[0].entry_type == "tool_use"
+        assert events[0].tool_name == "Bash"
+        assert events[0].command == "ls"
+        assert events[0].input_tokens == 100
+
+    def test_parse_tool_result(self):
+        """Test parsing a tool_result entry."""
+        entry = {
+            "type": "user",
+            "uuid": "result-1",
+            "sessionId": "session-1",
+            "timestamp": "2025-01-01T12:00:00.000Z",
+            "message": {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool-1",
+                        "content": "output",
+                    }
+                ],
+            },
+        }
+        events = parse_entry(entry, "test-project")
+        assert len(events) == 1
+        assert events[0].entry_type == "tool_result"
+        assert events[0].tool_id == "tool-1"
+
+    def test_skip_file_history_snapshot(self):
+        """Test that file-history-snapshot entries are skipped."""
+        entry = {
+            "type": "file-history-snapshot",
+            "uuid": "snapshot-1",
+            "sessionId": "session-1",
+            "timestamp": "2025-01-01T12:00:00.000Z",
+        }
+        events = parse_entry(entry, "test-project")
+        assert len(events) == 0
+
+    def test_skip_malformed_entry(self):
+        """Test that entries without required fields are skipped."""
+        entry = {"type": "user"}  # Missing uuid, sessionId, timestamp
+        events = parse_entry(entry, "test-project")
+        assert len(events) == 0
+
+
+class TestIngestFile:
+    """Tests for file ingestion."""
+
+    def test_ingest_file(self, storage, sample_logs_dir):
+        """Test ingesting a JSONL file."""
+        project_dir = sample_logs_dir / "-test-project"
+        jsonl_file = project_dir / "test-session.jsonl"
+
+        result = ingest_file(jsonl_file, storage)
+        assert result["entries_processed"] == 3
+        assert result["events_added"] == 3
+        assert result["skipped"] is False
+
+    def test_incremental_ingestion(self, storage, sample_logs_dir):
+        """Test that unchanged files are skipped on re-ingestion."""
+        project_dir = sample_logs_dir / "-test-project"
+        jsonl_file = project_dir / "test-session.jsonl"
+
+        # First ingestion
+        result1 = ingest_file(jsonl_file, storage)
+        assert result1["skipped"] is False
+
+        # Second ingestion should skip
+        result2 = ingest_file(jsonl_file, storage)
+        assert result2["skipped"] is True
+
+    def test_force_reingestion(self, storage, sample_logs_dir):
+        """Test force re-ingestion."""
+        project_dir = sample_logs_dir / "-test-project"
+        jsonl_file = project_dir / "test-session.jsonl"
+
+        # First ingestion
+        ingest_file(jsonl_file, storage)
+
+        # Force re-ingestion should process again
+        result = ingest_file(jsonl_file, storage, force=True)
+        assert result["skipped"] is False
+
+
+class TestFindLogFiles:
+    """Tests for log file discovery."""
+
+    def test_find_log_files(self, sample_logs_dir):
+        """Test finding JSONL files in logs directory."""
+        files = find_log_files(logs_dir=sample_logs_dir, days=7)
+        assert len(files) == 1
+        assert files[0].suffix == ".jsonl"
+
+    def test_filter_by_project(self, sample_logs_dir):
+        """Test filtering by project name."""
+        # Create another project
+        other_project = sample_logs_dir / "-other-project"
+        other_project.mkdir()
+        (other_project / "other.jsonl").write_text('{"type":"user"}\n')
+
+        # Should find both
+        all_files = find_log_files(logs_dir=sample_logs_dir, days=7)
+        assert len(all_files) == 2
+
+        # Should only find matching project
+        filtered = find_log_files(logs_dir=sample_logs_dir, days=7, project_filter="test")
+        assert len(filtered) == 1
+        assert "test" in str(filtered[0])
+
+
+class TestIngestLogs:
+    """Tests for full ingestion flow."""
+
+    def test_ingest_logs(self, storage, sample_logs_dir):
+        """Test full ingestion flow."""
+        # Use find_log_files with explicit logs_dir
+        from session_analytics.ingest import ingest_file as do_ingest_file
+        from session_analytics.ingest import update_session_stats
+
+        files = find_log_files(logs_dir=sample_logs_dir, days=7)
+        assert len(files) == 1
+
+        # Ingest the file
+        result = do_ingest_file(files[0], storage)
+        assert result["events_added"] == 3
+
+        # Update session stats
+        sessions = update_session_stats(storage)
+        assert sessions >= 1
diff --git a/tests/test_server.py b/tests/test_server.py
index d05e030..d85b404 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -14,11 +14,12 @@ def test_get_status():
     assert "session_count" in result
 
 
-def test_ingest_logs_placeholder():
-    """Test that ingest_logs returns placeholder response."""
-    result = ingest_logs.fn(days=7)
-    assert result["status"] == "not_implemented"
-    assert result["days"] == 7
+def test_ingest_logs():
+    """Test that ingest_logs runs and returns stats."""
+    result = ingest_logs.fn(days=1)
+    assert result["status"] == "ok"
+    assert "files_found" in result
+    assert "events_added" in result
 
 
 def test_query_tool_frequency_placeholder():

From 1da00bbf7238a7e6d403def9ebe8938813d3df97 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 04:47:17 +0000
Subject: [PATCH 04/10] Add Phase 4: Query tools implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements all query MCP tools:
- query_tool_frequency: Tool usage counts with project filter
- query_timeline: Events in time window with filtering
- query_commands: Bash command breakdown with prefix filter
- query_sessions: Session metadata and token totals
- query_tokens: Token usage grouped by day/session/model

Also adds:
- ensure_fresh_data(): Auto-refresh mechanism (5 min staleness)
- Comprehensive tests for all queries (18 new tests)

Closes #4

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/session_analytics/queries.py | 431 +++++++++++++++++++++++++++++++
 src/session_analytics/server.py  |  98 ++++++-
 tests/test_queries.py            | 302 ++++++++++++++++++++++
 tests/test_server.py             |  64 ++++-
 4 files changed, 881 insertions(+), 14 deletions(-)
 create mode 100644 src/session_analytics/queries.py
 create mode 100644 tests/test_queries.py

diff --git a/src/session_analytics/queries.py b/src/session_analytics/queries.py
new file mode 100644
index 0000000..51e0e09
--- /dev/null
+++ b/src/session_analytics/queries.py
@@ -0,0 +1,431 @@
+"""Query implementations for session analytics."""
+
+from datetime import datetime, timedelta
+
+from session_analytics.storage import SQLiteStorage
+
+
+def ensure_fresh_data(
+    storage: SQLiteStorage,
+    max_age_minutes: int = 5,
+    days: int = 7,
+    project: str | None = None,
+    force: bool = False,
+) -> bool:
+    """Check if data is stale and refresh if needed.
+
+    Args:
+        storage: Storage instance
+        max_age_minutes: Maximum age of data before refresh
+        days: Number of days to look back when refreshing
+        project: Optional project filter for refresh
+        force: Force refresh regardless of age
+
+    Returns:
+        True if data was refreshed, False if data was fresh
+    """
+    if force:
+        from session_analytics.ingest import ingest_logs
+
+        ingest_logs(storage, days=days, project=project)
+        return True
+
+    last_ingest = storage.get_last_ingestion_time()
+    if last_ingest is None or (datetime.now() - last_ingest) > timedelta(minutes=max_age_minutes):
+        from session_analytics.ingest import ingest_logs
+
+        ingest_logs(storage, days=days, project=project)
+        return True
+
+    return False
+
+
+def query_tool_frequency(
+    storage: SQLiteStorage,
+    days: int = 7,
+    project: str | None = None,
+) -> dict:
+    """Get tool usage frequency counts.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+        project: Optional project path filter
+
+    Returns:
+        Dict with tool frequency breakdown
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+
+    with storage._connect() as conn:
+        conditions = ["timestamp >= ?", "tool_name IS NOT NULL"]
+        params: list = [cutoff]
+
+        if project:
+            conditions.append("project_path LIKE ?")
+            params.append(f"%{project}%")
+
+        where_clause = " AND ".join(conditions)
+
+        # Get tool frequency counts
+        rows = conn.execute(
+            f"""
+            SELECT tool_name, COUNT(*) as count
+            FROM events
+            WHERE {where_clause}
+            GROUP BY tool_name
+            ORDER BY count DESC
+            """,
+            params,
+        ).fetchall()
+
+        tools = [{"tool": row["tool_name"], "count": row["count"]} for row in rows]
+
+        # Get total tool calls
+        total = sum(t["count"] for t in tools)
+
+        return {
+            "days": days,
+            "project": project,
+            "total_tool_calls": total,
+            "tools": tools,
+        }
+
+
+def query_timeline(
+    storage: SQLiteStorage,
+    start: datetime | None = None,
+    end: datetime | None = None,
+    tool: str | None = None,
+    project: str | None = None,
+    limit: int = 100,
+) -> dict:
+    """Get events in a time window.
+
+    Args:
+        storage: Storage instance
+        start: Start of time window (default: 24 hours ago)
+        end: End of time window (default: now)
+        tool: Optional tool name filter
+        project: Optional project path filter
+        limit: Maximum events to return
+
+    Returns:
+        Dict with timeline events
+    """
+    if start is None:
+        start = datetime.now() - timedelta(hours=24)
+    if end is None:
+        end = datetime.now()
+
+    events = storage.get_events_in_range(
+        start=start,
+        end=end,
+        tool_name=tool,
+        project_path=project,
+        limit=limit,
+    )
+
+    return {
+        "start": start.isoformat(),
+        "end": end.isoformat(),
+        "tool": tool,
+        "project": project,
+        "count": len(events),
+        "events": [
+            {
+                "timestamp": e.timestamp.isoformat(),
+                "session_id": e.session_id,
+                "entry_type": e.entry_type,
+                "tool_name": e.tool_name,
+                "command": e.command,
+                "file_path": e.file_path,
+                "skill_name": e.skill_name,
+                "is_error": e.is_error,
+            }
+            for e in events
+        ],
+    }
+
+
+def query_commands(
+    storage: SQLiteStorage,
+    days: int = 7,
+    project: str | None = None,
+    prefix: str | None = None,
+) -> dict:
+    """Get Bash command breakdown.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+        project: Optional project path filter
+        prefix: Optional command prefix filter (e.g., "git")
+
+    Returns:
+        Dict with command breakdown
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+
+    with storage._connect() as conn:
+        conditions = ["timestamp >= ?", "tool_name = 'Bash'", "command IS NOT NULL"]
+        params: list = [cutoff]
+
+        if project:
+            conditions.append("project_path LIKE ?")
+            params.append(f"%{project}%")
+
+        if prefix:
+            conditions.append("command LIKE ?")
+            params.append(f"{prefix}%")
+
+        where_clause = " AND ".join(conditions)
+
+        # Get command frequency counts
+        rows = conn.execute(
+            f"""
+            SELECT command, COUNT(*) as count
+            FROM events
+            WHERE {where_clause}
+            GROUP BY command
+            ORDER BY count DESC
+            """,
+            params,
+        ).fetchall()
+
+        commands = [{"command": row["command"], "count": row["count"]} for row in rows]
+
+        # Get total Bash commands
+        total = sum(c["count"] for c in commands)
+
+        return {
+            "days": days,
+            "project": project,
+            "prefix": prefix,
+            "total_commands": total,
+            "commands": commands,
+        }
+
+
+def query_sessions(
+    storage: SQLiteStorage,
+    days: int = 7,
+    project: str | None = None,
+) -> dict:
+    """Get session metadata.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+        project: Optional project path filter
+
+    Returns:
+        Dict with session information
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+
+    with storage._connect() as conn:
+        conditions = ["last_seen >= ?"]
+        params: list = [cutoff]
+
+        if project:
+            conditions.append("project_path LIKE ?")
+            params.append(f"%{project}%")
+
+        where_clause = " AND ".join(conditions)
+
+        rows = conn.execute(
+            f"""
+            SELECT
+                id, project_path, first_seen, last_seen,
+                entry_count, tool_use_count,
+                total_input_tokens, total_output_tokens,
+                primary_branch
+            FROM sessions
+            WHERE {where_clause}
+            ORDER BY last_seen DESC
+            """,
+            params,
+        ).fetchall()
+
+        sessions = [
+            {
+                "id": row["id"],
+                "project": row["project_path"],
+                "first_seen": row["first_seen"],
+                "last_seen": row["last_seen"],
+                "entry_count": row["entry_count"],
+                "tool_use_count": row["tool_use_count"],
+                "input_tokens": row["total_input_tokens"],
+                "output_tokens": row["total_output_tokens"],
+                "branch": row["primary_branch"],
+            }
+            for row in rows
+        ]
+
+        # Calculate totals
+        total_entries = sum(s["entry_count"] for s in sessions)
+        total_tools = sum(s["tool_use_count"] for s in sessions)
+        total_input = sum(s["input_tokens"] or 0 for s in sessions)
+        total_output = sum(s["output_tokens"] or 0 for s in sessions)
+
+        return {
+            "days": days,
+            "project": project,
+            "session_count": len(sessions),
+            "total_entries": total_entries,
+            "total_tool_uses": total_tools,
+            "total_input_tokens": total_input,
+            "total_output_tokens": total_output,
+            "sessions": sessions,
+        }
+
+
+def query_tokens(
+    storage: SQLiteStorage,
+    days: int = 7,
+    project: str | None = None,
+    by: str = "day",
+) -> dict:
+    """Get token usage analysis.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+        project: Optional project path filter
+        by: Grouping: 'day', 'session', or 'model'
+
+    Returns:
+        Dict with token usage breakdown
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+
+    with storage._connect() as conn:
+        conditions = ["timestamp >= ?"]
+        params: list = [cutoff]
+
+        if project:
+            conditions.append("project_path LIKE ?")
+            params.append(f"%{project}%")
+
+        where_clause = " AND ".join(conditions)
+
+        if by == "day":
+            # Group by day
+            rows = conn.execute(
+                f"""
+                SELECT
+                    DATE(timestamp) as day,
+                    SUM(COALESCE(input_tokens, 0)) as input_tokens,
+                    SUM(COALESCE(output_tokens, 0)) as output_tokens,
+                    SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
+                    SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
+                    COUNT(*) as event_count
+                FROM events
+                WHERE {where_clause}
+                GROUP BY DATE(timestamp)
+                ORDER BY day DESC
+                """,
+                params,
+            ).fetchall()
+
+            breakdown = [
+                {
+                    "day": row["day"],
+                    "input_tokens": row["input_tokens"],
+                    "output_tokens": row["output_tokens"],
+                    "cache_read_tokens": row["cache_read_tokens"],
+                    "cache_creation_tokens": row["cache_creation_tokens"],
+                    "event_count": row["event_count"],
+                }
+                for row in rows
+            ]
+            group_key = "day"
+
+        elif by == "session":
+            # Group by session
+            rows = conn.execute(
+                f"""
+                SELECT
+                    session_id,
+                    project_path,
+                    SUM(COALESCE(input_tokens, 0)) as input_tokens,
+                    SUM(COALESCE(output_tokens, 0)) as output_tokens,
+                    SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
+                    SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
+                    COUNT(*) as event_count
+                FROM events
+                WHERE {where_clause}
+                GROUP BY session_id
+                ORDER BY input_tokens DESC
+                """,
+                params,
+            ).fetchall()
+
+            breakdown = [
+                {
+                    "session_id": row["session_id"],
+                    "project": row["project_path"],
+                    "input_tokens": row["input_tokens"],
+                    "output_tokens": row["output_tokens"],
+                    "cache_read_tokens": row["cache_read_tokens"],
+                    "cache_creation_tokens": row["cache_creation_tokens"],
+                    "event_count": row["event_count"],
+                }
+                for row in rows
+            ]
+            group_key = "session"
+
+        elif by == "model":
+            # Group by model
+            rows = conn.execute(
+                f"""
+                SELECT
+                    COALESCE(model, 'unknown') as model,
+                    SUM(COALESCE(input_tokens, 0)) as input_tokens,
+                    SUM(COALESCE(output_tokens, 0)) as output_tokens,
+                    SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
+                    SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
+                    COUNT(*) as event_count
+                FROM events
+                WHERE {where_clause}
+                GROUP BY model
+                ORDER BY input_tokens DESC
+                """,
+                params,
+            ).fetchall()
+
+            breakdown = [
+                {
+                    "model": row["model"],
+                    "input_tokens": row["input_tokens"],
+                    "output_tokens": row["output_tokens"],
+                    "cache_read_tokens": row["cache_read_tokens"],
+                    "cache_creation_tokens": row["cache_creation_tokens"],
+                    "event_count": row["event_count"],
+                }
+                for row in rows
+            ]
+            group_key = "model"
+
+        else:
+            return {
+                "error": f"Invalid grouping: {by}. Use 'day', 'session', or 'model'.",
+            }
+
+        # Calculate totals
+        total_input = sum(b["input_tokens"] for b in breakdown)
+        total_output = sum(b["output_tokens"] for b in breakdown)
+        total_cache_read = sum(b["cache_read_tokens"] for b in breakdown)
+        total_cache_creation = sum(b["cache_creation_tokens"] for b in breakdown)
+
+        return {
+            "days": days,
+            "project": project,
+            "group_by": group_key,
+            "total_input_tokens": total_input,
+            "total_output_tokens": total_output,
+            "total_cache_read_tokens": total_cache_read,
+            "total_cache_creation_tokens": total_cache_creation,
+            "breakdown": breakdown,
+        }
diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py
index 5abd6d9..6ddbd6e 100644
--- a/src/session_analytics/server.py
+++ b/src/session_analytics/server.py
@@ -20,6 +20,12 @@
 from fastmcp import FastMCP
 
 from session_analytics.ingest import ingest_logs as do_ingest_logs
+from session_analytics.queries import ensure_fresh_data
+from session_analytics.queries import query_commands as do_query_commands
+from session_analytics.queries import query_sessions as do_query_sessions
+from session_analytics.queries import query_timeline as do_query_timeline
+from session_analytics.queries import query_tokens as do_query_tokens
+from session_analytics.queries import query_tool_frequency as do_query_tool_frequency
 from session_analytics.storage import SQLiteStorage
 
 # Configure logging
@@ -97,13 +103,91 @@ def query_tool_frequency(days: int = 7, project: str | None = None) -> dict:
     Returns:
         Tool frequency breakdown
     """
-    # Placeholder - will be implemented in Phase 4
-    return {
-        "status": "not_implemented",
-        "message": "Query will be implemented in Phase 4",
-        "days": days,
-        "project": project,
-    }
+    ensure_fresh_data(storage, days=days, project=project)
+    result = do_query_tool_frequency(storage, days=days, project=project)
+    return {"status": "ok", **result}
+
+
+@mcp.tool()
+def query_timeline(
+    start: str | None = None,
+    end: str | None = None,
+    tool: str | None = None,
+    project: str | None = None,
+    limit: int = 100,
+) -> dict:
+    """Get events in a time window.
+
+    Args:
+        start: Start time (ISO format, default: 24 hours ago)
+        end: End time (ISO format, default: now)
+        tool: Optional tool name filter
+        project: Optional project path filter
+        limit: Maximum events to return (default: 100)
+
+    Returns:
+        Timeline of events
+    """
+    from datetime import datetime
+
+    start_dt = datetime.fromisoformat(start) if start else None
+    end_dt = datetime.fromisoformat(end) if end else None
+
+    ensure_fresh_data(storage)
+    result = do_query_timeline(
+        storage, start=start_dt, end=end_dt, tool=tool, project=project, limit=limit
+    )
+    return {"status": "ok", **result}
+
+
+@mcp.tool()
+def query_commands(days: int = 7, project: str | None = None, prefix: str | None = None) -> dict:
+    """Get Bash command breakdown.
+
+    Args:
+        days: Number of days to analyze (default: 7)
+        project: Optional project path filter
+        prefix: Optional command prefix filter (e.g., "git")
+
+    Returns:
+        Command frequency breakdown
+    """
+    ensure_fresh_data(storage, days=days, project=project)
+    result = do_query_commands(storage, days=days, project=project, prefix=prefix)
+    return {"status": "ok", **result}
+
+
+@mcp.tool()
+def query_sessions(days: int = 7, project: str | None = None) -> dict:
+    """Get session metadata.
+
+    Args:
+        days: Number of days to analyze (default: 7)
+        project: Optional project path filter
+
+    Returns:
+        Session information
+    """
+    ensure_fresh_data(storage, days=days, project=project)
+    result = do_query_sessions(storage, days=days, project=project)
+    return {"status": "ok", **result}
+
+
+@mcp.tool()
+def query_tokens(days: int = 7, project: str | None = None, by: str = "day") -> dict:
+    """Get token usage analysis.
+
+    Args:
+        days: Number of days to analyze (default: 7)
+        project: Optional project path filter
+        by: Grouping: 'day', 'session', or 'model' (default: 'day')
+
+    Returns:
+        Token usage breakdown
+    """
+    ensure_fresh_data(storage, days=days, project=project)
+    result = do_query_tokens(storage, days=days, project=project, by=by)
+    return {"status": "ok", **result}
 
 
 def create_app():
diff --git a/tests/test_queries.py b/tests/test_queries.py
new file mode 100644
index 0000000..c5a68db
--- /dev/null
+++ b/tests/test_queries.py
@@ -0,0 +1,302 @@
+"""Tests for the query implementations."""
+
+import tempfile
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pytest
+
+from session_analytics.queries import (
+    ensure_fresh_data,
+    query_commands,
+    query_sessions,
+    query_timeline,
+    query_tokens,
+    query_tool_frequency,
+)
+from session_analytics.storage import Event, Session, SQLiteStorage
+
+
+@pytest.fixture
+def storage():
+    """Create a temporary storage instance for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "test.db"
+        yield SQLiteStorage(db_path)
+
+
+@pytest.fixture
+def populated_storage(storage):
+    """Create a storage instance with sample data."""
+    now = datetime.now()
+
+    # Add some events
+    events = [
+        Event(
+            id=None,
+            uuid="event-1",
+            timestamp=now - timedelta(hours=1),
+            session_id="session-1",
+            project_path="-test-project",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="git",
+            command_args="status",
+            input_tokens=100,
+            output_tokens=50,
+            model="claude-opus-4-5",
+        ),
+        Event(
+            id=None,
+            uuid="event-2",
+            timestamp=now - timedelta(hours=2),
+            session_id="session-1",
+            project_path="-test-project",
+            entry_type="tool_use",
+            tool_name="Read",
+            file_path="/path/to/file.py",
+            input_tokens=80,
+            output_tokens=30,
+            model="claude-opus-4-5",
+        ),
+        Event(
+            id=None,
+            uuid="event-3",
+            timestamp=now - timedelta(hours=3),
+            session_id="session-1",
+            project_path="-test-project",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="git",
+            command_args="diff",
+            input_tokens=120,
+            output_tokens=60,
+            model="claude-opus-4-5",
+        ),
+        Event(
+            id=None,
+            uuid="event-4",
+            timestamp=now - timedelta(hours=4),
+            session_id="session-2",
+            project_path="-other-project",
+            entry_type="tool_use",
+            tool_name="Edit",
+            file_path="/path/to/other.py",
+            input_tokens=200,
+            output_tokens=100,
+            model="claude-sonnet-4-20250514",
+        ),
+        Event(
+            id=None,
+            uuid="event-5",
+            timestamp=now - timedelta(days=10),
+            session_id="session-3",
+            project_path="-old-project",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="make",
+            input_tokens=50,
+            output_tokens=25,
+            model="claude-opus-4-5",
+        ),
+    ]
+    storage.add_events_batch(events)
+
+    # Add sessions
+    storage.upsert_session(
+        Session(
+            id="session-1",
+            project_path="-test-project",
+            first_seen=now - timedelta(hours=3),
+            last_seen=now - timedelta(hours=1),
+            entry_count=3,
+            tool_use_count=3,
+            total_input_tokens=300,
+            total_output_tokens=140,
+            primary_branch="main",
+        )
+    )
+    storage.upsert_session(
+        Session(
+            id="session-2",
+            project_path="-other-project",
+            first_seen=now - timedelta(hours=4),
+            last_seen=now - timedelta(hours=4),
+            entry_count=1,
+            tool_use_count=1,
+            total_input_tokens=200,
+            total_output_tokens=100,
+            primary_branch="feature",
+        )
+    )
+
+    return storage
+
+
+class TestQueryToolFrequency:
+    """Tests for tool frequency queries."""
+
+    def test_basic_frequency(self, populated_storage):
+        """Test basic tool frequency query."""
+        result = query_tool_frequency(populated_storage, days=7)
+        assert result["total_tool_calls"] == 4  # 5 events, but 1 is 10 days old
+        assert len(result["tools"]) > 0
+
+        # Check that Bash is most frequent
+        tools = {t["tool"]: t["count"] for t in result["tools"]}
+        assert tools.get("Bash", 0) == 2
+        assert tools.get("Read", 0) == 1
+        assert tools.get("Edit", 0) == 1
+
+    def test_frequency_with_project_filter(self, populated_storage):
+        """Test tool frequency with project filter."""
+        result = query_tool_frequency(populated_storage, days=7, project="test")
+        assert result["project"] == "test"
+        # Should only include test-project events
+        assert result["total_tool_calls"] == 3
+
+    def test_frequency_days_filter(self, populated_storage):
+        """Test that days filter works."""
+        result = query_tool_frequency(populated_storage, days=30)
+        assert result["total_tool_calls"] == 5  # All events including old one
+
+
+class TestQueryTimeline:
+    """Tests for timeline queries."""
+
+    def test_basic_timeline(self, populated_storage):
+        """Test basic timeline query."""
+        result = query_timeline(populated_storage, limit=10)
+        assert "events" in result
+        assert len(result["events"]) <= 10
+
+    def test_timeline_with_tool_filter(self, populated_storage):
+        """Test timeline with tool filter."""
+        result = query_timeline(populated_storage, tool="Bash", limit=10)
+        for event in result["events"]:
+            assert event["tool_name"] == "Bash"
+
+    def test_timeline_with_time_range(self, populated_storage):
+        """Test timeline with time range."""
+        now = datetime.now()
+        start = now - timedelta(hours=2)
+        end = now
+
+        result = query_timeline(populated_storage, start=start, end=end, limit=10)
+        # Should only include events within range
+        for event in result["events"]:
+            ts = datetime.fromisoformat(event["timestamp"])
+            assert ts >= start
+            assert ts <= end
+
+
+class TestQueryCommands:
+    """Tests for command queries."""
+
+    def test_basic_commands(self, populated_storage):
+        """Test basic command query."""
+        result = query_commands(populated_storage, days=7)
+        assert result["total_commands"] >= 2  # At least 2 git commands
+
+        # Check that git is present
+        commands = {c["command"]: c["count"] for c in result["commands"]}
+        assert "git" in commands
+        assert commands["git"] == 2
+
+    def test_commands_with_prefix(self, populated_storage):
+        """Test command query with prefix filter."""
+        result = query_commands(populated_storage, days=7, prefix="gi")
+        # Should only include git commands
+        for cmd in result["commands"]:
+            assert cmd["command"].startswith("gi")
+
+    def test_commands_with_project_filter(self, populated_storage):
+        """Test command query with project filter."""
+        result = query_commands(populated_storage, days=7, project="test")
+        assert result["project"] == "test"
+
+
+class TestQuerySessions:
+    """Tests for session queries."""
+
+    def test_basic_sessions(self, populated_storage):
+        """Test basic session query."""
+        result = query_sessions(populated_storage, days=7)
+        assert result["session_count"] == 2  # 2 sessions within 7 days
+        assert len(result["sessions"]) == 2
+
+    def test_sessions_with_project_filter(self, populated_storage):
+        """Test session query with project filter."""
+        result = query_sessions(populated_storage, days=7, project="test")
+        # Should only include test-project session
+        assert result["session_count"] == 1
+        assert result["sessions"][0]["project"] == "-test-project"
+
+    def test_session_totals(self, populated_storage):
+        """Test session totals calculation."""
+        result = query_sessions(populated_storage, days=7)
+        assert result["total_entries"] == 4  # 3 + 1
+        assert result["total_tool_uses"] == 4  # 3 + 1
+        assert result["total_input_tokens"] == 500  # 300 + 200
+        assert result["total_output_tokens"] == 240  # 140 + 100
+
+
+class TestQueryTokens:
+    """Tests for token queries."""
+
+    def test_tokens_by_day(self, populated_storage):
+        """Test token query grouped by day."""
+        result = query_tokens(populated_storage, days=7, by="day")
+        assert result["group_by"] == "day"
+        assert "breakdown" in result
+        assert result["total_input_tokens"] >= 0
+        assert result["total_output_tokens"] >= 0
+
+    def test_tokens_by_session(self, populated_storage):
+        """Test token query grouped by session."""
+        result = query_tokens(populated_storage, days=7, by="session")
+        assert result["group_by"] == "session"
+        # Should have entries for each session
+        assert len(result["breakdown"]) >= 1
+
+    def test_tokens_by_model(self, populated_storage):
+        """Test token query grouped by model."""
+        result = query_tokens(populated_storage, days=7, by="model")
+        assert result["group_by"] == "model"
+
+        # Should have entries for each model
+        models = {b["model"] for b in result["breakdown"]}
+        assert "claude-opus-4-5" in models
+
+    def test_tokens_invalid_grouping(self, populated_storage):
+        """Test token query with invalid grouping."""
+        result = query_tokens(populated_storage, days=7, by="invalid")
+        assert "error" in result
+
+
+class TestEnsureFreshData:
+    """Tests for data freshness checking."""
+
+    def test_fresh_data_not_refreshed(self, populated_storage):
+        """Test that fresh data is not refreshed."""
+        # First, update ingestion state to make data appear fresh
+        from session_analytics.storage import IngestionState
+
+        populated_storage.update_ingestion_state(
+            IngestionState(
+                file_path="/test/file.jsonl",
+                file_size=1000,
+                last_modified=datetime.now(),
+                entries_processed=10,
+                last_processed=datetime.now(),
+            )
+        )
+
+        # Data should be fresh
+        refreshed = ensure_fresh_data(populated_storage, max_age_minutes=5)
+        assert not refreshed
+
+    def test_force_refresh(self, populated_storage):
+        """Test that force=True always refreshes."""
+        refreshed = ensure_fresh_data(populated_storage, force=True)
+        assert refreshed
diff --git a/tests/test_server.py b/tests/test_server.py
index d85b404..23a86cd 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -1,6 +1,14 @@
 """Tests for the MCP server."""
 
-from session_analytics.server import get_status, ingest_logs, query_tool_frequency
+from session_analytics.server import (
+    get_status,
+    ingest_logs,
+    query_commands,
+    query_sessions,
+    query_timeline,
+    query_tokens,
+    query_tool_frequency,
+)
 
 
 def test_get_status():
@@ -22,9 +30,51 @@ def test_ingest_logs():
     assert "events_added" in result
 
 
-def test_query_tool_frequency_placeholder():
-    """Test that query_tool_frequency returns placeholder response."""
-    result = query_tool_frequency.fn(days=14, project="/some/path")
-    assert result["status"] == "not_implemented"
-    assert result["days"] == 14
-    assert result["project"] == "/some/path"
+def test_query_tool_frequency():
+    """Test that query_tool_frequency returns tool counts."""
+    result = query_tool_frequency.fn(days=7)
+    assert result["status"] == "ok"
+    assert "days" in result
+    assert "total_tool_calls" in result
+    assert "tools" in result
+    assert isinstance(result["tools"], list)
+
+
+def test_query_timeline():
+    """Test that query_timeline returns events."""
+    result = query_timeline.fn(limit=10)
+    assert result["status"] == "ok"
+    assert "start" in result
+    assert "end" in result
+    assert "events" in result
+    assert isinstance(result["events"], list)
+
+
+def test_query_commands():
+    """Test that query_commands returns command counts."""
+    result = query_commands.fn(days=7)
+    assert result["status"] == "ok"
+    assert "days" in result
+    assert "total_commands" in result
+    assert "commands" in result
+    assert isinstance(result["commands"], list)
+
+
+def test_query_sessions():
+    """Test that query_sessions returns session info."""
+    result = query_sessions.fn(days=7)
+    assert result["status"] == "ok"
+    assert "days" in result
+    assert "session_count" in result
+    assert "sessions" in result
+    assert isinstance(result["sessions"], list)
+
+
+def test_query_tokens():
+    """Test that query_tokens returns token breakdown."""
+    result = query_tokens.fn(days=7, by="day")
+    assert result["status"] == "ok"
+    assert "days" in result
+    assert "group_by" in result
+    assert "breakdown" in result
+    assert isinstance(result["breakdown"], list)

From 66d1e7451b936e1e6a8f9552da2913d3e0554712 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 04:51:04 +0000
Subject: [PATCH 05/10] Add Phase 5: Pattern detection and insights
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements pattern detection for /improve-workflow integration:
- compute_tool_frequency_patterns(): Tool usage frequency
- compute_command_patterns(): Bash command frequency
- compute_sequence_patterns(): Tool n-gram detection
- compute_permission_gaps(): Commands not in settings.json
- get_insights(): Unified insights API for /improve-workflow

New MCP tools:
- query_sequences: Common tool patterns
- query_permission_gaps: Commands needing settings.json
- get_insights: Pre-computed patterns

Adds 16 new tests (69 total).

Closes #5

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/session_analytics/patterns.py | 369 ++++++++++++++++++++++++++++++
 src/session_analytics/server.py   |  71 ++++++
 tests/test_patterns.py            | 309 +++++++++++++++++++++++++
 tests/test_server.py              |  31 +++
 4 files changed, 780 insertions(+)
 create mode 100644 src/session_analytics/patterns.py
 create mode 100644 tests/test_patterns.py

diff --git a/src/session_analytics/patterns.py b/src/session_analytics/patterns.py
new file mode 100644
index 0000000..bc0e5cd
--- /dev/null
+++ b/src/session_analytics/patterns.py
@@ -0,0 +1,369 @@
+"""Pattern detection and insight generation for session analytics."""
+
+import json
+import logging
+from collections import Counter
+from datetime import datetime, timedelta
+from pathlib import Path
+
+from session_analytics.storage import Pattern, SQLiteStorage
+
+logger = logging.getLogger("session-analytics")
+
+# Default settings.json location
+DEFAULT_SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
+
+
+def compute_tool_frequency_patterns(
+    storage: SQLiteStorage,
+    days: int = 7,
+) -> list[Pattern]:
+    """Compute tool frequency patterns from events.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+
+    Returns:
+        List of tool frequency patterns
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+    now = datetime.now()
+
+    with storage._connect() as conn:
+        rows = conn.execute(
+            """
+            SELECT tool_name, COUNT(*) as count, MAX(timestamp) as last_seen
+            FROM events
+            WHERE timestamp >= ? AND tool_name IS NOT NULL
+            GROUP BY tool_name
+            ORDER BY count DESC
+            """,
+            (cutoff,),
+        ).fetchall()
+
+        patterns = []
+        for row in rows:
+            patterns.append(
+                Pattern(
+                    id=None,
+                    pattern_type="tool_frequency",
+                    pattern_key=row["tool_name"],
+                    count=row["count"],
+                    last_seen=row["last_seen"],
+                    metadata={},
+                    computed_at=now,
+                )
+            )
+
+        return patterns
+
+
+def compute_command_patterns(
+    storage: SQLiteStorage,
+    days: int = 7,
+) -> list[Pattern]:
+    """Compute Bash command patterns from events.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+
+    Returns:
+        List of command patterns
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+    now = datetime.now()
+
+    with storage._connect() as conn:
+        rows = conn.execute(
+            """
+            SELECT command, COUNT(*) as count, MAX(timestamp) as last_seen
+            FROM events
+            WHERE timestamp >= ? AND tool_name = 'Bash' AND command IS NOT NULL
+            GROUP BY command
+            ORDER BY count DESC
+            """,
+            (cutoff,),
+        ).fetchall()
+
+        patterns = []
+        for row in rows:
+            patterns.append(
+                Pattern(
+                    id=None,
+                    pattern_type="command_frequency",
+                    pattern_key=row["command"],
+                    count=row["count"],
+                    last_seen=row["last_seen"],
+                    metadata={},
+                    computed_at=now,
+                )
+            )
+
+        return patterns
+
+
+def compute_sequence_patterns(
+    storage: SQLiteStorage,
+    days: int = 7,
+    sequence_length: int = 2,
+    min_count: int = 3,
+) -> list[Pattern]:
+    """Compute tool sequence patterns (n-grams) from events.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+        sequence_length: Length of sequences to detect
+        min_count: Minimum occurrences to include
+
+    Returns:
+        List of sequence patterns
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+    now = datetime.now()
+
+    with storage._connect() as conn:
+        # Get all tool events ordered by session and timestamp
+        rows = conn.execute(
+            """
+            SELECT session_id, tool_name, timestamp
+            FROM events
+            WHERE timestamp >= ? AND tool_name IS NOT NULL
+            ORDER BY session_id, timestamp
+            """,
+            (cutoff,),
+        ).fetchall()
+
+        # Group by session and extract sequences
+        sequences: Counter = Counter()
+        current_session = None
+        session_tools: list[str] = []
+
+        for row in rows:
+            if row["session_id"] != current_session:
+                # Process previous session
+                if len(session_tools) >= sequence_length:
+                    for i in range(len(session_tools) - sequence_length + 1):
+                        seq = tuple(session_tools[i : i + sequence_length])
+                        sequences[seq] += 1
+
+                current_session = row["session_id"]
+                session_tools = []
+
+            session_tools.append(row["tool_name"])
+
+        # Process last session
+        if len(session_tools) >= sequence_length:
+            for i in range(len(session_tools) - sequence_length + 1):
+                seq = tuple(session_tools[i : i + sequence_length])
+                sequences[seq] += 1
+
+        # Create patterns for sequences meeting min_count
+        patterns = []
+        for seq, count in sequences.most_common():
+            if count < min_count:
+                break
+            patterns.append(
+                Pattern(
+                    id=None,
+                    pattern_type="tool_sequence",
+                    pattern_key=" → ".join(seq),
+                    count=count,
+                    last_seen=now,
+                    metadata={"sequence": list(seq)},
+                    computed_at=now,
+                )
+            )
+
+        return patterns
+
+
+def load_allowed_commands(settings_path: Path = DEFAULT_SETTINGS_PATH) -> set[str]:
+    """Load allowed commands from Claude Code settings.json.
+
+    Args:
+        settings_path: Path to settings.json
+
+    Returns:
+        Set of allowed command prefixes
+    """
+    if not settings_path.exists():
+        return set()
+
+    try:
+        with open(settings_path) as f:
+            settings = json.load(f)
+
+        allowed = set()
+        permissions = settings.get("permissions", {})
+
+        # Look for allow patterns with Bash(command:*)
+        for pattern in permissions.get("allow", []):
+            if pattern.startswith("Bash(") and pattern.endswith(":*)"):
+                cmd = pattern[5:-3]  # Extract command from "Bash(cmd:*)"
+                allowed.add(cmd)
+
+        return allowed
+    except (json.JSONDecodeError, OSError) as e:
+        logger.warning(f"Could not load settings.json: {e}")
+        return set()
+
+
+def compute_permission_gaps(
+    storage: SQLiteStorage,
+    days: int = 7,
+    threshold: int = 5,
+    settings_path: Path = DEFAULT_SETTINGS_PATH,
+) -> list[Pattern]:
+    """Find commands that are frequently used but not in settings.json.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+        threshold: Minimum usage count to suggest adding
+        settings_path: Path to settings.json
+
+    Returns:
+        List of permission gap patterns
+    """
+    cutoff = datetime.now() - timedelta(days=days)
+    now = datetime.now()
+
+    allowed_commands = load_allowed_commands(settings_path)
+
+    with storage._connect() as conn:
+        rows = conn.execute(
+            """
+            SELECT command, COUNT(*) as count
+            FROM events
+            WHERE timestamp >= ? AND tool_name = 'Bash' AND command IS NOT NULL
+            GROUP BY command
+            HAVING COUNT(*) >= ?
+            ORDER BY count DESC
+            """,
+            (cutoff, threshold),
+        ).fetchall()
+
+        patterns = []
+        for row in rows:
+            cmd = row["command"]
+            if cmd not in allowed_commands:
+                patterns.append(
+                    Pattern(
+                        id=None,
+                        pattern_type="permission_gap",
+                        pattern_key=cmd,
+                        count=row["count"],
+                        last_seen=now,
+                        metadata={"suggestion": f"Bash({cmd}:*)"},
+                        computed_at=now,
+                    )
+                )
+
+        return patterns
+
+
+def compute_all_patterns(
+    storage: SQLiteStorage,
+    days: int = 7,
+) -> dict:
+    """Compute all pattern types and store them.
+
+    Args:
+        storage: Storage instance
+        days: Number of days to analyze
+
+    Returns:
+        Stats about computed patterns
+    """
+    # Clear existing patterns
+    storage.clear_patterns()
+
+    # Compute tool frequency
+    tool_patterns = compute_tool_frequency_patterns(storage, days=days)
+    for p in tool_patterns:
+        storage.upsert_pattern(p)
+
+    # Compute command frequency
+    command_patterns = compute_command_patterns(storage, days=days)
+    for p in command_patterns:
+        storage.upsert_pattern(p)
+
+    # Compute sequences
+    sequence_patterns = compute_sequence_patterns(storage, days=days)
+    for p in sequence_patterns:
+        storage.upsert_pattern(p)
+
+    # Compute permission gaps
+    gap_patterns = compute_permission_gaps(storage, days=days)
+    for p in gap_patterns:
+        storage.upsert_pattern(p)
+
+    return {
+        "tool_frequency_patterns": len(tool_patterns),
+        "command_patterns": len(command_patterns),
+        "sequence_patterns": len(sequence_patterns),
+        "permission_gap_patterns": len(gap_patterns),
+        "total_patterns": len(tool_patterns)
+        + len(command_patterns)
+        + len(sequence_patterns)
+        + len(gap_patterns),
+    }
+
+
+def get_insights(
+    storage: SQLiteStorage,
+    refresh: bool = False,
+    days: int = 7,
+) -> dict:
+    """Get pre-computed insights for /improve-workflow.
+
+    Args:
+        storage: Storage instance
+        refresh: Force recomputation of patterns
+        days: Number of days to analyze (only used if refresh=True)
+
+    Returns:
+        Insights organized by type
+    """
+    # Check if we need to refresh
+    patterns = storage.get_patterns()
+    if not patterns or refresh:
+        compute_all_patterns(storage, days=days)
+        patterns = storage.get_patterns()
+
+    # Organize by type
+    insights = {
+        "tool_frequency": [],
+        "command_frequency": [],
+        "sequences": [],
+        "permission_gaps": [],
+    }
+
+    for p in patterns:
+        if p.pattern_type == "tool_frequency":
+            insights["tool_frequency"].append({"tool": p.pattern_key, "count": p.count})
+        elif p.pattern_type == "command_frequency":
+            insights["command_frequency"].append({"command": p.pattern_key, "count": p.count})
+        elif p.pattern_type == "tool_sequence":
+            insights["sequences"].append({"sequence": p.pattern_key, "count": p.count})
+        elif p.pattern_type == "permission_gap":
+            insights["permission_gaps"].append(
+                {
+                    "command": p.pattern_key,
+                    "count": p.count,
+                    "suggestion": p.metadata.get("suggestion", ""),
+                }
+            )
+
+    # Add summary stats
+    insights["summary"] = {
+        "total_tools": len(insights["tool_frequency"]),
+        "total_commands": len(insights["command_frequency"]),
+        "total_sequences": len(insights["sequences"]),
+        "permission_gaps_found": len(insights["permission_gaps"]),
+    }
+
+    return insights
diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py
index 6ddbd6e..6750e89 100644
--- a/src/session_analytics/server.py
+++ b/src/session_analytics/server.py
@@ -20,6 +20,8 @@
 from fastmcp import FastMCP
 
 from session_analytics.ingest import ingest_logs as do_ingest_logs
+from session_analytics.patterns import compute_permission_gaps, compute_sequence_patterns
+from session_analytics.patterns import get_insights as do_get_insights
 from session_analytics.queries import ensure_fresh_data
 from session_analytics.queries import query_commands as do_query_commands
 from session_analytics.queries import query_sessions as do_query_sessions
@@ -190,6 +192,75 @@ def query_tokens(days: int = 7, project: str | None = None, by: str = "day") ->
     return {"status": "ok", **result}
 
 
+@mcp.tool()
+def query_sequences(days: int = 7, min_count: int = 3, length: int = 2) -> dict:
+    """Get common tool patterns (sequences).
+
+    Args:
+        days: Number of days to analyze (default: 7)
+        min_count: Minimum occurrences to include (default: 3)
+        length: Sequence length (default: 2)
+
+    Returns:
+        Common tool sequences
+    """
+    ensure_fresh_data(storage, days=days)
+    patterns = compute_sequence_patterns(
+        storage, days=days, sequence_length=length, min_count=min_count
+    )
+    return {
+        "status": "ok",
+        "days": days,
+        "min_count": min_count,
+        "sequence_length": length,
+        "sequences": [{"pattern": p.pattern_key, "count": p.count} for p in patterns],
+    }
+
+
+@mcp.tool()
+def query_permission_gaps(days: int = 7, threshold: int = 5) -> dict:
+    """Find commands that may need to be added to settings.json.
+
+    Args:
+        days: Number of days to analyze (default: 7)
+        threshold: Minimum usage count to suggest (default: 5)
+
+    Returns:
+        Commands that are frequently used but not in allowed list
+    """
+    ensure_fresh_data(storage, days=days)
+    patterns = compute_permission_gaps(storage, days=days, threshold=threshold)
+    return {
+        "status": "ok",
+        "days": days,
+        "threshold": threshold,
+        "gaps": [
+            {
+                "command": p.pattern_key,
+                "count": p.count,
+                "suggestion": p.metadata.get("suggestion", ""),
+            }
+            for p in patterns
+        ],
+    }
+
+
+@mcp.tool()
+def get_insights(refresh: bool = False, days: int = 7) -> dict:
+    """Get pre-computed patterns for /improve-workflow.
+
+    Args:
+        refresh: Force recomputation of patterns (default: False)
+        days: Number of days to analyze if refreshing (default: 7)
+
+    Returns:
+        Insights organized by type (tool_frequency, sequences, permission_gaps)
+    """
+    ensure_fresh_data(storage, days=days)
+    result = do_get_insights(storage, refresh=refresh, days=days)
+    return {"status": "ok", **result}
+
+
 def create_app():
     """Create the ASGI app for uvicorn."""
     # stateless_http=True allows resilience to server restarts
diff --git a/tests/test_patterns.py b/tests/test_patterns.py
new file mode 100644
index 0000000..0163271
--- /dev/null
+++ b/tests/test_patterns.py
@@ -0,0 +1,309 @@
+"""Tests for the pattern detection module."""
+
+import tempfile
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pytest
+
+from session_analytics.patterns import (
+    compute_all_patterns,
+    compute_command_patterns,
+    compute_permission_gaps,
+    compute_sequence_patterns,
+    compute_tool_frequency_patterns,
+    get_insights,
+    load_allowed_commands,
+)
+from session_analytics.storage import Event, SQLiteStorage
+
+
+@pytest.fixture
+def storage():
+    """Create a temporary storage instance for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "test.db"
+        yield SQLiteStorage(db_path)
+
+
+@pytest.fixture
+def populated_storage(storage):
+    """Create a storage instance with sample data for pattern detection."""
+    now = datetime.now()
+
+    # Add events that will create patterns
+    events = [
+        # Session 1: Read -> Edit -> Bash sequence
+        Event(
+            id=None,
+            uuid="e1",
+            timestamp=now - timedelta(hours=1),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Read",
+        ),
+        Event(
+            id=None,
+            uuid="e2",
+            timestamp=now - timedelta(hours=1, minutes=-1),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Edit",
+        ),
+        Event(
+            id=None,
+            uuid="e3",
+            timestamp=now - timedelta(hours=1, minutes=-2),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="git",
+        ),
+        # Session 2: Read -> Edit sequence (same as s1)
+        Event(
+            id=None,
+            uuid="e4",
+            timestamp=now - timedelta(hours=2),
+            session_id="s2",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Read",
+        ),
+        Event(
+            id=None,
+            uuid="e5",
+            timestamp=now - timedelta(hours=2, minutes=-1),
+            session_id="s2",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Edit",
+        ),
+        # Session 3: Read -> Edit sequence (third occurrence)
+        Event(
+            id=None,
+            uuid="e6",
+            timestamp=now - timedelta(hours=3),
+            session_id="s3",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Read",
+        ),
+        Event(
+            id=None,
+            uuid="e7",
+            timestamp=now - timedelta(hours=3, minutes=-1),
+            session_id="s3",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Edit",
+        ),
+        # More Bash commands for permission gap testing
+        Event(
+            id=None,
+            uuid="e8",
+            timestamp=now - timedelta(hours=4),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="make",
+        ),
+        Event(
+            id=None,
+            uuid="e9",
+            timestamp=now - timedelta(hours=4, minutes=-1),
+            session_id="s2",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="make",
+        ),
+        Event(
+            id=None,
+            uuid="e10",
+            timestamp=now - timedelta(hours=4, minutes=-2),
+            session_id="s3",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="make",
+        ),
+        Event(
+            id=None,
+            uuid="e11",
+            timestamp=now - timedelta(hours=4, minutes=-3),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="make",
+        ),
+        Event(
+            id=None,
+            uuid="e12",
+            timestamp=now - timedelta(hours=4, minutes=-4),
+            session_id="s2",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="make",
+        ),
+    ]
+
+    storage.add_events_batch(events)
+    return storage
+
+
+class TestToolFrequencyPatterns:
+    """Tests for tool frequency pattern detection."""
+
+    def test_compute_tool_frequency(self, populated_storage):
+        """Test computing tool frequency patterns."""
+        patterns = compute_tool_frequency_patterns(populated_storage, days=7)
+
+        # Should have patterns for Read, Edit, Bash
+        pattern_keys = {p.pattern_key for p in patterns}
+        assert "Read" in pattern_keys
+        assert "Edit" in pattern_keys
+        assert "Bash" in pattern_keys
+
+    def test_frequency_counts(self, populated_storage):
+        """Test that frequency counts are accurate."""
+        patterns = compute_tool_frequency_patterns(populated_storage, days=7)
+        pattern_dict = {p.pattern_key: p.count for p in patterns}
+
+        assert pattern_dict["Read"] == 3
+        assert pattern_dict["Edit"] == 3
+        assert pattern_dict["Bash"] == 6  # 1 git + 5 make
+
+
+class TestCommandPatterns:
+    """Tests for command pattern detection."""
+
+    def test_compute_command_patterns(self, populated_storage):
+        """Test computing command patterns."""
+        patterns = compute_command_patterns(populated_storage, days=7)
+
+        pattern_dict = {p.pattern_key: p.count for p in patterns}
+        assert pattern_dict.get("git", 0) == 1
+        assert pattern_dict.get("make", 0) == 5
+
+
+class TestSequencePatterns:
+    """Tests for sequence pattern detection."""
+
+    def test_compute_sequences(self, populated_storage):
+        """Test computing sequence patterns."""
+        patterns = compute_sequence_patterns(
+            populated_storage, days=7, sequence_length=2, min_count=2
+        )
+
+        # Should find Read -> Edit pattern (occurs 3 times)
+        pattern_keys = {p.pattern_key for p in patterns}
+        assert "Read → Edit" in pattern_keys
+
+    def test_sequence_counts(self, populated_storage):
+        """Test that sequence counts are accurate."""
+        patterns = compute_sequence_patterns(
+            populated_storage, days=7, sequence_length=2, min_count=1
+        )
+
+        pattern_dict = {p.pattern_key: p.count for p in patterns}
+        assert pattern_dict["Read → Edit"] == 3
+
+    def test_min_count_filter(self, populated_storage):
+        """Test that min_count filter works."""
+        # With min_count=5, should have no sequences
+        patterns = compute_sequence_patterns(
+            populated_storage, days=7, sequence_length=2, min_count=5
+        )
+        assert len(patterns) == 0
+
+
+class TestPermissionGaps:
+    """Tests for permission gap detection."""
+
+    def test_load_allowed_commands_missing_file(self):
+        """Test loading allowed commands from non-existent file."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            missing_path = Path(tmpdir) / "nonexistent.json"
+            allowed = load_allowed_commands(missing_path)
+            assert allowed == set()
+
+    def test_load_allowed_commands(self):
+        """Test loading allowed commands from settings.json."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            settings_path = Path(tmpdir) / "settings.json"
+            settings_path.write_text(
+                '{"permissions": {"allow": ["Bash(git:*)", "Bash(make:*)"]}}'
+            )
+            allowed = load_allowed_commands(settings_path)
+            assert "git" in allowed
+            assert "make" in allowed
+
+    def test_compute_permission_gaps(self, populated_storage):
+        """Test computing permission gaps."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create empty settings.json
+            settings_path = Path(tmpdir) / "settings.json"
+            settings_path.write_text('{"permissions": {"allow": []}}')
+
+            patterns = compute_permission_gaps(
+                populated_storage, days=7, threshold=3, settings_path=settings_path
+            )
+
+            # Should find make (5 uses) but maybe not git (1 use) depending on threshold
+            pattern_keys = {p.pattern_key for p in patterns}
+            assert "make" in pattern_keys
+
+    def test_permission_gaps_respects_allowed(self, populated_storage):
+        """Test that allowed commands are not reported as gaps."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            settings_path = Path(tmpdir) / "settings.json"
+            settings_path.write_text('{"permissions": {"allow": ["Bash(make:*)"]}}')
+
+            patterns = compute_permission_gaps(
+                populated_storage, days=7, threshold=1, settings_path=settings_path
+            )
+
+            # make is allowed, so should only find git
+            pattern_keys = {p.pattern_key for p in patterns}
+            assert "make" not in pattern_keys
+            assert "git" in pattern_keys
+
+
+class TestComputeAllPatterns:
+    """Tests for computing all patterns."""
+
+    def test_compute_all_patterns(self, populated_storage):
+        """Test computing all pattern types."""
+        stats = compute_all_patterns(populated_storage, days=7)
+
+        assert stats["tool_frequency_patterns"] > 0
+        assert stats["command_patterns"] > 0
+        assert stats["total_patterns"] > 0
+
+
+class TestGetInsights:
+    """Tests for the get_insights function."""
+
+    def test_get_insights(self, populated_storage):
+        """Test getting insights."""
+        insights = get_insights(populated_storage, refresh=True, days=7)
+
+        assert "tool_frequency" in insights
+        assert "command_frequency" in insights
+        assert "sequences" in insights
+        assert "permission_gaps" in insights
+        assert "summary" in insights
+
+    def test_insights_summary(self, populated_storage):
+        """Test that insights include summary stats."""
+        insights = get_insights(populated_storage, refresh=True, days=7)
+
+        assert "total_tools" in insights["summary"]
+        assert "total_commands" in insights["summary"]
+        assert "total_sequences" in insights["summary"]
diff --git a/tests/test_server.py b/tests/test_server.py
index 23a86cd..8cfe684 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -1,9 +1,12 @@
 """Tests for the MCP server."""
 
 from session_analytics.server import (
+    get_insights,
     get_status,
     ingest_logs,
     query_commands,
+    query_permission_gaps,
+    query_sequences,
     query_sessions,
     query_timeline,
     query_tokens,
@@ -78,3 +81,31 @@ def test_query_tokens():
     assert "group_by" in result
     assert "breakdown" in result
     assert isinstance(result["breakdown"], list)
+
+
+def test_query_sequences():
+    """Test that query_sequences returns sequence patterns."""
+    result = query_sequences.fn(days=7, min_count=1, length=2)
+    assert result["status"] == "ok"
+    assert "days" in result
+    assert "sequences" in result
+    assert isinstance(result["sequences"], list)
+
+
+def test_query_permission_gaps():
+    """Test that query_permission_gaps returns gap analysis."""
+    result = query_permission_gaps.fn(days=7, threshold=1)
+    assert result["status"] == "ok"
+    assert "days" in result
+    assert "gaps" in result
+    assert isinstance(result["gaps"], list)
+
+
+def test_get_insights():
+    """Test that get_insights returns organized patterns."""
+    result = get_insights.fn(refresh=True, days=7)
+    assert result["status"] == "ok"
+    assert "tool_frequency" in result
+    assert "sequences" in result
+    assert "permission_gaps" in result
+    assert "summary" in result

From dc9a7b219bd43620e7ee34950de6a73d98aa28c3 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 04:54:44 +0000
Subject: [PATCH 06/10] Add Phase 6: CLI and documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds command-line interface for shell access:
- session-analytics-cli status: Database stats
- session-analytics-cli ingest: Trigger log ingestion
- session-analytics-cli frequency: Tool usage counts
- session-analytics-cli commands: Bash command breakdown
- session-analytics-cli sessions: Session metadata
- session-analytics-cli tokens: Token usage by day/session/model
- session-analytics-cli sequences: Tool patterns
- session-analytics-cli permissions: Commands needing settings.json
- session-analytics-cli insights: Pre-computed patterns

All commands support --json for machine-readable output.

Also updates README with CLI usage documentation.

Closes #6

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 README.md                    |  35 +++++
 pyproject.toml               |   1 +
 src/session_analytics/cli.py | 250 +++++++++++++++++++++++++++++++++++
 3 files changed, 286 insertions(+)
 create mode 100644 src/session_analytics/cli.py

diff --git a/README.md b/README.md
index b724c57..d8169ab 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,41 @@ make install    # Install LaunchAgent + CLI
 make uninstall  # Remove LaunchAgent + CLI
 ```
 
+## CLI Usage
+
+The CLI provides direct access to analytics from the command line:
+
+```bash
+# Database status
+session-analytics-cli status
+
+# Ingest log files
+session-analytics-cli ingest --days 7
+
+# Tool frequency
+session-analytics-cli frequency --days 30
+
+# Bash command breakdown
+session-analytics-cli commands --prefix git
+
+# Session info
+session-analytics-cli sessions
+
+# Token usage by day/session/model
+session-analytics-cli tokens --by model
+
+# Tool sequences
+session-analytics-cli sequences --min-count 3
+
+# Permission gaps (commands needing settings.json)
+session-analytics-cli permissions --threshold 5
+
+# Insights for /improve-workflow
+session-analytics-cli insights --refresh
+```
+
+All commands support `--json` for machine-readable output.
+
 ## MCP Tools
 
 | Tool | Purpose |
diff --git a/pyproject.toml b/pyproject.toml
index 3194b72..cb3aabc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ dev = [
 
 [project.scripts]
 session-analytics = "session_analytics.server:main"
+session-analytics-cli = "session_analytics.cli:main"
 
 [tool.hatch.build.targets.wheel]
 packages = ["src/session_analytics"]
diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py
new file mode 100644
index 0000000..fe41ff1
--- /dev/null
+++ b/src/session_analytics/cli.py
@@ -0,0 +1,250 @@
+"""Command-line interface for session analytics."""
+
+import argparse
+import json
+
+from session_analytics.ingest import ingest_logs
+from session_analytics.patterns import compute_permission_gaps, compute_sequence_patterns
+from session_analytics.patterns import get_insights as do_get_insights
+from session_analytics.queries import (
+    query_commands,
+    query_sessions,
+    query_tokens,
+    query_tool_frequency,
+)
+from session_analytics.storage import SQLiteStorage
+
+
+def format_output(data: dict, json_output: bool = False) -> str:
+    """Format output as JSON or human-readable."""
+    if json_output:
+        return json.dumps(data, indent=2, default=str)
+
+    # Human-readable formatting based on data type
+    lines = []
+
+    if "total_tool_calls" in data:
+        lines.append(f"Total tool calls: {data['total_tool_calls']}")
+        lines.append("")
+        lines.append("Tool frequency:")
+        for tool in data.get("tools", [])[:20]:
+            lines.append(f"  {tool['tool']}: {tool['count']}")
+
+    elif "total_commands" in data:
+        lines.append(f"Total commands: {data['total_commands']}")
+        lines.append("")
+        lines.append("Command frequency:")
+        for cmd in data.get("commands", [])[:20]:
+            lines.append(f"  {cmd['command']}: {cmd['count']}")
+
+    elif "session_count" in data and "total_entries" in data:
+        # Session query result
+        lines.append(f"Sessions: {data['session_count']}")
+        lines.append(f"Total entries: {data['total_entries']}")
+        lines.append(f"Total tokens: {data.get('total_input_tokens', 0) + data.get('total_output_tokens', 0)}")
+
+    elif "breakdown" in data:
+        lines.append(f"Token usage by {data.get('group_by', 'unknown')}:")
+        lines.append(f"Total input: {data['total_input_tokens']}")
+        lines.append(f"Total output: {data['total_output_tokens']}")
+        lines.append("")
+        for item in data["breakdown"][:20]:
+            key = item.get("day") or item.get("session_id") or item.get("model")
+            lines.append(f"  {key}: {item['input_tokens']} in / {item['output_tokens']} out")
+
+    elif "sequences" in data:
+        lines.append("Common tool sequences:")
+        for seq in data.get("sequences", [])[:20]:
+            lines.append(f"  {seq['pattern']}: {seq['count']}")
+
+    elif "gaps" in data:
+        lines.append("Permission gaps (consider adding to settings.json):")
+        for gap in data.get("gaps", [])[:20]:
+            lines.append(f"  {gap['command']}: {gap['count']} uses -> {gap['suggestion']}")
+
+    elif "summary" in data:
+        lines.append("Insights summary:")
+        lines.append(f"  Tools: {data['summary']['total_tools']}")
+        lines.append(f"  Commands: {data['summary']['total_commands']}")
+        lines.append(f"  Sequences: {data['summary']['total_sequences']}")
+        lines.append(f"  Permission gaps: {data['summary']['permission_gaps_found']}")
+
+    elif "files_found" in data:
+        lines.append(f"Files found: {data['files_found']}")
+        lines.append(f"Files processed: {data['files_processed']}")
+        lines.append(f"Events added: {data['events_added']}")
+        lines.append(f"Sessions updated: {data.get('sessions_updated', 0)}")
+
+    elif "event_count" in data:
+        lines.append(f"Database: {data.get('db_path', 'unknown')}")
+        lines.append(f"Size: {data.get('db_size_bytes', 0) / 1024:.1f} KB")
+        lines.append(f"Events: {data['event_count']}")
+        lines.append(f"Sessions: {data['session_count']}")
+        lines.append(f"Patterns: {data.get('pattern_count', 0)}")
+        if data.get("earliest_event"):
+            lines.append(f"Date range: {data['earliest_event'][:10]} to {data['latest_event'][:10]}")
+
+    else:
+        return json.dumps(data, indent=2, default=str)
+
+    return "\n".join(lines)
+
+
+def cmd_status(args):
+    """Show database status."""
+    storage = SQLiteStorage()
+    stats = storage.get_db_stats()
+    last_ingest = storage.get_last_ingestion_time()
+
+    result = {
+        "last_ingestion": last_ingest.isoformat() if last_ingest else None,
+        **stats,
+    }
+    print(format_output(result, args.json))
+
+
+def cmd_ingest(args):
+    """Ingest log files."""
+    storage = SQLiteStorage()
+    result = ingest_logs(
+        storage,
+        days=args.days,
+        project=args.project,
+        force=args.force,
+    )
+    print(format_output(result, args.json))
+
+
+def cmd_frequency(args):
+    """Show tool frequency."""
+    storage = SQLiteStorage()
+    result = query_tool_frequency(storage, days=args.days, project=args.project)
+    print(format_output(result, args.json))
+
+
+def cmd_commands(args):
+    """Show command frequency."""
+    storage = SQLiteStorage()
+    result = query_commands(storage, days=args.days, project=args.project, prefix=args.prefix)
+    print(format_output(result, args.json))
+
+
+def cmd_sessions(args):
+    """Show session info."""
+    storage = SQLiteStorage()
+    result = query_sessions(storage, days=args.days, project=args.project)
+    print(format_output(result, args.json))
+
+
+def cmd_tokens(args):
+    """Show token usage."""
+    storage = SQLiteStorage()
+    result = query_tokens(storage, days=args.days, project=args.project, by=args.by)
+    print(format_output(result, args.json))
+
+
+def cmd_sequences(args):
+    """Show tool sequences."""
+    storage = SQLiteStorage()
+    patterns = compute_sequence_patterns(
+        storage, days=args.days, sequence_length=args.length, min_count=args.min_count
+    )
+    result = {
+        "days": args.days,
+        "sequences": [{"pattern": p.pattern_key, "count": p.count} for p in patterns],
+    }
+    print(format_output(result, args.json))
+
+
+def cmd_permissions(args):
+    """Show permission gaps."""
+    storage = SQLiteStorage()
+    patterns = compute_permission_gaps(storage, days=args.days, threshold=args.threshold)
+    result = {
+        "days": args.days,
+        "gaps": [
+            {"command": p.pattern_key, "count": p.count, "suggestion": p.metadata.get("suggestion", "")}
+            for p in patterns
+        ],
+    }
+    print(format_output(result, args.json))
+
+
+def cmd_insights(args):
+    """Show insights for /improve-workflow."""
+    storage = SQLiteStorage()
+    result = do_get_insights(storage, refresh=args.refresh, days=args.days)
+    print(format_output(result, args.json))
+
+
+def main():
+    """CLI entry point."""
+    parser = argparse.ArgumentParser(
+        description="Claude Session Analytics CLI",
+        prog="session-analytics-cli",
+    )
+    parser.add_argument("--json", action="store_true", help="Output as JSON")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    # status
+    sub = subparsers.add_parser("status", help="Show database status")
+    sub.set_defaults(func=cmd_status)
+
+    # ingest
+    sub = subparsers.add_parser("ingest", help="Ingest log files")
+    sub.add_argument("--days", type=int, default=7, help="Days to look back (default: 7)")
+    sub.add_argument("--project", help="Project path filter")
+    sub.add_argument("--force", action="store_true", help="Force re-ingestion")
+    sub.set_defaults(func=cmd_ingest)
+
+    # frequency
+    sub = subparsers.add_parser("frequency", help="Show tool frequency")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--project", help="Project path filter")
+    sub.set_defaults(func=cmd_frequency)
+
+    # commands
+    sub = subparsers.add_parser("commands", help="Show command frequency")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--project", help="Project path filter")
+    sub.add_argument("--prefix", help="Command prefix filter (e.g., 'git')")
+    sub.set_defaults(func=cmd_commands)
+
+    # sessions
+    sub = subparsers.add_parser("sessions", help="Show session info")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--project", help="Project path filter")
+    sub.set_defaults(func=cmd_sessions)
+
+    # tokens
+    sub = subparsers.add_parser("tokens", help="Show token usage")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--project", help="Project path filter")
+    sub.add_argument("--by", choices=["day", "session", "model"], default="day", help="Group by")
+    sub.set_defaults(func=cmd_tokens)
+
+    # sequences
+    sub = subparsers.add_parser("sequences", help="Show tool sequences")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--min-count", type=int, default=3, help="Minimum occurrences")
+    sub.add_argument("--length", type=int, default=2, help="Sequence length")
+    sub.set_defaults(func=cmd_sequences)
+
+    # permissions
+    sub = subparsers.add_parser("permissions", help="Show permission gaps")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--threshold", type=int, default=5, help="Minimum usage count")
+    sub.set_defaults(func=cmd_permissions)
+
+    # insights
+    sub = subparsers.add_parser("insights", help="Show insights for /improve-workflow")
+    sub.add_argument("--days", type=int, default=7, help="Days to analyze (default: 7)")
+    sub.add_argument("--refresh", action="store_true", help="Force refresh patterns")
+    sub.set_defaults(func=cmd_insights)
+
+    args = parser.parse_args()
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()

From b08a5f04d6afe76b1194f52c2942c6b7062093d2 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 05:01:58 +0000
Subject: [PATCH 07/10] Add Phase 7: Polish and CLI tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add comprehensive CLI test coverage (15 tests)
- Fix format_output ordering for insights command
- Test all CLI commands: status, ingest, frequency, commands,
  sessions, tokens, sequences, permissions, insights
- Test both human-readable and JSON output modes

Closes #7

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/session_analytics/cli.py |  29 ++--
 tests/test_cli.py            | 288 +++++++++++++++++++++++++++++++++++
 tests/test_patterns.py       |   4 +-
 3 files changed, 308 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_cli.py

diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py
index fe41ff1..1767921 100644
--- a/src/session_analytics/cli.py
+++ b/src/session_analytics/cli.py
@@ -41,7 +41,9 @@ def format_output(data: dict, json_output: bool = False) -> str:
         # Session query result
         lines.append(f"Sessions: {data['session_count']}")
         lines.append(f"Total entries: {data['total_entries']}")
-        lines.append(f"Total tokens: {data.get('total_input_tokens', 0) + data.get('total_output_tokens', 0)}")
+        lines.append(
+            f"Total tokens: {data.get('total_input_tokens', 0) + data.get('total_output_tokens', 0)}"
+        )
 
     elif "breakdown" in data:
         lines.append(f"Token usage by {data.get('group_by', 'unknown')}:")
@@ -52,6 +54,14 @@ def format_output(data: dict, json_output: bool = False) -> str:
             key = item.get("day") or item.get("session_id") or item.get("model")
             lines.append(f"  {key}: {item['input_tokens']} in / {item['output_tokens']} out")
 
+    elif "summary" in data:
+        # get_insights output (has both summary and other keys)
+        lines.append("Insights summary:")
+        lines.append(f"  Tools: {data['summary']['total_tools']}")
+        lines.append(f"  Commands: {data['summary']['total_commands']}")
+        lines.append(f"  Sequences: {data['summary']['total_sequences']}")
+        lines.append(f"  Permission gaps: {data['summary']['permission_gaps_found']}")
+
     elif "sequences" in data:
         lines.append("Common tool sequences:")
         for seq in data.get("sequences", [])[:20]:
@@ -62,13 +72,6 @@ def format_output(data: dict, json_output: bool = False) -> str:
         for gap in data.get("gaps", [])[:20]:
             lines.append(f"  {gap['command']}: {gap['count']} uses -> {gap['suggestion']}")
 
-    elif "summary" in data:
-        lines.append("Insights summary:")
-        lines.append(f"  Tools: {data['summary']['total_tools']}")
-        lines.append(f"  Commands: {data['summary']['total_commands']}")
-        lines.append(f"  Sequences: {data['summary']['total_sequences']}")
-        lines.append(f"  Permission gaps: {data['summary']['permission_gaps_found']}")
-
     elif "files_found" in data:
         lines.append(f"Files found: {data['files_found']}")
         lines.append(f"Files processed: {data['files_processed']}")
@@ -82,7 +85,9 @@ def format_output(data: dict, json_output: bool = False) -> str:
         lines.append(f"Sessions: {data['session_count']}")
         lines.append(f"Patterns: {data.get('pattern_count', 0)}")
         if data.get("earliest_event"):
-            lines.append(f"Date range: {data['earliest_event'][:10]} to {data['latest_event'][:10]}")
+            lines.append(
+                f"Date range: {data['earliest_event'][:10]} to {data['latest_event'][:10]}"
+            )
 
     else:
         return json.dumps(data, indent=2, default=str)
@@ -163,7 +168,11 @@ def cmd_permissions(args):
     result = {
         "days": args.days,
         "gaps": [
-            {"command": p.pattern_key, "count": p.count, "suggestion": p.metadata.get("suggestion", "")}
+            {
+                "command": p.pattern_key,
+                "count": p.count,
+                "suggestion": p.metadata.get("suggestion", ""),
+            }
             for p in patterns
         ],
     }
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..317a4e4
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,288 @@
+"""Tests for the CLI module."""
+
+import tempfile
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from session_analytics.cli import (
+    cmd_commands,
+    cmd_frequency,
+    cmd_insights,
+    cmd_permissions,
+    cmd_sequences,
+    cmd_sessions,
+    cmd_status,
+    cmd_tokens,
+    format_output,
+)
+from session_analytics.storage import Event, Session, SQLiteStorage
+
+
+@pytest.fixture
+def storage():
+    """Create a temporary storage instance for testing."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "test.db"
+        yield SQLiteStorage(db_path)
+
+
+@pytest.fixture
+def populated_storage(storage):
+    """Create a storage instance with sample data."""
+    now = datetime.now()
+
+    events = [
+        Event(
+            id=None,
+            uuid="e1",
+            timestamp=now - timedelta(hours=1),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Bash",
+            command="git",
+            input_tokens=100,
+            output_tokens=50,
+        ),
+        Event(
+            id=None,
+            uuid="e2",
+            timestamp=now - timedelta(hours=2),
+            session_id="s1",
+            project_path="-test",
+            entry_type="tool_use",
+            tool_name="Read",
+            input_tokens=80,
+            output_tokens=30,
+        ),
+    ]
+    storage.add_events_batch(events)
+
+    storage.upsert_session(
+        Session(
+            id="s1",
+            project_path="-test",
+            first_seen=now - timedelta(hours=2),
+            last_seen=now - timedelta(hours=1),
+            entry_count=2,
+            tool_use_count=2,
+            total_input_tokens=180,
+            total_output_tokens=80,
+        )
+    )
+
+    return storage
+
+
+class TestFormatOutput:
+    """Tests for output formatting."""
+
+    def test_json_output(self):
+        """Test JSON output mode."""
+        data = {"key": "value", "count": 42}
+        result = format_output(data, json_output=True)
+        assert '"key": "value"' in result
+        assert '"count": 42' in result
+
+    def test_tool_frequency_format(self):
+        """Test tool frequency formatting."""
+        data = {
+            "total_tool_calls": 100,
+            "tools": [
+                {"tool": "Bash", "count": 50},
+                {"tool": "Read", "count": 30},
+            ],
+        }
+        result = format_output(data)
+        assert "Total tool calls: 100" in result
+        assert "Bash: 50" in result
+        assert "Read: 30" in result
+
+    def test_command_frequency_format(self):
+        """Test command frequency formatting."""
+        data = {
+            "total_commands": 50,
+            "commands": [
+                {"command": "git", "count": 30},
+                {"command": "make", "count": 20},
+            ],
+        }
+        result = format_output(data)
+        assert "Total commands: 50" in result
+        assert "git: 30" in result
+
+    def test_status_format(self):
+        """Test status formatting."""
+        data = {
+            "db_path": "/path/to/db",
+            "db_size_bytes": 10240,
+            "event_count": 1000,
+            "session_count": 10,
+            "pattern_count": 50,
+            "earliest_event": "2025-01-01T00:00:00",
+            "latest_event": "2025-01-31T23:59:59",
+        }
+        result = format_output(data)
+        assert "Database:" in result
+        assert "Events: 1000" in result
+        assert "Sessions: 10" in result
+
+    def test_sessions_format(self):
+        """Test sessions formatting."""
+        data = {
+            "session_count": 5,
+            "total_entries": 100,
+            "total_input_tokens": 5000,
+            "total_output_tokens": 2500,
+        }
+        result = format_output(data)
+        assert "Sessions: 5" in result
+        assert "Total entries: 100" in result
+
+    def test_insights_format(self):
+        """Test insights formatting."""
+        data = {
+            "summary": {
+                "total_tools": 10,
+                "total_commands": 5,
+                "total_sequences": 3,
+                "permission_gaps_found": 2,
+            }
+        }
+        result = format_output(data)
+        assert "Insights summary:" in result
+        assert "Tools: 10" in result
+
+
+class TestCliCommands:
+    """Tests for CLI command functions."""
+
+    def test_cmd_status(self, populated_storage, capsys):
+        """Test status command."""
+
+        class Args:
+            json = False
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_status(Args())
+
+        captured = capsys.readouterr()
+        assert "Events:" in captured.out
+
+    def test_cmd_frequency(self, populated_storage, capsys):
+        """Test frequency command."""
+
+        class Args:
+            json = False
+            days = 7
+            project = None
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_frequency(Args())
+
+        captured = capsys.readouterr()
+        assert "Total tool calls:" in captured.out
+
+    def test_cmd_commands(self, populated_storage, capsys):
+        """Test commands command."""
+
+        class Args:
+            json = False
+            days = 7
+            project = None
+            prefix = None
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_commands(Args())
+
+        captured = capsys.readouterr()
+        assert "Total commands:" in captured.out
+
+    def test_cmd_sessions(self, populated_storage, capsys):
+        """Test sessions command."""
+
+        class Args:
+            json = False
+            days = 7
+            project = None
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_sessions(Args())
+
+        captured = capsys.readouterr()
+        assert "Sessions:" in captured.out
+
+    def test_cmd_tokens(self, populated_storage, capsys):
+        """Test tokens command."""
+
+        class Args:
+            json = False
+            days = 7
+            project = None
+            by = "day"
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_tokens(Args())
+
+        captured = capsys.readouterr()
+        assert "Token usage" in captured.out
+
+    def test_cmd_sequences(self, populated_storage, capsys):
+        """Test sequences command."""
+
+        class Args:
+            json = False
+            days = 7
+            min_count = 1
+            length = 2
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_sequences(Args())
+
+        captured = capsys.readouterr()
+        assert "Common tool sequences:" in captured.out
+
+    def test_cmd_permissions(self, populated_storage, capsys):
+        """Test permissions command."""
+
+        class Args:
+            json = False
+            days = 7
+            threshold = 1
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_permissions(Args())
+
+        captured = capsys.readouterr()
+        assert "Permission gaps" in captured.out
+
+    def test_cmd_insights(self, populated_storage, capsys):
+        """Test insights command."""
+
+        class Args:
+            json = False
+            days = 7
+            refresh = True
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_insights(Args())
+
+        captured = capsys.readouterr()
+        assert "Insights summary:" in captured.out
+
+    def test_json_output_mode(self, populated_storage, capsys):
+        """Test JSON output mode."""
+
+        class Args:
+            json = True
+            days = 7
+            project = None
+
+        with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
+            cmd_frequency(Args())
+
+        captured = capsys.readouterr()
+        assert '"total_tool_calls"' in captured.out
diff --git a/tests/test_patterns.py b/tests/test_patterns.py
index 0163271..aceefdf 100644
--- a/tests/test_patterns.py
+++ b/tests/test_patterns.py
@@ -237,9 +237,7 @@ def test_load_allowed_commands(self):
         """Test loading allowed commands from settings.json."""
         with tempfile.TemporaryDirectory() as tmpdir:
             settings_path = Path(tmpdir) / "settings.json"
-            settings_path.write_text(
-                '{"permissions": {"allow": ["Bash(git:*)", "Bash(make:*)"]}}'
-            )
+            settings_path.write_text('{"permissions": {"allow": ["Bash(git:*)", "Bash(make:*)"]}}')
             allowed = load_allowed_commands(settings_path)
             assert "git" in allowed
             assert "make" in allowed

From a32e2a448de352e5d31820b3a939c4098a87a444 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 05:09:52 +0000
Subject: [PATCH 08/10] Update CLAUDE.md and README.md with comprehensive docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Document all 10 MCP tools with descriptions
- Add CLI usage examples for all 9 commands
- Include example JSON output for key queries
- Document architecture, data model, and integration points
- Add development and installation instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CLAUDE.md | 102 ++++++++++++++++++++++--------
 README.md | 185 ++++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 212 insertions(+), 75 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 398c2e8..3d5536d 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,29 +1,36 @@
 # CLAUDE.md
 
-Queryable analytics for Claude Code session logs, exposed as an MCP server.
+Queryable analytics for Claude Code session logs, exposed as an MCP server and CLI.
 
 ## Project Overview
 
 This MCP server replaces the bash script `~/.claude/contrib/parse-session-logs.sh` with a persistent, queryable analytics layer. It parses JSONL session logs from `~/.claude/projects/` and provides:
 
-- **User-centric timeline**: Events across conversations, organized by timestamp
-- **Rich querying**: Tool frequency, command breakdown, sequences, permission gaps
-- **Persistent storage**: SQLite at `~/.claude/contrib/analytics/data.db`
-- **Auto-refresh**: Queries automatically refresh stale data (>5 min old)
-- **CLI access**: Full CLI for shell scripts and hooks
+- **Tool frequency analysis**: Which tools you use most (Read, Edit, Bash, etc.)
+- **Command breakdown**: Bash command patterns (git, make, cargo, etc.)
+- **Workflow sequences**: Common tool chains like Read → Edit → Bash
+- **Permission gap detection**: Commands that should be added to settings.json
+- **Token usage tracking**: Usage by day, session, or model
+- **Session timeline**: Events across conversations, organized by timestamp
 
 ## Architecture
 
-Follows the `claude-event-bus` pattern:
-- FastMCP for MCP server implementation
-- SQLite for persistence
-- LaunchAgent for always-on availability
-- CLI wrapper for shell access
+```
+~/.claude/projects/**/*.jsonl  →  SQLite DB  →  MCP Server / CLI
+                                     ↓
+                           ~/.claude/contrib/analytics/data.db
+```
+
+Key components:
+- **FastMCP** for MCP server implementation
+- **SQLite** for persistent storage with incremental ingestion
+- **Auto-refresh** queries automatically refresh stale data (>5 min old)
+- **LaunchAgent** for always-on availability (macOS)
 
 ## Commands
 
 ```bash
-make check      # Run fmt, lint, test
+make check      # Run fmt, lint, test (84 tests)
 make install    # Install LaunchAgent + CLI
 make uninstall  # Remove LaunchAgent + CLI
 make dev        # Run in dev mode with auto-reload
@@ -31,29 +38,70 @@ make dev        # Run in dev mode with auto-reload
 
 ## Key Files
 
-- `src/session_analytics/server.py` - MCP tools + entry point
-- `src/session_analytics/storage.py` - SQLite backend
-- `src/session_analytics/ingest.py` - JSONL parsing
-- `src/session_analytics/queries.py` - Query implementations
-- `src/session_analytics/patterns.py` - Pattern detection
+| File | Purpose |
+|------|---------|
+| `src/session_analytics/server.py` | MCP tools + HTTP server entry point |
+| `src/session_analytics/cli.py` | CLI commands (status, ingest, frequency, etc.) |
+| `src/session_analytics/storage.py` | SQLite backend with datetime handling |
+| `src/session_analytics/ingest.py` | JSONL parsing with incremental updates |
+| `src/session_analytics/queries.py` | Query implementations (timeline, tokens, etc.) |
+| `src/session_analytics/patterns.py` | Pattern detection (sequences, permission gaps) |
 
 ## MCP Tools
 
 | Tool | Purpose |
 |------|---------|
+| `get_status` | Database stats and last ingestion time |
 | `ingest_logs` | Refresh data from JSONL files |
-| `query_timeline` | Events in time window |
-| `query_tool_frequency` | Tool usage counts |
-| `query_commands` | Bash command breakdown |
-| `query_sequences` | Common tool patterns |
-| `query_permission_gaps` | Commands needing settings.json |
-| `query_sessions` | Session metadata |
-| `query_tokens` | Token usage analysis |
+| `query_tool_frequency` | Tool usage counts (Read, Edit, Bash, etc.) |
+| `query_timeline` | Events in time window with filtering |
+| `query_commands` | Bash command breakdown with prefix filter |
+| `query_sessions` | Session metadata and token totals |
+| `query_tokens` | Token usage by day, session, or model |
+| `query_sequences` | Common tool patterns (n-grams) |
+| `query_permission_gaps` | Commands needing settings.json entries |
 | `get_insights` | Pre-computed patterns for /improve-workflow |
-| `get_status` | Ingestion status + DB stats |
+
+## CLI Commands
+
+All commands support `--json` for machine-readable output:
+
+```bash
+session-analytics-cli status              # DB stats
+session-analytics-cli ingest --days 30    # Refresh data
+session-analytics-cli frequency           # Tool usage
+session-analytics-cli commands --prefix git  # Command breakdown
+session-analytics-cli sessions            # Session info
+session-analytics-cli tokens --by model   # Token usage
+session-analytics-cli sequences           # Tool chains
+session-analytics-cli permissions         # Permission gaps
+session-analytics-cli insights            # For /improve-workflow
+```
+
+## Integration
+
+### With /improve-workflow
+
+The `get_insights` tool (or `session-analytics-cli insights`) provides pre-computed patterns:
+- Tool frequency for identifying high-value automations
+- Command frequency for settings.json additions
+- Tool sequences for workflow optimization
+- Permission gaps with ready-to-use suggestions
+
+### With session-start hook
+
+Can be used to auto-ingest on session start:
+```bash
+session-analytics-cli ingest --days 1 --json 2>/dev/null || true
+```
+
+## Data Model
+
+**Events table**: Individual tool uses with timestamps, tokens, commands
+**Sessions table**: Aggregated session metadata
+**Patterns table**: Pre-computed patterns for fast querying
+**Ingested files table**: Tracks file mtime/size for incremental updates
 
 ## Reference
 
 Full implementation plan: `~/.claude/plans/precious-crunching-crescent.md`
-
-Reference implementation: `~/Documents/projects/claude-event-bus/`
diff --git a/README.md b/README.md
index d8169ab..858d0b7 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,19 @@
 # Claude Session Analytics
 
-MCP server for queryable analytics on Claude Code session logs.
+MCP server and CLI for queryable analytics on Claude Code session logs.
 
-## Overview
+## What It Does
 
-Replaces `parse-session-logs.sh` with a persistent, queryable analytics layer. Parses JSONL session logs from `~/.claude/projects/` and provides:
+Parses your Claude Code session logs (`~/.claude/projects/**/*.jsonl`) and provides:
 
-- **User-centric timeline**: Events across conversations, organized by timestamp
-- **Rich querying**: Tool frequency, command breakdown, sequences, permission gaps
-- **Persistent storage**: SQLite at `~/.claude/contrib/analytics/data.db`
-- **Auto-refresh**: Queries automatically refresh stale data (>5 min old)
-- **CLI access**: Full CLI for shell scripts and hooks
+- **Tool frequency** - Which tools you use most (Read, Edit, Bash, Grep, etc.)
+- **Command breakdown** - Bash command patterns (git, make, cargo, npm, etc.)
+- **Workflow sequences** - Common tool chains like `Read → Edit → Bash`
+- **Permission gaps** - Commands that should be added to settings.json
+- **Token usage** - Usage breakdown by day, session, or model
+- **Session timeline** - Events across conversations with filtering
+
+Data is stored persistently in SQLite and auto-refreshes when stale (>5 min old).
 
 ## Installation
 
@@ -19,75 +22,161 @@ make install
 ```
 
 This will:
-1. Create a virtual environment
-2. Install dependencies
-3. Set up a LaunchAgent for auto-start
-4. Add the MCP server to Claude Code
-
-## Development
-
-```bash
-make dev        # Install dev dependencies
-./scripts/dev.sh  # Run in dev mode with auto-reload
-```
-
-## Commands
-
-```bash
-make check      # Run fmt, lint, test
-make install    # Install LaunchAgent + CLI
-make uninstall  # Remove LaunchAgent + CLI
-```
+1. Create a virtual environment and install dependencies
+2. Set up a LaunchAgent for auto-start (macOS)
+3. Add the MCP server to Claude Code
+4. Install the CLI to your path
 
 ## CLI Usage
 
-The CLI provides direct access to analytics from the command line:
-
 ```bash
-# Database status
+# Database status and stats
 session-analytics-cli status
 
-# Ingest log files
+# Ingest/refresh log data
 session-analytics-cli ingest --days 7
 
-# Tool frequency
+# Tool frequency (which tools you use most)
 session-analytics-cli frequency --days 30
 
 # Bash command breakdown
-session-analytics-cli commands --prefix git
+session-analytics-cli commands
+session-analytics-cli commands --prefix git    # Just git commands
 
-# Session info
+# Session info and token totals
 session-analytics-cli sessions
 
-# Token usage by day/session/model
+# Token usage analysis
+session-analytics-cli tokens --by day
+session-analytics-cli tokens --by session
 session-analytics-cli tokens --by model
 
-# Tool sequences
-session-analytics-cli sequences --min-count 3
+# Common tool sequences (workflow patterns)
+session-analytics-cli sequences --min-count 5 --length 3
 
-# Permission gaps (commands needing settings.json)
-session-analytics-cli permissions --threshold 5
+# Permission gaps (commands that need settings.json)
+session-analytics-cli permissions --threshold 10
 
-# Insights for /improve-workflow
+# Full insights for /improve-workflow
 session-analytics-cli insights --refresh
 ```
 
-All commands support `--json` for machine-readable output.
+All commands support:
+- `--json` for machine-readable output
+- `--days N` to specify time range (default: 7)
+- `--project PATH` to filter by project
 
 ## MCP Tools
 
-| Tool | Purpose |
-|------|---------|
+When running as an MCP server, these tools are available:
+
+| Tool | Description |
+|------|-------------|
+| `get_status` | Database stats and last ingestion time |
 | `ingest_logs` | Refresh data from JSONL files |
-| `query_timeline` | Events in time window |
 | `query_tool_frequency` | Tool usage counts |
+| `query_timeline` | Events in time window with filtering |
 | `query_commands` | Bash command breakdown |
-| `query_sequences` | Common tool patterns |
+| `query_sessions` | Session metadata and totals |
+| `query_tokens` | Token usage by day/session/model |
+| `query_sequences` | Common tool patterns (n-grams) |
 | `query_permission_gaps` | Commands needing settings.json |
-| `query_sessions` | Session metadata |
-| `query_tokens` | Token usage analysis |
 | `get_insights` | Pre-computed patterns for /improve-workflow |
-| `get_status` | Ingestion status + DB stats |
+
+### Example: query_tool_frequency
+
+```json
+{
+  "days": 7,
+  "total_tool_calls": 1523,
+  "tools": [
+    {"tool": "Read", "count": 423},
+    {"tool": "Bash", "count": 312},
+    {"tool": "Edit", "count": 289},
+    {"tool": "Grep", "count": 156}
+  ]
+}
+```
+
+### Example: query_permission_gaps
+
+```json
+{
+  "gaps": [
+    {
+      "command": "npm",
+      "count": 47,
+      "suggestion": "Bash(npm:*)"
+    },
+    {
+      "command": "docker",
+      "count": 23,
+      "suggestion": "Bash(docker:*)"
+    }
+  ]
+}
+```
+
+### Example: query_sequences
+
+```json
+{
+  "sequences": [
+    {"pattern": "Read → Edit", "count": 156},
+    {"pattern": "Grep → Read", "count": 89},
+    {"pattern": "Edit → Bash", "count": 67},
+    {"pattern": "Read → Edit → Bash", "count": 45}
+  ]
+}
+```
+
+## Integration with /improve-workflow
+
+The `get_insights` tool returns pre-computed patterns optimized for the `/improve-workflow` command:
+
+```bash
+session-analytics-cli insights --refresh
+```
+
+Returns:
+- Tool frequency for identifying high-value automations
+- Command frequency for settings.json additions
+- Tool sequences for workflow optimization
+- Permission gaps with ready-to-use `Bash(cmd:*)` suggestions
+
+## Development
+
+```bash
+# Install dev dependencies
+make dev
+
+# Run in dev mode with auto-reload
+./scripts/dev.sh
+
+# Run checks (format, lint, test)
+make check
+
+# Run tests only
+.venv/bin/pytest tests/ -v
+```
+
+## Data Location
+
+- **Database**: `~/.claude/contrib/analytics/data.db`
+- **Logs parsed from**: `~/.claude/projects/**/*.jsonl`
+
+## How It Works
+
+1. **Ingestion**: Parses JSONL session logs incrementally (tracks file mtime/size)
+2. **Storage**: SQLite database with events, sessions, and patterns tables
+3. **Auto-refresh**: Queries detect stale data (>5 min) and trigger re-ingestion
+4. **Patterns**: Pre-computes tool sequences and permission gaps for fast queries
+
+## Uninstall
+
+```bash
+make uninstall
+```
 
 ## License
 

From 4d80d0cdcac1ae9d501874ee07f9b677b22da04e Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 05:40:33 +0000
Subject: [PATCH 09/10] Refactor codebase per RFC #18 audit findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements all 12 findings from the codebase audit:

**P1 - Encapsulation:**
- Add execute_query() and execute_write() public methods to SQLiteStorage
- Migrate 8 locations (4 in queries.py, 4 in patterns.py) to use public API

**P2 - Code Quality:**
- Add build_where_clause() helper to reduce query duplication
- Refactor format_output() to use formatter registry pattern
- Change to module-qualified imports in server.py
- Read version from importlib.metadata instead of hardcoding

**P3 - Future Extensibility:**
- Add schema migration framework with @migration decorator
- Define __all__ exports in __init__.py

**P4 - Polish:**
- Remove empty pass branch in ingest.py
- Document timestamp handling with clear comments
- Add CLI epilog with examples
- Update CLAUDE.md and README.md with architecture patterns

Closes #18

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .claude/settings.local.json       |  15 +-
 CLAUDE.md                         |  13 +-
 README.md                         |  11 +
 src/session_analytics/__init__.py |  27 +-
 src/session_analytics/cli.py      | 199 +++++++-----
 src/session_analytics/ingest.py   |  11 +-
 src/session_analytics/patterns.py | 256 ++++++++-------
 src/session_analytics/queries.py  | 495 +++++++++++++++---------------
 src/session_analytics/server.py   |  57 ++--
 src/session_analytics/storage.py  |  82 ++++-
 10 files changed, 677 insertions(+), 489 deletions(-)

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 65a0653..725fe3d 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -12,7 +12,20 @@
       "Bash(./scripts/install-launchagent.sh:*)",
       "Bash(claude mcp add:*)",
       "Bash(curl:*)",
-      "Bash(cat:*)"
+      "Bash(cat:*)",
+      "Bash(python3:*)",
+      "Bash(.venv/bin/ruff check . --fix)",
+      "Bash(.venv/bin/pytest tests/test_server.py -v)",
+      "Bash(.venv/bin/python:*)",
+      "Bash(.venv/bin/pytest:*)",
+      "Bash(.venv/bin/ruff check /Users/evansenter/Documents/projects/claude-session-analytics/src --select=F401,F841)",
+      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics log --oneline -20)",
+      "Skill(work)",
+      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics checkout -b issue-18-refactor)",
+      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics log --oneline -15)",
+      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics branch:*)",
+      "Bash(.venv/bin/session-analytics-cli:*)",
+      "Bash(wc:*)"
     ]
   }
 }
diff --git a/CLAUDE.md b/CLAUDE.md
index 3d5536d..b8ed8a8 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -41,12 +41,19 @@ make dev        # Run in dev mode with auto-reload
 | File | Purpose |
 |------|---------|
 | `src/session_analytics/server.py` | MCP tools + HTTP server entry point |
-| `src/session_analytics/cli.py` | CLI commands (status, ingest, frequency, etc.) |
-| `src/session_analytics/storage.py` | SQLite backend with datetime handling |
+| `src/session_analytics/cli.py` | CLI with formatter registry for output |
+| `src/session_analytics/storage.py` | SQLite backend with migration support |
 | `src/session_analytics/ingest.py` | JSONL parsing with incremental updates |
-| `src/session_analytics/queries.py` | Query implementations (timeline, tokens, etc.) |
+| `src/session_analytics/queries.py` | Query implementations with `build_where_clause()` helper |
 | `src/session_analytics/patterns.py` | Pattern detection (sequences, permission gaps) |
 
+## Architecture Patterns
+
+- **Public API**: Use `storage.execute_query()` / `execute_write()` for raw SQL; avoid `_connect()`
+- **Formatter Registry**: CLI uses `@_register_formatter(predicate)` decorator pattern
+- **Schema Migrations**: Use `@migration(version, name)` decorator in storage.py for DB changes
+- **Module Imports**: server.py uses `from session_analytics import queries, patterns, ingest`
+
 ## MCP Tools
 
 | Tool | Purpose |
diff --git a/README.md b/README.md
index 858d0b7..247b2f4 100644
--- a/README.md
+++ b/README.md
@@ -172,6 +172,17 @@ make check
 3. **Auto-refresh**: Queries detect stale data (>5 min) and trigger re-ingestion
 4. **Patterns**: Pre-computes tool sequences and permission gaps for fast queries
 
+## Architecture
+
+Key patterns used in the codebase:
+
+- **Public Storage API**: Use `storage.execute_query()` for reads, `execute_write()` for writes
+- **Query Helpers**: `build_where_clause()` reduces duplication across query functions
+- **Formatter Registry**: CLI uses `@_register_formatter(predicate)` for extensible output formatting
+- **Schema Migrations**: `@migration(version, name)` decorator for future DB schema changes
+
+See `CLAUDE.md` for more details on contributing.
+
 ## Uninstall
 
 ```bash
diff --git a/src/session_analytics/__init__.py b/src/session_analytics/__init__.py
index 345cbea..affffca 100644
--- a/src/session_analytics/__init__.py
+++ b/src/session_analytics/__init__.py
@@ -1,3 +1,28 @@
 """Claude Session Analytics - MCP server for queryable session log analytics."""
 
-__version__ = "0.1.0"
+from importlib.metadata import version
+
+try:
+    __version__ = version("claude-session-analytics")
+except Exception:
+    __version__ = "0.1.0"  # Fallback for development
+
+# Re-export public API
+from session_analytics.storage import (
+    Event,
+    IngestionState,
+    Pattern,
+    Session,
+    SQLiteStorage,
+)
+
+__all__ = [
+    # Version
+    "__version__",
+    # Storage
+    "SQLiteStorage",
+    "Event",
+    "Session",
+    "Pattern",
+    "IngestionState",
+]
diff --git a/src/session_analytics/cli.py b/src/session_analytics/cli.py
index 1767921..9b5152c 100644
--- a/src/session_analytics/cli.py
+++ b/src/session_analytics/cli.py
@@ -14,85 +14,125 @@
 )
 from session_analytics.storage import SQLiteStorage
 
+# Formatter registry: list of (predicate, formatter) tuples
+# Each predicate checks if this formatter can handle the data
+# Order matters - first match wins
+_FORMATTERS: list[tuple[callable, callable]] = []
+
+
+def _register_formatter(predicate: callable):
+    """Decorator to register a formatter with its predicate."""
+
+    def decorator(formatter: callable):
+        _FORMATTERS.append((predicate, formatter))
+        return formatter
+
+    return decorator
+
+
+@_register_formatter(lambda d: "total_tool_calls" in d)
+def _format_tool_frequency(data: dict) -> list[str]:
+    lines = [f"Total tool calls: {data['total_tool_calls']}", "", "Tool frequency:"]
+    for tool in data.get("tools", [])[:20]:
+        lines.append(f"  {tool['tool']}: {tool['count']}")
+    return lines
+
+
+@_register_formatter(lambda d: "total_commands" in d)
+def _format_commands(data: dict) -> list[str]:
+    lines = [f"Total commands: {data['total_commands']}", "", "Command frequency:"]
+    for cmd in data.get("commands", [])[:20]:
+        lines.append(f"  {cmd['command']}: {cmd['count']}")
+    return lines
+
+
+@_register_formatter(lambda d: "session_count" in d and "total_entries" in d)
+def _format_sessions(data: dict) -> list[str]:
+    total_tokens = data.get("total_input_tokens", 0) + data.get("total_output_tokens", 0)
+    return [
+        f"Sessions: {data['session_count']}",
+        f"Total entries: {data['total_entries']}",
+        f"Total tokens: {total_tokens}",
+    ]
+
+
+@_register_formatter(lambda d: "breakdown" in d)
+def _format_tokens(data: dict) -> list[str]:
+    lines = [
+        f"Token usage by {data.get('group_by', 'unknown')}:",
+        f"Total input: {data['total_input_tokens']}",
+        f"Total output: {data['total_output_tokens']}",
+        "",
+    ]
+    for item in data["breakdown"][:20]:
+        key = item.get("day") or item.get("session_id") or item.get("model")
+        lines.append(f"  {key}: {item['input_tokens']} in / {item['output_tokens']} out")
+    return lines
+
+
+@_register_formatter(lambda d: "summary" in d)
+def _format_insights(data: dict) -> list[str]:
+    return [
+        "Insights summary:",
+        f"  Tools: {data['summary']['total_tools']}",
+        f"  Commands: {data['summary']['total_commands']}",
+        f"  Sequences: {data['summary']['total_sequences']}",
+        f"  Permission gaps: {data['summary']['permission_gaps_found']}",
+    ]
+
+
+@_register_formatter(lambda d: "sequences" in d)
+def _format_sequences(data: dict) -> list[str]:
+    lines = ["Common tool sequences:"]
+    for seq in data.get("sequences", [])[:20]:
+        lines.append(f"  {seq['pattern']}: {seq['count']}")
+    return lines
+
+
+@_register_formatter(lambda d: "gaps" in d)
+def _format_gaps(data: dict) -> list[str]:
+    lines = ["Permission gaps (consider adding to settings.json):"]
+    for gap in data.get("gaps", [])[:20]:
+        lines.append(f"  {gap['command']}: {gap['count']} uses -> {gap['suggestion']}")
+    return lines
+
+
+@_register_formatter(lambda d: "files_found" in d)
+def _format_ingest(data: dict) -> list[str]:
+    return [
+        f"Files found: {data['files_found']}",
+        f"Files processed: {data['files_processed']}",
+        f"Events added: {data['events_added']}",
+        f"Sessions updated: {data.get('sessions_updated', 0)}",
+    ]
+
+
+@_register_formatter(lambda d: "event_count" in d)
+def _format_status(data: dict) -> list[str]:
+    lines = [
+        f"Database: {data.get('db_path', 'unknown')}",
+        f"Size: {data.get('db_size_bytes', 0) / 1024:.1f} KB",
+        f"Events: {data['event_count']}",
+        f"Sessions: {data['session_count']}",
+        f"Patterns: {data.get('pattern_count', 0)}",
+    ]
+    if data.get("earliest_event"):
+        lines.append(f"Date range: {data['earliest_event'][:10]} to {data['latest_event'][:10]}")
+    return lines
+
 
 def format_output(data: dict, json_output: bool = False) -> str:
     """Format output as JSON or human-readable."""
     if json_output:
         return json.dumps(data, indent=2, default=str)
 
-    # Human-readable formatting based on data type
-    lines = []
-
-    if "total_tool_calls" in data:
-        lines.append(f"Total tool calls: {data['total_tool_calls']}")
-        lines.append("")
-        lines.append("Tool frequency:")
-        for tool in data.get("tools", [])[:20]:
-            lines.append(f"  {tool['tool']}: {tool['count']}")
-
-    elif "total_commands" in data:
-        lines.append(f"Total commands: {data['total_commands']}")
-        lines.append("")
-        lines.append("Command frequency:")
-        for cmd in data.get("commands", [])[:20]:
-            lines.append(f"  {cmd['command']}: {cmd['count']}")
-
-    elif "session_count" in data and "total_entries" in data:
-        # Session query result
-        lines.append(f"Sessions: {data['session_count']}")
-        lines.append(f"Total entries: {data['total_entries']}")
-        lines.append(
-            f"Total tokens: {data.get('total_input_tokens', 0) + data.get('total_output_tokens', 0)}"
-        )
-
-    elif "breakdown" in data:
-        lines.append(f"Token usage by {data.get('group_by', 'unknown')}:")
-        lines.append(f"Total input: {data['total_input_tokens']}")
-        lines.append(f"Total output: {data['total_output_tokens']}")
-        lines.append("")
-        for item in data["breakdown"][:20]:
-            key = item.get("day") or item.get("session_id") or item.get("model")
-            lines.append(f"  {key}: {item['input_tokens']} in / {item['output_tokens']} out")
-
-    elif "summary" in data:
-        # get_insights output (has both summary and other keys)
-        lines.append("Insights summary:")
-        lines.append(f"  Tools: {data['summary']['total_tools']}")
-        lines.append(f"  Commands: {data['summary']['total_commands']}")
-        lines.append(f"  Sequences: {data['summary']['total_sequences']}")
-        lines.append(f"  Permission gaps: {data['summary']['permission_gaps_found']}")
-
-    elif "sequences" in data:
-        lines.append("Common tool sequences:")
-        for seq in data.get("sequences", [])[:20]:
-            lines.append(f"  {seq['pattern']}: {seq['count']}")
-
-    elif "gaps" in data:
-        lines.append("Permission gaps (consider adding to settings.json):")
-        for gap in data.get("gaps", [])[:20]:
-            lines.append(f"  {gap['command']}: {gap['count']} uses -> {gap['suggestion']}")
-
-    elif "files_found" in data:
-        lines.append(f"Files found: {data['files_found']}")
-        lines.append(f"Files processed: {data['files_processed']}")
-        lines.append(f"Events added: {data['events_added']}")
-        lines.append(f"Sessions updated: {data.get('sessions_updated', 0)}")
-
-    elif "event_count" in data:
-        lines.append(f"Database: {data.get('db_path', 'unknown')}")
-        lines.append(f"Size: {data.get('db_size_bytes', 0) / 1024:.1f} KB")
-        lines.append(f"Events: {data['event_count']}")
-        lines.append(f"Sessions: {data['session_count']}")
-        lines.append(f"Patterns: {data.get('pattern_count', 0)}")
-        if data.get("earliest_event"):
-            lines.append(
-                f"Date range: {data['earliest_event'][:10]} to {data['latest_event'][:10]}"
-            )
-
-    else:
-        return json.dumps(data, indent=2, default=str)
+    # Find matching formatter from registry
+    for predicate, formatter in _FORMATTERS:
+        if predicate(data):
+            return "\n".join(formatter(data))
 
-    return "\n".join(lines)
+    # Fallback to JSON if no formatter matches
+    return json.dumps(data, indent=2, default=str)
 
 
 def cmd_status(args):
@@ -188,9 +228,22 @@ def cmd_insights(args):
 
 def main():
     """CLI entry point."""
+    epilog = """
+Examples:
+  session-analytics-cli status              # Database stats
+  session-analytics-cli frequency --days 30 # Tool usage last 30 days
+  session-analytics-cli commands --prefix git  # Git commands only
+  session-analytics-cli tokens --by model   # Token usage by model
+  session-analytics-cli permissions         # Commands needing settings.json
+
+All commands support --json for machine-readable output.
+Data location: ~/.claude/contrib/analytics/data.db
+"""
     parser = argparse.ArgumentParser(
-        description="Claude Session Analytics CLI",
+        description="Claude Session Analytics CLI - Analyze your Claude Code usage patterns",
         prog="session-analytics-cli",
+        epilog=epilog,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
     )
     parser.add_argument("--json", action="store_true", help="Output as JSON")
     subparsers = parser.add_subparsers(dest="command", required=True)
diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py
index 6ab5ad8..9e15170 100644
--- a/src/session_analytics/ingest.py
+++ b/src/session_analytics/ingest.py
@@ -91,10 +91,7 @@ def parse_tool_use(tool_use: dict) -> dict:
     elif tool_name == "Skill":
         result["skill_name"] = tool_input.get("skill")
 
-    # Handle MCP tools (e.g., mcp__event-bus__register_session)
-    elif tool_name and tool_name.startswith("mcp__"):
-        # Keep the full name for MCP tools
-        pass
+    # Note: MCP tools (mcp__*) don't need special extraction - full name is preserved
 
     return result
 
@@ -129,9 +126,13 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]:
     if not uuid or not session_id or not timestamp_str:
         return []
 
+    # Parse timestamp from Claude Code JSONL format:
+    # - Input format: ISO 8601 with "Z" suffix (e.g., "2024-12-15T10:30:00.000Z")
+    # - We replace "Z" with "+00:00" for Python's fromisoformat() compatibility
+    # - We then strip timezone info to store as naive datetime in SQLite
+    # - This ensures consistent ordering and comparison without timezone complexity
     try:
         timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
-        # Convert to naive datetime (remove timezone for SQLite compatibility)
         timestamp = timestamp.replace(tzinfo=None)
     except (ValueError, AttributeError):
         logger.debug(f"Could not parse timestamp: {timestamp_str}")
diff --git a/src/session_analytics/patterns.py b/src/session_analytics/patterns.py
index bc0e5cd..7e27484 100644
--- a/src/session_analytics/patterns.py
+++ b/src/session_analytics/patterns.py
@@ -30,33 +30,32 @@ def compute_tool_frequency_patterns(
     cutoff = datetime.now() - timedelta(days=days)
     now = datetime.now()
 
-    with storage._connect() as conn:
-        rows = conn.execute(
-            """
-            SELECT tool_name, COUNT(*) as count, MAX(timestamp) as last_seen
-            FROM events
-            WHERE timestamp >= ? AND tool_name IS NOT NULL
-            GROUP BY tool_name
-            ORDER BY count DESC
-            """,
-            (cutoff,),
-        ).fetchall()
-
-        patterns = []
-        for row in rows:
-            patterns.append(
-                Pattern(
-                    id=None,
-                    pattern_type="tool_frequency",
-                    pattern_key=row["tool_name"],
-                    count=row["count"],
-                    last_seen=row["last_seen"],
-                    metadata={},
-                    computed_at=now,
-                )
+    rows = storage.execute_query(
+        """
+        SELECT tool_name, COUNT(*) as count, MAX(timestamp) as last_seen
+        FROM events
+        WHERE timestamp >= ? AND tool_name IS NOT NULL
+        GROUP BY tool_name
+        ORDER BY count DESC
+        """,
+        (cutoff,),
+    )
+
+    patterns = []
+    for row in rows:
+        patterns.append(
+            Pattern(
+                id=None,
+                pattern_type="tool_frequency",
+                pattern_key=row["tool_name"],
+                count=row["count"],
+                last_seen=row["last_seen"],
+                metadata={},
+                computed_at=now,
             )
+        )
 
-        return patterns
+    return patterns
 
 
 def compute_command_patterns(
@@ -75,33 +74,32 @@ def compute_command_patterns(
     cutoff = datetime.now() - timedelta(days=days)
     now = datetime.now()
 
-    with storage._connect() as conn:
-        rows = conn.execute(
-            """
-            SELECT command, COUNT(*) as count, MAX(timestamp) as last_seen
-            FROM events
-            WHERE timestamp >= ? AND tool_name = 'Bash' AND command IS NOT NULL
-            GROUP BY command
-            ORDER BY count DESC
-            """,
-            (cutoff,),
-        ).fetchall()
-
-        patterns = []
-        for row in rows:
-            patterns.append(
-                Pattern(
-                    id=None,
-                    pattern_type="command_frequency",
-                    pattern_key=row["command"],
-                    count=row["count"],
-                    last_seen=row["last_seen"],
-                    metadata={},
-                    computed_at=now,
-                )
+    rows = storage.execute_query(
+        """
+        SELECT command, COUNT(*) as count, MAX(timestamp) as last_seen
+        FROM events
+        WHERE timestamp >= ? AND tool_name = 'Bash' AND command IS NOT NULL
+        GROUP BY command
+        ORDER BY count DESC
+        """,
+        (cutoff,),
+    )
+
+    patterns = []
+    for row in rows:
+        patterns.append(
+            Pattern(
+                id=None,
+                pattern_type="command_frequency",
+                pattern_key=row["command"],
+                count=row["count"],
+                last_seen=row["last_seen"],
+                metadata={},
+                computed_at=now,
             )
+        )
 
-        return patterns
+    return patterns
 
 
 def compute_sequence_patterns(
@@ -124,60 +122,59 @@ def compute_sequence_patterns(
     cutoff = datetime.now() - timedelta(days=days)
     now = datetime.now()
 
-    with storage._connect() as conn:
-        # Get all tool events ordered by session and timestamp
-        rows = conn.execute(
-            """
-            SELECT session_id, tool_name, timestamp
-            FROM events
-            WHERE timestamp >= ? AND tool_name IS NOT NULL
-            ORDER BY session_id, timestamp
-            """,
-            (cutoff,),
-        ).fetchall()
-
-        # Group by session and extract sequences
-        sequences: Counter = Counter()
-        current_session = None
-        session_tools: list[str] = []
-
-        for row in rows:
-            if row["session_id"] != current_session:
-                # Process previous session
-                if len(session_tools) >= sequence_length:
-                    for i in range(len(session_tools) - sequence_length + 1):
-                        seq = tuple(session_tools[i : i + sequence_length])
-                        sequences[seq] += 1
-
-                current_session = row["session_id"]
-                session_tools = []
-
-            session_tools.append(row["tool_name"])
-
-        # Process last session
-        if len(session_tools) >= sequence_length:
-            for i in range(len(session_tools) - sequence_length + 1):
-                seq = tuple(session_tools[i : i + sequence_length])
-                sequences[seq] += 1
-
-        # Create patterns for sequences meeting min_count
-        patterns = []
-        for seq, count in sequences.most_common():
-            if count < min_count:
-                break
-            patterns.append(
-                Pattern(
-                    id=None,
-                    pattern_type="tool_sequence",
-                    pattern_key=" → ".join(seq),
-                    count=count,
-                    last_seen=now,
-                    metadata={"sequence": list(seq)},
-                    computed_at=now,
-                )
+    # Get all tool events ordered by session and timestamp
+    rows = storage.execute_query(
+        """
+        SELECT session_id, tool_name, timestamp
+        FROM events
+        WHERE timestamp >= ? AND tool_name IS NOT NULL
+        ORDER BY session_id, timestamp
+        """,
+        (cutoff,),
+    )
+
+    # Group by session and extract sequences
+    sequences: Counter = Counter()
+    current_session = None
+    session_tools: list[str] = []
+
+    for row in rows:
+        if row["session_id"] != current_session:
+            # Process previous session
+            if len(session_tools) >= sequence_length:
+                for i in range(len(session_tools) - sequence_length + 1):
+                    seq = tuple(session_tools[i : i + sequence_length])
+                    sequences[seq] += 1
+
+            current_session = row["session_id"]
+            session_tools = []
+
+        session_tools.append(row["tool_name"])
+
+    # Process last session
+    if len(session_tools) >= sequence_length:
+        for i in range(len(session_tools) - sequence_length + 1):
+            seq = tuple(session_tools[i : i + sequence_length])
+            sequences[seq] += 1
+
+    # Create patterns for sequences meeting min_count
+    patterns = []
+    for seq, count in sequences.most_common():
+        if count < min_count:
+            break
+        patterns.append(
+            Pattern(
+                id=None,
+                pattern_type="tool_sequence",
+                pattern_key=" → ".join(seq),
+                count=count,
+                last_seen=now,
+                metadata={"sequence": list(seq)},
+                computed_at=now,
             )
+        )
 
-        return patterns
+    return patterns
 
 
 def load_allowed_commands(settings_path: Path = DEFAULT_SETTINGS_PATH) -> set[str]:
@@ -233,36 +230,35 @@ def compute_permission_gaps(
 
     allowed_commands = load_allowed_commands(settings_path)
 
-    with storage._connect() as conn:
-        rows = conn.execute(
-            """
-            SELECT command, COUNT(*) as count
-            FROM events
-            WHERE timestamp >= ? AND tool_name = 'Bash' AND command IS NOT NULL
-            GROUP BY command
-            HAVING COUNT(*) >= ?
-            ORDER BY count DESC
-            """,
-            (cutoff, threshold),
-        ).fetchall()
-
-        patterns = []
-        for row in rows:
-            cmd = row["command"]
-            if cmd not in allowed_commands:
-                patterns.append(
-                    Pattern(
-                        id=None,
-                        pattern_type="permission_gap",
-                        pattern_key=cmd,
-                        count=row["count"],
-                        last_seen=now,
-                        metadata={"suggestion": f"Bash({cmd}:*)"},
-                        computed_at=now,
-                    )
+    rows = storage.execute_query(
+        """
+        SELECT command, COUNT(*) as count
+        FROM events
+        WHERE timestamp >= ? AND tool_name = 'Bash' AND command IS NOT NULL
+        GROUP BY command
+        HAVING COUNT(*) >= ?
+        ORDER BY count DESC
+        """,
+        (cutoff, threshold),
+    )
+
+    patterns = []
+    for row in rows:
+        cmd = row["command"]
+        if cmd not in allowed_commands:
+            patterns.append(
+                Pattern(
+                    id=None,
+                    pattern_type="permission_gap",
+                    pattern_key=cmd,
+                    count=row["count"],
+                    last_seen=now,
+                    metadata={"suggestion": f"Bash({cmd}:*)"},
+                    computed_at=now,
                 )
+            )
 
-        return patterns
+    return patterns
 
 
 def compute_all_patterns(
diff --git a/src/session_analytics/queries.py b/src/session_analytics/queries.py
index 51e0e09..d02a04e 100644
--- a/src/session_analytics/queries.py
+++ b/src/session_analytics/queries.py
@@ -5,6 +5,41 @@
 from session_analytics.storage import SQLiteStorage
 
 
+def build_where_clause(
+    cutoff: datetime | None = None,
+    cutoff_column: str = "timestamp",
+    project: str | None = None,
+    extra_conditions: list[str] | None = None,
+) -> tuple[str, list]:
+    """Build a WHERE clause with common query filters.
+
+    Args:
+        cutoff: Datetime for cutoff filter (>= comparison)
+        cutoff_column: Column name for cutoff (default: "timestamp")
+        project: Optional project path filter (LIKE %project%)
+        extra_conditions: Additional WHERE conditions to include
+
+    Returns:
+        Tuple of (where_clause_string, params_list)
+    """
+    conditions = []
+    params: list = []
+
+    if cutoff:
+        conditions.append(f"{cutoff_column} >= ?")
+        params.append(cutoff)
+
+    if project:
+        conditions.append("project_path LIKE ?")
+        params.append(f"%{project}%")
+
+    if extra_conditions:
+        conditions.extend(extra_conditions)
+
+    where_clause = " AND ".join(conditions) if conditions else "1=1"
+    return where_clause, params
+
+
 def ensure_fresh_data(
     storage: SQLiteStorage,
     max_age_minutes: int = 5,
@@ -56,40 +91,32 @@ def query_tool_frequency(
         Dict with tool frequency breakdown
     """
     cutoff = datetime.now() - timedelta(days=days)
+    where_clause, params = build_where_clause(
+        cutoff=cutoff,
+        project=project,
+        extra_conditions=["tool_name IS NOT NULL"],
+    )
 
-    with storage._connect() as conn:
-        conditions = ["timestamp >= ?", "tool_name IS NOT NULL"]
-        params: list = [cutoff]
-
-        if project:
-            conditions.append("project_path LIKE ?")
-            params.append(f"%{project}%")
-
-        where_clause = " AND ".join(conditions)
-
-        # Get tool frequency counts
-        rows = conn.execute(
-            f"""
-            SELECT tool_name, COUNT(*) as count
-            FROM events
-            WHERE {where_clause}
-            GROUP BY tool_name
-            ORDER BY count DESC
-            """,
-            params,
-        ).fetchall()
-
-        tools = [{"tool": row["tool_name"], "count": row["count"]} for row in rows]
+    # Get tool frequency counts
+    rows = storage.execute_query(
+        f"""
+        SELECT tool_name, COUNT(*) as count
+        FROM events
+        WHERE {where_clause}
+        GROUP BY tool_name
+        ORDER BY count DESC
+        """,
+        params,
+    )
 
-        # Get total tool calls
-        total = sum(t["count"] for t in tools)
+    tools = [{"tool": row["tool_name"], "count": row["count"]} for row in rows]
 
-        return {
-            "days": days,
-            "project": project,
-            "total_tool_calls": total,
-            "tools": tools,
-        }
+    return {
+        "days": days,
+        "project": project,
+        "total_tool_calls": sum(t["count"] for t in tools),
+        "tools": tools,
+    }
 
 
 def query_timeline(
@@ -166,45 +193,38 @@ def query_commands(
         Dict with command breakdown
     """
     cutoff = datetime.now() - timedelta(days=days)
+    where_clause, params = build_where_clause(
+        cutoff=cutoff,
+        project=project,
+        extra_conditions=["tool_name = 'Bash'", "command IS NOT NULL"],
+    )
 
-    with storage._connect() as conn:
-        conditions = ["timestamp >= ?", "tool_name = 'Bash'", "command IS NOT NULL"]
-        params: list = [cutoff]
-
-        if project:
-            conditions.append("project_path LIKE ?")
-            params.append(f"%{project}%")
-
-        if prefix:
-            conditions.append("command LIKE ?")
-            params.append(f"{prefix}%")
-
-        where_clause = " AND ".join(conditions)
-
-        # Get command frequency counts
-        rows = conn.execute(
-            f"""
-            SELECT command, COUNT(*) as count
-            FROM events
-            WHERE {where_clause}
-            GROUP BY command
-            ORDER BY count DESC
-            """,
-            params,
-        ).fetchall()
-
-        commands = [{"command": row["command"], "count": row["count"]} for row in rows]
+    # Add prefix filter if specified
+    if prefix:
+        where_clause += " AND command LIKE ?"
+        params.append(f"{prefix}%")
+
+    # Get command frequency counts
+    rows = storage.execute_query(
+        f"""
+        SELECT command, COUNT(*) as count
+        FROM events
+        WHERE {where_clause}
+        GROUP BY command
+        ORDER BY count DESC
+        """,
+        params,
+    )
 
-        # Get total Bash commands
-        total = sum(c["count"] for c in commands)
+    commands = [{"command": row["command"], "count": row["count"]} for row in rows]
 
-        return {
-            "days": days,
-            "project": project,
-            "prefix": prefix,
-            "total_commands": total,
-            "commands": commands,
-        }
+    return {
+        "days": days,
+        "project": project,
+        "prefix": prefix,
+        "total_commands": sum(c["count"] for c in commands),
+        "commands": commands,
+    }
 
 
 def query_sessions(
@@ -223,62 +243,57 @@ def query_sessions(
         Dict with session information
     """
     cutoff = datetime.now() - timedelta(days=days)
+    where_clause, params = build_where_clause(
+        cutoff=cutoff,
+        cutoff_column="last_seen",
+        project=project,
+    )
 
-    with storage._connect() as conn:
-        conditions = ["last_seen >= ?"]
-        params: list = [cutoff]
-
-        if project:
-            conditions.append("project_path LIKE ?")
-            params.append(f"%{project}%")
-
-        where_clause = " AND ".join(conditions)
-
-        rows = conn.execute(
-            f"""
-            SELECT
-                id, project_path, first_seen, last_seen,
-                entry_count, tool_use_count,
-                total_input_tokens, total_output_tokens,
-                primary_branch
-            FROM sessions
-            WHERE {where_clause}
-            ORDER BY last_seen DESC
-            """,
-            params,
-        ).fetchall()
+    rows = storage.execute_query(
+        f"""
+        SELECT
+            id, project_path, first_seen, last_seen,
+            entry_count, tool_use_count,
+            total_input_tokens, total_output_tokens,
+            primary_branch
+        FROM sessions
+        WHERE {where_clause}
+        ORDER BY last_seen DESC
+        """,
+        params,
+    )
 
-        sessions = [
-            {
-                "id": row["id"],
-                "project": row["project_path"],
-                "first_seen": row["first_seen"],
-                "last_seen": row["last_seen"],
-                "entry_count": row["entry_count"],
-                "tool_use_count": row["tool_use_count"],
-                "input_tokens": row["total_input_tokens"],
-                "output_tokens": row["total_output_tokens"],
-                "branch": row["primary_branch"],
-            }
-            for row in rows
-        ]
+    sessions = [
+        {
+            "id": row["id"],
+            "project": row["project_path"],
+            "first_seen": row["first_seen"],
+            "last_seen": row["last_seen"],
+            "entry_count": row["entry_count"],
+            "tool_use_count": row["tool_use_count"],
+            "input_tokens": row["total_input_tokens"],
+            "output_tokens": row["total_output_tokens"],
+            "branch": row["primary_branch"],
+        }
+        for row in rows
+    ]
 
-        # Calculate totals
-        total_entries = sum(s["entry_count"] for s in sessions)
-        total_tools = sum(s["tool_use_count"] for s in sessions)
-        total_input = sum(s["input_tokens"] or 0 for s in sessions)
-        total_output = sum(s["output_tokens"] or 0 for s in sessions)
+    # Calculate totals
+    total_entries = sum(s["entry_count"] for s in sessions)
+    total_tools = sum(s["tool_use_count"] for s in sessions)
+    total_input = sum(s["input_tokens"] or 0 for s in sessions)
+    total_output = sum(s["output_tokens"] or 0 for s in sessions)
 
-        return {
-            "days": days,
-            "project": project,
-            "session_count": len(sessions),
-            "total_entries": total_entries,
-            "total_tool_uses": total_tools,
-            "total_input_tokens": total_input,
-            "total_output_tokens": total_output,
-            "sessions": sessions,
-        }
+    return {
+        "days": days,
+        "project": project,
+        "session_count": len(sessions),
+        "total_entries": total_entries,
+        "total_tool_uses": total_tools,
+        "total_input_tokens": total_input,
+        "total_output_tokens": total_output,
+        "sessions": sessions,
+    }
 
 
 def query_tokens(
@@ -299,133 +314,127 @@ def query_tokens(
         Dict with token usage breakdown
     """
     cutoff = datetime.now() - timedelta(days=days)
+    where_clause, params = build_where_clause(
+        cutoff=cutoff,
+        project=project,
+    )
 
-    with storage._connect() as conn:
-        conditions = ["timestamp >= ?"]
-        params: list = [cutoff]
-
-        if project:
-            conditions.append("project_path LIKE ?")
-            params.append(f"%{project}%")
-
-        where_clause = " AND ".join(conditions)
-
-        if by == "day":
-            # Group by day
-            rows = conn.execute(
-                f"""
-                SELECT
-                    DATE(timestamp) as day,
-                    SUM(COALESCE(input_tokens, 0)) as input_tokens,
-                    SUM(COALESCE(output_tokens, 0)) as output_tokens,
-                    SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
-                    SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
-                    COUNT(*) as event_count
-                FROM events
-                WHERE {where_clause}
-                GROUP BY DATE(timestamp)
-                ORDER BY day DESC
-                """,
-                params,
-            ).fetchall()
-
-            breakdown = [
-                {
-                    "day": row["day"],
-                    "input_tokens": row["input_tokens"],
-                    "output_tokens": row["output_tokens"],
-                    "cache_read_tokens": row["cache_read_tokens"],
-                    "cache_creation_tokens": row["cache_creation_tokens"],
-                    "event_count": row["event_count"],
-                }
-                for row in rows
-            ]
-            group_key = "day"
-
-        elif by == "session":
-            # Group by session
-            rows = conn.execute(
-                f"""
-                SELECT
-                    session_id,
-                    project_path,
-                    SUM(COALESCE(input_tokens, 0)) as input_tokens,
-                    SUM(COALESCE(output_tokens, 0)) as output_tokens,
-                    SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
-                    SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
-                    COUNT(*) as event_count
-                FROM events
-                WHERE {where_clause}
-                GROUP BY session_id
-                ORDER BY input_tokens DESC
-                """,
-                params,
-            ).fetchall()
-
-            breakdown = [
-                {
-                    "session_id": row["session_id"],
-                    "project": row["project_path"],
-                    "input_tokens": row["input_tokens"],
-                    "output_tokens": row["output_tokens"],
-                    "cache_read_tokens": row["cache_read_tokens"],
-                    "cache_creation_tokens": row["cache_creation_tokens"],
-                    "event_count": row["event_count"],
-                }
-                for row in rows
-            ]
-            group_key = "session"
-
-        elif by == "model":
-            # Group by model
-            rows = conn.execute(
-                f"""
-                SELECT
-                    COALESCE(model, 'unknown') as model,
-                    SUM(COALESCE(input_tokens, 0)) as input_tokens,
-                    SUM(COALESCE(output_tokens, 0)) as output_tokens,
-                    SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
-                    SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
-                    COUNT(*) as event_count
-                FROM events
-                WHERE {where_clause}
-                GROUP BY model
-                ORDER BY input_tokens DESC
-                """,
-                params,
-            ).fetchall()
-
-            breakdown = [
-                {
-                    "model": row["model"],
-                    "input_tokens": row["input_tokens"],
-                    "output_tokens": row["output_tokens"],
-                    "cache_read_tokens": row["cache_read_tokens"],
-                    "cache_creation_tokens": row["cache_creation_tokens"],
-                    "event_count": row["event_count"],
-                }
-                for row in rows
-            ]
-            group_key = "model"
-
-        else:
-            return {
-                "error": f"Invalid grouping: {by}. Use 'day', 'session', or 'model'.",
+    if by == "day":
+        # Group by day
+        rows = storage.execute_query(
+            f"""
+            SELECT
+                DATE(timestamp) as day,
+                SUM(COALESCE(input_tokens, 0)) as input_tokens,
+                SUM(COALESCE(output_tokens, 0)) as output_tokens,
+                SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
+                SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
+                COUNT(*) as event_count
+            FROM events
+            WHERE {where_clause}
+            GROUP BY DATE(timestamp)
+            ORDER BY day DESC
+            """,
+            params,
+        )
+
+        breakdown = [
+            {
+                "day": row["day"],
+                "input_tokens": row["input_tokens"],
+                "output_tokens": row["output_tokens"],
+                "cache_read_tokens": row["cache_read_tokens"],
+                "cache_creation_tokens": row["cache_creation_tokens"],
+                "event_count": row["event_count"],
             }
+            for row in rows
+        ]
+        group_key = "day"
 
-        # Calculate totals
-        total_input = sum(b["input_tokens"] for b in breakdown)
-        total_output = sum(b["output_tokens"] for b in breakdown)
-        total_cache_read = sum(b["cache_read_tokens"] for b in breakdown)
-        total_cache_creation = sum(b["cache_creation_tokens"] for b in breakdown)
+    elif by == "session":
+        # Group by session
+        rows = storage.execute_query(
+            f"""
+            SELECT
+                session_id,
+                project_path,
+                SUM(COALESCE(input_tokens, 0)) as input_tokens,
+                SUM(COALESCE(output_tokens, 0)) as output_tokens,
+                SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
+                SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
+                COUNT(*) as event_count
+            FROM events
+            WHERE {where_clause}
+            GROUP BY session_id
+            ORDER BY input_tokens DESC
+            """,
+            params,
+        )
+
+        breakdown = [
+            {
+                "session_id": row["session_id"],
+                "project": row["project_path"],
+                "input_tokens": row["input_tokens"],
+                "output_tokens": row["output_tokens"],
+                "cache_read_tokens": row["cache_read_tokens"],
+                "cache_creation_tokens": row["cache_creation_tokens"],
+                "event_count": row["event_count"],
+            }
+            for row in rows
+        ]
+        group_key = "session"
+
+    elif by == "model":
+        # Group by model
+        rows = storage.execute_query(
+            f"""
+            SELECT
+                COALESCE(model, 'unknown') as model,
+                SUM(COALESCE(input_tokens, 0)) as input_tokens,
+                SUM(COALESCE(output_tokens, 0)) as output_tokens,
+                SUM(COALESCE(cache_read_tokens, 0)) as cache_read_tokens,
+                SUM(COALESCE(cache_creation_tokens, 0)) as cache_creation_tokens,
+                COUNT(*) as event_count
+            FROM events
+            WHERE {where_clause}
+            GROUP BY model
+            ORDER BY input_tokens DESC
+            """,
+            params,
+        )
 
+        breakdown = [
+            {
+                "model": row["model"],
+                "input_tokens": row["input_tokens"],
+                "output_tokens": row["output_tokens"],
+                "cache_read_tokens": row["cache_read_tokens"],
+                "cache_creation_tokens": row["cache_creation_tokens"],
+                "event_count": row["event_count"],
+            }
+            for row in rows
+        ]
+        group_key = "model"
+
+    else:
         return {
-            "days": days,
-            "project": project,
-            "group_by": group_key,
-            "total_input_tokens": total_input,
-            "total_output_tokens": total_output,
-            "total_cache_read_tokens": total_cache_read,
-            "total_cache_creation_tokens": total_cache_creation,
-            "breakdown": breakdown,
+            "error": f"Invalid grouping: {by}. Use 'day', 'session', or 'model'.",
         }
+
+    # Calculate totals
+    total_input = sum(b["input_tokens"] for b in breakdown)
+    total_output = sum(b["output_tokens"] for b in breakdown)
+    total_cache_read = sum(b["cache_read_tokens"] for b in breakdown)
+    total_cache_creation = sum(b["cache_creation_tokens"] for b in breakdown)
+
+    return {
+        "days": days,
+        "project": project,
+        "group_by": group_key,
+        "total_input_tokens": total_input,
+        "total_output_tokens": total_output,
+        "total_cache_read_tokens": total_cache_read,
+        "total_cache_creation_tokens": total_cache_creation,
+        "breakdown": breakdown,
+    }
diff --git a/src/session_analytics/server.py b/src/session_analytics/server.py
index 6750e89..7a00465 100644
--- a/src/session_analytics/server.py
+++ b/src/session_analytics/server.py
@@ -15,19 +15,18 @@
 
 import logging
 import os
+from importlib.metadata import version
 from pathlib import Path
 
+# Read version from package metadata
+try:
+    __version__ = version("claude-session-analytics")
+except Exception:
+    __version__ = "0.1.0"  # Fallback for development
+
 from fastmcp import FastMCP
 
-from session_analytics.ingest import ingest_logs as do_ingest_logs
-from session_analytics.patterns import compute_permission_gaps, compute_sequence_patterns
-from session_analytics.patterns import get_insights as do_get_insights
-from session_analytics.queries import ensure_fresh_data
-from session_analytics.queries import query_commands as do_query_commands
-from session_analytics.queries import query_sessions as do_query_sessions
-from session_analytics.queries import query_timeline as do_query_timeline
-from session_analytics.queries import query_tokens as do_query_tokens
-from session_analytics.queries import query_tool_frequency as do_query_tool_frequency
+from session_analytics import ingest, patterns, queries
 from session_analytics.storage import SQLiteStorage
 
 # Configure logging
@@ -69,7 +68,7 @@ def get_status() -> dict:
 
     return {
         "status": "ok",
-        "version": "0.1.0",
+        "version": __version__,
         "last_ingestion": last_ingest.isoformat() if last_ingest else None,
         **stats,
     }
@@ -87,7 +86,7 @@ def ingest_logs(days: int = 7, project: str | None = None, force: bool = False)
     Returns:
         Ingestion stats (files processed, entries added, etc.)
     """
-    result = do_ingest_logs(storage, days=days, project=project, force=force)
+    result = ingest.ingest_logs(storage, days=days, project=project, force=force)
     return {
         "status": "ok",
         **result,
@@ -105,8 +104,8 @@ def query_tool_frequency(days: int = 7, project: str | None = None) -> dict:
     Returns:
         Tool frequency breakdown
     """
-    ensure_fresh_data(storage, days=days, project=project)
-    result = do_query_tool_frequency(storage, days=days, project=project)
+    queries.ensure_fresh_data(storage, days=days, project=project)
+    result = queries.query_tool_frequency(storage, days=days, project=project)
     return {"status": "ok", **result}
 
 
@@ -135,8 +134,8 @@ def query_timeline(
     start_dt = datetime.fromisoformat(start) if start else None
     end_dt = datetime.fromisoformat(end) if end else None
 
-    ensure_fresh_data(storage)
-    result = do_query_timeline(
+    queries.ensure_fresh_data(storage)
+    result = queries.query_timeline(
         storage, start=start_dt, end=end_dt, tool=tool, project=project, limit=limit
     )
     return {"status": "ok", **result}
@@ -154,8 +153,8 @@ def query_commands(days: int = 7, project: str | None = None, prefix: str | None
     Returns:
         Command frequency breakdown
     """
-    ensure_fresh_data(storage, days=days, project=project)
-    result = do_query_commands(storage, days=days, project=project, prefix=prefix)
+    queries.ensure_fresh_data(storage, days=days, project=project)
+    result = queries.query_commands(storage, days=days, project=project, prefix=prefix)
     return {"status": "ok", **result}
 
 
@@ -170,8 +169,8 @@ def query_sessions(days: int = 7, project: str | None = None) -> dict:
     Returns:
         Session information
     """
-    ensure_fresh_data(storage, days=days, project=project)
-    result = do_query_sessions(storage, days=days, project=project)
+    queries.ensure_fresh_data(storage, days=days, project=project)
+    result = queries.query_sessions(storage, days=days, project=project)
     return {"status": "ok", **result}
 
 
@@ -187,8 +186,8 @@ def query_tokens(days: int = 7, project: str | None = None, by: str = "day") ->
     Returns:
         Token usage breakdown
     """
-    ensure_fresh_data(storage, days=days, project=project)
-    result = do_query_tokens(storage, days=days, project=project, by=by)
+    queries.ensure_fresh_data(storage, days=days, project=project)
+    result = queries.query_tokens(storage, days=days, project=project, by=by)
     return {"status": "ok", **result}
 
 
@@ -204,8 +203,8 @@ def query_sequences(days: int = 7, min_count: int = 3, length: int = 2) -> dict:
     Returns:
         Common tool sequences
     """
-    ensure_fresh_data(storage, days=days)
-    patterns = compute_sequence_patterns(
+    queries.ensure_fresh_data(storage, days=days)
+    sequence_patterns = patterns.compute_sequence_patterns(
         storage, days=days, sequence_length=length, min_count=min_count
     )
     return {
@@ -213,7 +212,7 @@ def query_sequences(days: int = 7, min_count: int = 3, length: int = 2) -> dict:
         "days": days,
         "min_count": min_count,
         "sequence_length": length,
-        "sequences": [{"pattern": p.pattern_key, "count": p.count} for p in patterns],
+        "sequences": [{"pattern": p.pattern_key, "count": p.count} for p in sequence_patterns],
     }
 
 
@@ -228,8 +227,8 @@ def query_permission_gaps(days: int = 7, threshold: int = 5) -> dict:
     Returns:
         Commands that are frequently used but not in allowed list
     """
-    ensure_fresh_data(storage, days=days)
-    patterns = compute_permission_gaps(storage, days=days, threshold=threshold)
+    queries.ensure_fresh_data(storage, days=days)
+    gap_patterns = patterns.compute_permission_gaps(storage, days=days, threshold=threshold)
     return {
         "status": "ok",
         "days": days,
@@ -240,7 +239,7 @@ def query_permission_gaps(days: int = 7, threshold: int = 5) -> dict:
                 "count": p.count,
                 "suggestion": p.metadata.get("suggestion", ""),
             }
-            for p in patterns
+            for p in gap_patterns
         ],
     }
 
@@ -256,8 +255,8 @@ def get_insights(refresh: bool = False, days: int = 7) -> dict:
     Returns:
         Insights organized by type (tool_frequency, sequences, permission_gaps)
     """
-    ensure_fresh_data(storage, days=days)
-    result = do_get_insights(storage, refresh=refresh, days=days)
+    queries.ensure_fresh_data(storage, days=days)
+    result = patterns.get_insights(storage, refresh=refresh, days=days)
     return {"status": "ok", **result}
 
 
diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py
index 0fa58e3..dc2b440 100644
--- a/src/session_analytics/storage.py
+++ b/src/session_analytics/storage.py
@@ -109,6 +109,28 @@ class Pattern:
 # Schema version for migrations
 SCHEMA_VERSION = 1
 
+# Migration functions: dict of version -> (migration_name, migration_func)
+# Each migration upgrades FROM version-1 TO version
+# e.g., MIGRATIONS[2] upgrades from version 1 to version 2
+MIGRATIONS: dict[int, tuple[str, callable]] = {}
+
+
+def migration(version: int, name: str):
+    """Decorator to register a schema migration."""
+
+    def decorator(func: callable):
+        MIGRATIONS[version] = (name, func)
+        return func
+
+    return decorator
+
+
+# Example migration (commented out, uncomment when needed):
+# @migration(2, "add_example_column")
+# def migrate_v2(conn):
+#     """Add example column to events table."""
+#     conn.execute("ALTER TABLE events ADD COLUMN example TEXT")
+
 
 class SQLiteStorage:
     """SQLite-backed storage for session analytics."""
@@ -137,6 +159,58 @@ def _connect(self):
         finally:
             conn.close()
 
+    def execute_query(self, sql: str, params: tuple | list = ()) -> list[sqlite3.Row]:
+        """Execute a SQL query and return all results.
+
+        This is the public API for raw SQL queries. Use this instead of
+        accessing _connect() directly.
+
+        Args:
+            sql: SQL query string
+            params: Query parameters (tuple or list)
+
+        Returns:
+            List of sqlite3.Row objects
+        """
+        with self._connect() as conn:
+            return conn.execute(sql, params).fetchall()
+
+    def execute_write(self, sql: str, params: tuple | list = ()) -> int:
+        """Execute a SQL write operation and return rows affected.
+
+        This is the public API for INSERT/UPDATE/DELETE operations.
+
+        Args:
+            sql: SQL statement
+            params: Query parameters (tuple or list)
+
+        Returns:
+            Number of rows affected
+        """
+        with self._connect() as conn:
+            cursor = conn.execute(sql, params)
+            return cursor.rowcount
+
+    def _get_schema_version(self, conn: sqlite3.Connection) -> int:
+        """Get current schema version from database."""
+        try:
+            row = conn.execute("SELECT version FROM schema_version LIMIT 1").fetchone()
+            return row[0] if row else 0
+        except sqlite3.OperationalError:
+            # Table doesn't exist yet
+            return 0
+
+    def _run_migrations(self, conn: sqlite3.Connection, current_version: int):
+        """Run all pending migrations."""
+        for version in range(current_version + 1, SCHEMA_VERSION + 1):
+            if version in MIGRATIONS:
+                name, migration_func = MIGRATIONS[version]
+                logger.info(f"Running migration {version}: {name}")
+                migration_func(conn)
+        conn.execute(
+            "INSERT OR REPLACE INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,)
+        )
+
     def _init_db(self):
         """Create tables if they don't exist."""
         with self._connect() as conn:
@@ -231,10 +305,10 @@ def _init_db(self):
                 )
             """)
 
-            # Set schema version
-            conn.execute(
-                "INSERT OR REPLACE INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,)
-            )
+            # Run any pending migrations
+            current_version = self._get_schema_version(conn)
+            if current_version < SCHEMA_VERSION:
+                self._run_migrations(conn, current_version)
 
     # Event operations
 

From 8f52671504466bb1534dbb46295b3a22c70a3415 Mon Sep 17 00:00:00 2001
From: Evan Senter <evansenter@gmail.com>
Date: Wed, 31 Dec 2025 12:46:25 +0000
Subject: [PATCH 10/10] Ignore .claude/ directory with local settings

---
 .claude/settings.local.json | 31 -------------------------------
 .gitignore                  |  1 +
 2 files changed, 1 insertion(+), 31 deletions(-)
 delete mode 100644 .claude/settings.local.json

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
deleted file mode 100644
index 725fe3d..0000000
--- a/.claude/settings.local.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(chmod:*)",
-      "Bash(python3 -m venv:*)",
-      "Bash(.venv/bin/pip install:*)",
-      "Bash(brew list:*)",
-      "Bash(/opt/homebrew/bin/python3.12:*)",
-      "Bash(.venv/bin/ruff format:*)",
-      "Bash(.venv/bin/ruff check .)",
-      "Bash(.venv/bin/pytest tests/ -v)",
-      "Bash(./scripts/install-launchagent.sh:*)",
-      "Bash(claude mcp add:*)",
-      "Bash(curl:*)",
-      "Bash(cat:*)",
-      "Bash(python3:*)",
-      "Bash(.venv/bin/ruff check . --fix)",
-      "Bash(.venv/bin/pytest tests/test_server.py -v)",
-      "Bash(.venv/bin/python:*)",
-      "Bash(.venv/bin/pytest:*)",
-      "Bash(.venv/bin/ruff check /Users/evansenter/Documents/projects/claude-session-analytics/src --select=F401,F841)",
-      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics log --oneline -20)",
-      "Skill(work)",
-      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics checkout -b issue-18-refactor)",
-      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics log --oneline -15)",
-      "Bash(git -C /Users/evansenter/Documents/projects/claude-session-analytics branch:*)",
-      "Bash(.venv/bin/session-analytics-cli:*)",
-      "Bash(wc:*)"
-    ]
-  }
-}
diff --git a/.gitignore b/.gitignore
index bcc3337..eefedf0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,3 +38,4 @@ htmlcov/
 
 # Project-specific
 *.db
+.claude/