From 63a51c6c1105c23843ab57dfc035710d59625a9c Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 13:09:06 +0000 Subject: [PATCH 1/9] RFC #17 Phase 1: Schema extensions for LLM-powered analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds new columns and table for RFC #17 LLM-Powered Analysis capabilities: Schema changes: - Add `user_message_text` column to events table for user journey tracking - Add `exit_code` column to events table for failure detection - Add `git_commits` table with indexes for git correlation Implementation: - Add GitCommit dataclass to storage.py - Add git commit CRUD operations (add, batch add, get with filters, count) - Update Event dataclass with new fields - Update ingest.py to capture user message text from user entries - Make migration idempotent (checks for existing columns before ALTER) - Export GitCommit in __init__.py Tests: - Add TestGitCommitOperations class with 4 tests - Add TestNewEventFields class with 3 tests - Total: 91 tests passing Closes part of #17 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/__init__.py | 2 + src/session_analytics/ingest.py | 17 +++ src/session_analytics/storage.py | 183 ++++++++++++++++++++++++++++-- tests/test_storage.py | 124 +++++++++++++++++++- 4 files changed, 315 insertions(+), 11 deletions(-) diff --git a/src/session_analytics/__init__.py b/src/session_analytics/__init__.py index affffca..210e118 100644 --- a/src/session_analytics/__init__.py +++ b/src/session_analytics/__init__.py @@ -10,6 +10,7 @@ # Re-export public API from session_analytics.storage import ( Event, + GitCommit, IngestionState, Pattern, Session, @@ -25,4 +26,5 @@ "Session", "Pattern", "IngestionState", + "GitCommit", ] diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py index 9e15170..1c9ae41 100644 --- a/src/session_analytics/ingest.py +++ 
b/src/session_analytics/ingest.py @@ -211,6 +211,21 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: elif entry_type == "user": content = message.get("content", "") + # Extract user message text for user journey tracking + user_message_text = None + if isinstance(content, str): + user_message_text = content[:2000] if content else None # Limit size + elif isinstance(content, list): + # Extract text from text blocks in the content list + text_parts = [] + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text", "")) + elif isinstance(item, str): + text_parts.append(item) + if text_parts: + user_message_text = " ".join(text_parts)[:2000] # Limit size + # Check if content is a list with tool_result blocks if isinstance(content, list): tool_results = [ @@ -244,6 +259,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: session_id=session_id, project_path=project_path, entry_type="user", + user_message_text=user_message_text, git_branch=git_branch, cwd=cwd, ) @@ -258,6 +274,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: session_id=session_id, project_path=project_path, entry_type="user", + user_message_text=user_message_text, git_branch=git_branch, cwd=cwd, ) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index dc2b440..7188819 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -62,6 +62,10 @@ class Event: git_branch: str | None = None cwd: str | None = None + # RFC #17 Phase 1 additions + user_message_text: str | None = None # For user journey tracking + exit_code: int | None = None # For failure detection (Bash commands) + @dataclass class Session: @@ -103,11 +107,22 @@ class Pattern: computed_at: datetime | None = None +@dataclass +class GitCommit: + """A git commit for correlation with session activity.""" + + sha: str + timestamp: datetime | None = None + message: str | None = None + 
session_id: str | None = None # Inferred from timestamp proximity + project_path: str | None = None + + # Default database path DEFAULT_DB_PATH = Path.home() / ".claude" / "contrib" / "analytics" / "data.db" # Schema version for migrations -SCHEMA_VERSION = 1 +SCHEMA_VERSION = 2 # Migration functions: dict of version -> (migration_name, migration_func) # Each migration upgrades FROM version-1 TO version @@ -125,11 +140,31 @@ def decorator(func: callable): return decorator -# Example migration (commented out, uncomment when needed): -# @migration(2, "add_example_column") -# def migrate_v2(conn): -# """Add example column to events table.""" -# conn.execute("ALTER TABLE events ADD COLUMN example TEXT") +@migration(2, "add_rfc17_phase1_columns") +def migrate_v2(conn): + """Add columns for RFC #17 Phase 1: user_message_text, exit_code, and git_commits table.""" + # Check if columns already exist (for fresh installs that already have them) + existing_cols = {row[1] for row in conn.execute("PRAGMA table_info(events)")} + + # Add user_message_text for user journey tracking + if "user_message_text" not in existing_cols: + conn.execute("ALTER TABLE events ADD COLUMN user_message_text TEXT") + # Add exit_code for failure detection + if "exit_code" not in existing_cols: + conn.execute("ALTER TABLE events ADD COLUMN exit_code INTEGER") + + # Create git_commits table for git correlation + conn.execute(""" + CREATE TABLE IF NOT EXISTS git_commits ( + sha TEXT PRIMARY KEY, + timestamp TIMESTAMP, + message TEXT, + session_id TEXT, + project_path TEXT + ) + """) + conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_timestamp ON git_commits(timestamp)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)") class SQLiteStorage: @@ -254,6 +289,10 @@ def _init_db(self): git_branch TEXT, cwd TEXT, + -- RFC #17 Phase 1 additions + user_message_text TEXT, + exit_code INTEGER, + UNIQUE(session_id, uuid) ) """) @@ -305,6 +344,23 @@ def 
_init_db(self): ) """) + # Git commits for correlation (RFC #17 Phase 1) + conn.execute(""" + CREATE TABLE IF NOT EXISTS git_commits ( + sha TEXT PRIMARY KEY, + timestamp TIMESTAMP, + message TEXT, + session_id TEXT, + project_path TEXT + ) + """) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_git_commits_timestamp ON git_commits(timestamp)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)" + ) + # Run any pending migrations current_version = self._get_schema_version(conn) if current_version < SCHEMA_VERSION: @@ -322,8 +378,8 @@ def add_event(self, event: Event) -> Event: tool_name, tool_input_json, tool_id, is_error, command, command_args, file_path, skill_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model, - git_branch, cwd - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + git_branch, cwd, user_message_text, exit_code + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( event.uuid, @@ -346,6 +402,8 @@ def add_event(self, event: Event) -> Event: event.model, event.git_branch, event.cwd, + event.user_message_text, + event.exit_code, ), ) event.id = cursor.lastrowid @@ -361,8 +419,8 @@ def add_events_batch(self, events: list[Event]) -> int: tool_name, tool_input_json, tool_id, is_error, command, command_args, file_path, skill_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model, - git_branch, cwd - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + git_branch, cwd, user_message_text, exit_code + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", [ ( @@ -386,6 +444,8 @@ def add_events_batch(self, events: list[Event]) -> int: e.model, e.git_branch, e.cwd, + e.user_message_text, + e.exit_code, ) for e in events ], @@ -441,6 +501,14 @@ def get_events_in_range( def _row_to_event(self, row: sqlite3.Row) -> Event: """Convert a database row to an Event object.""" + + # Helper to safely get column that might not exist in older schema + def get_col(name: str, default=None): + try: + return row[name] + except IndexError: + return default + return Event( id=row["id"], uuid=row["uuid"], @@ -463,6 +531,8 @@ def _row_to_event(self, row: sqlite3.Row) -> Event: model=row["model"], git_branch=row["git_branch"], cwd=row["cwd"], + user_message_text=get_col("user_message_text"), + exit_code=get_col("exit_code"), ) # Session operations @@ -626,6 +696,92 @@ def clear_patterns(self, pattern_type: str | None = None) -> int: cursor = conn.execute("DELETE FROM patterns") return cursor.rowcount + # Git commit operations (RFC #17 Phase 1) + + def add_git_commit(self, commit: GitCommit) -> None: + """Add a git commit for correlation.""" + with self._connect() as conn: + conn.execute( + """ + INSERT OR REPLACE INTO git_commits ( + sha, timestamp, message, session_id, project_path + ) VALUES (?, ?, ?, ?, ?) + """, + ( + commit.sha, + commit.timestamp, + commit.message, + commit.session_id, + commit.project_path, + ), + ) + + def add_git_commits_batch(self, commits: list[GitCommit]) -> int: + """Add multiple git commits in a single transaction. Returns count added.""" + with self._connect() as conn: + cursor = conn.executemany( + """ + INSERT OR REPLACE INTO git_commits ( + sha, timestamp, message, session_id, project_path + ) VALUES (?, ?, ?, ?, ?) 
+ """, + [(c.sha, c.timestamp, c.message, c.session_id, c.project_path) for c in commits], + ) + return cursor.rowcount + + def get_git_commits( + self, + project_path: str | None = None, + start: datetime | None = None, + end: datetime | None = None, + limit: int = 100, + ) -> list[GitCommit]: + """Get git commits, optionally filtered by project and time range.""" + with self._connect() as conn: + conditions = [] + params: list = [] + + if project_path: + conditions.append("project_path LIKE ?") + params.append(f"%{project_path}%") + if start: + conditions.append("timestamp >= ?") + params.append(start) + if end: + conditions.append("timestamp <= ?") + params.append(end) + + where_clause = " AND ".join(conditions) if conditions else "1=1" + params.append(limit) + + rows = conn.execute( + f""" + SELECT sha, timestamp, message, session_id, project_path + FROM git_commits + WHERE {where_clause} + ORDER BY timestamp DESC + LIMIT ? + """, + params, + ).fetchall() + + return [ + GitCommit( + sha=row["sha"], + timestamp=row["timestamp"], + message=row["message"], + session_id=row["session_id"], + project_path=row["project_path"], + ) + for row in rows + ] + + def get_git_commit_count(self) -> int: + """Get total number of git commits.""" + with self._connect() as conn: + row = conn.execute("SELECT COUNT(*) as count FROM git_commits").fetchone() + return row["count"] + # Utility operations def get_db_stats(self) -> dict: @@ -636,6 +792,12 @@ def get_db_stats(self) -> dict: pattern_count = conn.execute("SELECT COUNT(*) FROM patterns").fetchone()[0] file_count = conn.execute("SELECT COUNT(*) FROM ingestion_state").fetchone()[0] + # Git commit count (may not exist in older schemas) + try: + git_commit_count = conn.execute("SELECT COUNT(*) FROM git_commits").fetchone()[0] + except sqlite3.OperationalError: + git_commit_count = 0 + # Get date range date_range = conn.execute( "SELECT MIN(timestamp) as min_ts, MAX(timestamp) as max_ts FROM events" @@ -654,6 +816,7 @@ def 
to_iso(val): "event_count": event_count, "session_count": session_count, "pattern_count": pattern_count, + "git_commit_count": git_commit_count, "files_processed": file_count, "earliest_event": to_iso(date_range["min_ts"]), "latest_event": to_iso(date_range["max_ts"]), diff --git a/tests/test_storage.py b/tests/test_storage.py index 9c8519f..3c82d89 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,7 +1,7 @@ """Tests for the SQLite storage layer.""" import tempfile -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path import pytest @@ -265,3 +265,125 @@ def test_get_db_stats(self, storage, sample_event): assert stats["session_count"] == 1 assert stats["pattern_count"] == 1 assert stats["db_path"] is not None + + +class TestGitCommitOperations: + """Tests for git commit operations (RFC #17 Phase 1).""" + + def test_add_git_commit(self, storage): + """Test adding a git commit.""" + from session_analytics.storage import GitCommit + + commit = GitCommit( + sha="abc123", + timestamp=datetime.now(), + message="Test commit", + session_id="session-1", + project_path="test-project", + ) + storage.add_git_commit(commit) + + commits = storage.get_git_commits() + assert len(commits) == 1 + assert commits[0].sha == "abc123" + assert commits[0].message == "Test commit" + + def test_add_git_commits_batch(self, storage): + """Test batch adding git commits.""" + from session_analytics.storage import GitCommit + + commits = [ + GitCommit(sha="aaa111", timestamp=datetime.now(), message="Commit 1"), + GitCommit(sha="bbb222", timestamp=datetime.now(), message="Commit 2"), + GitCommit(sha="ccc333", timestamp=datetime.now(), message="Commit 3"), + ] + count = storage.add_git_commits_batch(commits) + assert count == 3 + + stored = storage.get_git_commits() + assert len(stored) == 3 + + def test_get_git_commits_with_filters(self, storage): + """Test filtering git commits.""" + from session_analytics.storage import GitCommit + + 
now = datetime.now() + yesterday = now - timedelta(days=1) + commits = [ + GitCommit(sha="old1", timestamp=yesterday, project_path="project-a"), + GitCommit(sha="new1", timestamp=now, project_path="project-a"), + GitCommit(sha="new2", timestamp=now, project_path="project-b"), + ] + storage.add_git_commits_batch(commits) + + # Filter by project + project_a = storage.get_git_commits(project_path="project-a") + assert len(project_a) == 2 + + # Filter by time range + recent = storage.get_git_commits(start=now - timedelta(hours=1)) + assert len(recent) == 2 + + def test_git_commit_count(self, storage): + """Test getting git commit count.""" + from session_analytics.storage import GitCommit + + assert storage.get_git_commit_count() == 0 + + storage.add_git_commit(GitCommit(sha="test123")) + assert storage.get_git_commit_count() == 1 + + +class TestNewEventFields: + """Tests for RFC #17 Phase 1 Event fields (user_message_text, exit_code).""" + + def test_event_with_user_message_text(self, storage): + """Test storing and retrieving user_message_text.""" + event = Event( + id=None, + uuid="test-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Hello, please help me with something", + ) + stored = storage.add_event(event) + assert stored.id is not None + + events = storage.get_events_in_range() + assert len(events) == 1 + assert events[0].user_message_text == "Hello, please help me with something" + + def test_event_with_exit_code(self, storage): + """Test storing and retrieving exit_code.""" + event = Event( + id=None, + uuid="bash-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="tool_result", + tool_name="Bash", + exit_code=1, + ) + storage.add_event(event) + + events = storage.get_events_in_range() + assert len(events) == 1 + assert events[0].exit_code == 1 + + def test_event_with_all_new_fields(self, storage): + """Test event with all new fields populated.""" + event = Event( + id=None, + 
uuid="full-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Run a command", + exit_code=0, + ) + storage.add_event(event) + + events = storage.get_events_in_range() + assert events[0].user_message_text == "Run a command" + assert events[0].exit_code == 0 From 242620b8de75601fe541da7774d32a03ea5d5eb6 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 13:21:13 +0000 Subject: [PATCH 2/9] Improve GitCommit type design and expand test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Make GitCommit frozen (immutable) for stronger invariants - Add SHA validation in __post_init__: - Must be 7-40 characters (short or full SHA) - Must be valid hexadecimal - Add comprehensive validation tests (TestGitCommitValidation) - Add tests for edge cases: - Deduplication behavior (INSERT OR REPLACE) - Empty batch handling - NULL value handling for new Event fields - End time and combined filters for git commits 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/storage.py | 17 +++- tests/test_storage.py | 130 ++++++++++++++++++++++++++----- 2 files changed, 125 insertions(+), 22 deletions(-) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index 7188819..ac0890c 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -107,9 +107,13 @@ class Pattern: computed_at: datetime | None = None -@dataclass +@dataclass(frozen=True) class GitCommit: - """A git commit for correlation with session activity.""" + """A git commit for correlation with session activity. + + Immutable dataclass representing a git commit. The SHA is validated + on construction to ensure it's a valid hexadecimal string. 
+ """ sha: str timestamp: datetime | None = None @@ -117,6 +121,15 @@ class GitCommit: session_id: str | None = None # Inferred from timestamp proximity project_path: str | None = None + def __post_init__(self): + """Validate SHA format on construction.""" + if not self.sha: + raise ValueError("SHA cannot be empty") + if not (7 <= len(self.sha) <= 40): + raise ValueError(f"SHA must be 7-40 characters, got {len(self.sha)}") + if not all(c in "0123456789abcdefABCDEF" for c in self.sha): + raise ValueError(f"SHA must be hexadecimal, got '{self.sha}'") + # Default database path DEFAULT_DB_PATH = Path.home() / ".claude" / "contrib" / "analytics" / "data.db" diff --git a/tests/test_storage.py b/tests/test_storage.py index 3c82d89..e512c7b 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -8,6 +8,7 @@ from session_analytics.storage import ( Event, + GitCommit, IngestionState, Pattern, Session, @@ -267,15 +268,54 @@ def test_get_db_stats(self, storage, sample_event): assert stats["db_path"] is not None +class TestGitCommitValidation: + """Tests for GitCommit validation (RFC #17 Phase 1).""" + + def test_valid_short_sha(self): + """Test that 7-character short SHA is valid.""" + commit = GitCommit(sha="abc1234") + assert commit.sha == "abc1234" + + def test_valid_full_sha(self): + """Test that 40-character full SHA is valid.""" + full_sha = "a" * 40 + commit = GitCommit(sha=full_sha) + assert commit.sha == full_sha + + def test_invalid_sha_empty(self): + """Test that empty SHA raises ValueError.""" + with pytest.raises(ValueError, match="cannot be empty"): + GitCommit(sha="") + + def test_invalid_sha_too_short(self): + """Test that SHA shorter than 7 chars raises ValueError.""" + with pytest.raises(ValueError, match="must be 7-40 characters"): + GitCommit(sha="abc123") + + def test_invalid_sha_too_long(self): + """Test that SHA longer than 40 chars raises ValueError.""" + with pytest.raises(ValueError, match="must be 7-40 characters"): + GitCommit(sha="a" * 
41) + + def test_invalid_sha_non_hex(self): + """Test that non-hexadecimal SHA raises ValueError.""" + with pytest.raises(ValueError, match="must be hexadecimal"): + GitCommit(sha="ghijklm") + + def test_gitcommit_is_frozen(self): + """Test that GitCommit is immutable.""" + commit = GitCommit(sha="abc1234") + with pytest.raises(AttributeError): + commit.sha = "def5678" + + class TestGitCommitOperations: """Tests for git commit operations (RFC #17 Phase 1).""" def test_add_git_commit(self, storage): """Test adding a git commit.""" - from session_analytics.storage import GitCommit - commit = GitCommit( - sha="abc123", + sha="abc1234", timestamp=datetime.now(), message="Test commit", session_id="session-1", @@ -285,17 +325,36 @@ def test_add_git_commit(self, storage): commits = storage.get_git_commits() assert len(commits) == 1 - assert commits[0].sha == "abc123" + assert commits[0].sha == "abc1234" assert commits[0].message == "Test commit" + assert commits[0].session_id == "session-1" + assert commits[0].project_path == "test-project" + + def test_add_git_commit_deduplication(self, storage): + """Test that duplicate SHA overwrites existing commit (INSERT OR REPLACE behavior).""" + # Add initial commit + storage.add_git_commit( + GitCommit(sha="abc1234", message="Original message", project_path="project-1") + ) + + # Add commit with same SHA but different data + storage.add_git_commit( + GitCommit(sha="abc1234", message="Updated message", project_path="project-2") + ) + + # Should still have only one commit, with updated data + commits = storage.get_git_commits() + assert len(commits) == 1 + assert commits[0].sha == "abc1234" + assert commits[0].message == "Updated message" + assert commits[0].project_path == "project-2" def test_add_git_commits_batch(self, storage): """Test batch adding git commits.""" - from session_analytics.storage import GitCommit - commits = [ - GitCommit(sha="aaa111", timestamp=datetime.now(), message="Commit 1"), - GitCommit(sha="bbb222", 
timestamp=datetime.now(), message="Commit 2"), - GitCommit(sha="ccc333", timestamp=datetime.now(), message="Commit 3"), + GitCommit(sha="aaa1111", timestamp=datetime.now(), message="Commit 1"), + GitCommit(sha="bbb2222", timestamp=datetime.now(), message="Commit 2"), + GitCommit(sha="ccc3333", timestamp=datetime.now(), message="Commit 3"), ] count = storage.add_git_commits_batch(commits) assert count == 3 @@ -303,34 +362,48 @@ def test_add_git_commits_batch(self, storage): stored = storage.get_git_commits() assert len(stored) == 3 - def test_get_git_commits_with_filters(self, storage): - """Test filtering git commits.""" - from session_analytics.storage import GitCommit + def test_add_git_commits_batch_empty(self, storage): + """Test batch add with empty list.""" + count = storage.add_git_commits_batch([]) + assert count == 0 + assert storage.get_git_commit_count() == 0 + def test_get_git_commits_with_filters(self, storage): + """Test filtering git commits by project, start, and end time.""" now = datetime.now() yesterday = now - timedelta(days=1) + two_days_ago = now - timedelta(days=2) commits = [ - GitCommit(sha="old1", timestamp=yesterday, project_path="project-a"), - GitCommit(sha="new1", timestamp=now, project_path="project-a"), - GitCommit(sha="new2", timestamp=now, project_path="project-b"), + GitCommit(sha="aaa1111", timestamp=two_days_ago, project_path="project-a"), + GitCommit(sha="bbb2222", timestamp=yesterday, project_path="project-a"), + GitCommit(sha="ccc3333", timestamp=now, project_path="project-a"), + GitCommit(sha="ddd4444", timestamp=now, project_path="project-b"), ] storage.add_git_commits_batch(commits) # Filter by project project_a = storage.get_git_commits(project_path="project-a") - assert len(project_a) == 2 + assert len(project_a) == 3 - # Filter by time range + # Filter by start time recent = storage.get_git_commits(start=now - timedelta(hours=1)) assert len(recent) == 2 + # Filter by end time + old = 
storage.get_git_commits(end=yesterday + timedelta(hours=1)) + assert len(old) == 2 + + # Combined filters: project AND time range + project_a_recent = storage.get_git_commits( + project_path="project-a", start=yesterday - timedelta(hours=1), end=now + ) + assert len(project_a_recent) == 2 # mid1 and new1 + def test_git_commit_count(self, storage): """Test getting git commit count.""" - from session_analytics.storage import GitCommit - assert storage.get_git_commit_count() == 0 - storage.add_git_commit(GitCommit(sha="test123")) + storage.add_git_commit(GitCommit(sha="abcdef1")) assert storage.get_git_commit_count() == 1 @@ -387,3 +460,20 @@ def test_event_with_all_new_fields(self, storage): events = storage.get_events_in_range() assert events[0].user_message_text == "Run a command" assert events[0].exit_code == 0 + + def test_event_with_null_new_fields(self, storage): + """Test that events with NULL user_message_text and exit_code are handled correctly.""" + event = Event( + id=None, + uuid="null-fields-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="assistant", + # user_message_text and exit_code are None by default + ) + storage.add_event(event) + + events = storage.get_events_in_range() + assert len(events) == 1 + assert events[0].user_message_text is None + assert events[0].exit_code is None From bc6c2c9a9eb8b96d03d2f21f76e056178955583c Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 13:40:35 +0000 Subject: [PATCH 3/9] Address PR feedback: improve code quality and add index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes based on claude[bot] review: 1. Add TODO comment for exit_code field explaining that Claude Code JSONL format doesn't currently include exit codes (deferred to Phase 4) 2. Add index on git_commits.project_path for query performance 3. Extract USER_MESSAGE_MAX_LENGTH constant (2000) in ingest.py 4. 
Standardize project_path filtering to use = instead of LIKE for consistency with get_events_in_range 5. Add safety comments explaining dynamic WHERE clause construction is SQL-injection safe (conditions are hardcoded strings) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/ingest.py | 7 +++++-- src/session_analytics/storage.py | 13 +++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py index 1c9ae41..716f7d5 100644 --- a/src/session_analytics/ingest.py +++ b/src/session_analytics/ingest.py @@ -12,6 +12,9 @@ # Default location for Claude Code session logs DEFAULT_LOGS_DIR = Path.home() / ".claude" / "projects" +# Maximum length for user message text to prevent DB bloat while preserving context +USER_MESSAGE_MAX_LENGTH = 2000 + def find_log_files( logs_dir: Path = DEFAULT_LOGS_DIR, @@ -214,7 +217,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: # Extract user message text for user journey tracking user_message_text = None if isinstance(content, str): - user_message_text = content[:2000] if content else None # Limit size + user_message_text = content[:USER_MESSAGE_MAX_LENGTH] if content else None elif isinstance(content, list): # Extract text from text blocks in the content list text_parts = [] @@ -224,7 +227,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: elif isinstance(item, str): text_parts.append(item) if text_parts: - user_message_text = " ".join(text_parts)[:2000] # Limit size + user_message_text = " ".join(text_parts)[:USER_MESSAGE_MAX_LENGTH] # Check if content is a list with tool_result blocks if isinstance(content, list): diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index ac0890c..82191b7 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -64,6 +64,10 @@ class Event: # RFC #17 Phase 1 
additions user_message_text: str | None = None # For user journey tracking + # TODO(Phase 4): exit_code is not currently available in Claude Code JSONL format. + # The toolUseResult has stdout/stderr/interrupted but no exit code. + # This field is reserved for future extraction when format changes or + # we implement heuristic detection (e.g., stderr patterns, "Exit code: N" in output). exit_code: int | None = None # For failure detection (Bash commands) @@ -178,6 +182,9 @@ def migrate_v2(conn): """) conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_timestamp ON git_commits(timestamp)") conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)") + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)" + ) class SQLiteStorage: @@ -497,6 +504,7 @@ def get_events_in_range( conditions.append("project_path = ?") params.append(project_path) + # Safe: where_clause is built from hardcoded condition strings, not user input where_clause = " AND ".join(conditions) if conditions else "1=1" params.append(limit) @@ -755,8 +763,8 @@ def get_git_commits( params: list = [] if project_path: - conditions.append("project_path LIKE ?") - params.append(f"%{project_path}%") + conditions.append("project_path = ?") + params.append(project_path) if start: conditions.append("timestamp >= ?") params.append(start) @@ -764,6 +772,7 @@ def get_git_commits( conditions.append("timestamp <= ?") params.append(end) + # Safe: where_clause is built from hardcoded condition strings, not user input where_clause = " AND ".join(conditions) if conditions else "1=1" params.append(limit) From 37441eef08ae46aa584509a6a32b06d8c0b211e0 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 13:43:19 +0000 Subject: [PATCH 4/9] Fix formatting --- src/session_analytics/storage.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index 
82191b7..6812b0e 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -182,9 +182,7 @@ def migrate_v2(conn): """) conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_timestamp ON git_commits(timestamp)") conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)") - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)" - ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)") class SQLiteStorage: From 98a93092853bfe0cfad31f8efa854ae9cf351bd6 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 14:01:31 +0000 Subject: [PATCH 5/9] Add FTS index on user_message_text and boundary tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add FTS5 full-text search index on user_message_text (migration v3) - Add search_user_messages() method for efficient text search - Add tests for 2000-char truncation boundary in parse_entry - Add tests for FTS search functionality (basic, no-match, phrase) Addresses remaining reviewer suggestions from PR #20. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/storage.py | 77 ++++++++++++++++++++++++- tests/test_ingest.py | 54 ++++++++++++++++++ tests/test_storage.py | 98 ++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 1 deletion(-) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index 6812b0e..f9b428f 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -139,7 +139,7 @@ def __post_init__(self): DEFAULT_DB_PATH = Path.home() / ".claude" / "contrib" / "analytics" / "data.db" # Schema version for migrations -SCHEMA_VERSION = 2 +SCHEMA_VERSION = 3 # Migration functions: dict of version -> (migration_name, migration_func) # Each migration upgrades FROM version-1 TO version @@ -185,6 +185,54 @@ def migrate_v2(conn): conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)") +@migration(3, "add_user_message_fts") +def migrate_v3(conn): + """Add FTS5 full-text search index on user_message_text for efficient text search.""" + # Create FTS5 virtual table (content= points to external events table) + # Using content-less FTS (no redundant storage) with events.id as rowid + conn.execute(""" + CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5( + user_message_text, + content='events', + content_rowid='id' + ) + """) + + # Populate FTS index from existing events with non-null user_message_text + conn.execute(""" + INSERT INTO events_fts(rowid, user_message_text) + SELECT id, user_message_text FROM events WHERE user_message_text IS NOT NULL + """) + + # Create triggers to keep FTS in sync with events table + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_insert AFTER INSERT ON events + WHEN NEW.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(rowid, user_message_text) VALUES (NEW.id, NEW.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF 
NOT EXISTS events_fts_delete AFTER DELETE ON events + WHEN OLD.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + VALUES ('delete', OLD.id, OLD.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_update AFTER UPDATE OF user_message_text ON events + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + VALUES ('delete', OLD.id, OLD.user_message_text); + INSERT INTO events_fts(rowid, user_message_text) + SELECT NEW.id, NEW.user_message_text WHERE NEW.user_message_text IS NOT NULL; + END + """) + + class SQLiteStorage: """SQLite-backed storage for session analytics.""" @@ -802,6 +850,33 @@ def get_git_commit_count(self) -> int: row = conn.execute("SELECT COUNT(*) as count FROM git_commits").fetchone() return row["count"] + # Full-text search operations + + def search_user_messages(self, query: str, limit: int = 100) -> list[Event]: + """Search user messages using full-text search. + + Args: + query: FTS5 query string (supports AND, OR, NOT, phrases, etc.) + limit: Maximum number of results + + Returns: + List of Event objects matching the search query + """ + with self._connect() as conn: + # Use FTS5 MATCH to search, join back to events for full data + rows = conn.execute( + """ + SELECT events.* FROM events + INNER JOIN events_fts ON events.id = events_fts.rowid + WHERE events_fts MATCH ? + ORDER BY rank + LIMIT ? 
+ """, + (query, limit), + ).fetchall() + + return [self._row_to_event(row) for row in rows] + # Utility operations def get_db_stats(self) -> dict: diff --git a/tests/test_ingest.py b/tests/test_ingest.py index b2503b5..a1ba199 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -229,6 +229,60 @@ def test_skip_malformed_entry(self): events = parse_entry(entry, "test-project") assert len(events) == 0 + def test_user_message_text_truncation_at_boundary(self): + """Test that user_message_text is truncated at USER_MESSAGE_MAX_LENGTH (2000 chars).""" + from session_analytics.ingest import USER_MESSAGE_MAX_LENGTH + + # Test content exactly at the limit - should not be truncated + exact_limit_content = "x" * USER_MESSAGE_MAX_LENGTH + entry_exact = { + "type": "user", + "uuid": "user-exact", + "sessionId": "session-1", + "timestamp": "2025-01-01T12:00:00.000Z", + "message": {"role": "user", "content": exact_limit_content}, + } + events = parse_entry(entry_exact, "test-project") + assert len(events) == 1 + assert len(events[0].user_message_text) == USER_MESSAGE_MAX_LENGTH + + # Test content over the limit - should be truncated + over_limit_content = "y" * (USER_MESSAGE_MAX_LENGTH + 500) + entry_over = { + "type": "user", + "uuid": "user-over", + "sessionId": "session-1", + "timestamp": "2025-01-01T12:00:05.000Z", + "message": {"role": "user", "content": over_limit_content}, + } + events = parse_entry(entry_over, "test-project") + assert len(events) == 1 + assert len(events[0].user_message_text) == USER_MESSAGE_MAX_LENGTH + assert events[0].user_message_text == "y" * USER_MESSAGE_MAX_LENGTH + + def test_user_message_text_truncation_with_list_content(self): + """Test truncation when content is a list of text blocks.""" + from session_analytics.ingest import USER_MESSAGE_MAX_LENGTH + + # Create content with multiple text blocks that exceed limit when joined + text_block = "z" * 1500 + entry = { + "type": "user", + "uuid": "user-list", + "sessionId": "session-1", + 
"timestamp": "2025-01-01T12:00:00.000Z", + "message": { + "role": "user", + "content": [ + {"type": "text", "text": text_block}, + {"type": "text", "text": text_block}, # Combined: 3001 chars with space + ], + }, + } + events = parse_entry(entry, "test-project") + assert len(events) == 1 + assert len(events[0].user_message_text) == USER_MESSAGE_MAX_LENGTH + class TestIngestFile: """Tests for file ingestion.""" diff --git a/tests/test_storage.py b/tests/test_storage.py index e512c7b..228e41a 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -73,6 +73,12 @@ def test_add_events_batch(self, storage): assert count == 5 assert storage.get_event_count() == 5 + def test_add_events_batch_empty(self, storage): + """Test batch add with empty list.""" + count = storage.add_events_batch([]) + assert count == 0 + assert storage.get_event_count() == 0 + def test_get_events_in_range(self, storage): """Test filtering events by time range.""" # Add events across different times @@ -477,3 +483,95 @@ def test_event_with_null_new_fields(self, storage): assert len(events) == 1 assert events[0].user_message_text is None assert events[0].exit_code is None + + +class TestFullTextSearch: + """Tests for full-text search on user_message_text.""" + + def test_search_user_messages_basic(self, storage): + """Test basic full-text search on user messages.""" + # Add events with searchable text + storage.add_event( + Event( + id=None, + uuid="uuid-1", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Help me debug the authentication error", + ) + ) + storage.add_event( + Event( + id=None, + uuid="uuid-2", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Fix the database connection issue", + ) + ) + storage.add_event( + Event( + id=None, + uuid="uuid-3", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Another error message to debug", + ) + ) + + 
# Search for "debug" + results = storage.search_user_messages("debug") + assert len(results) == 2 + assert all("debug" in r.user_message_text.lower() for r in results) + + # Search for "authentication" + results = storage.search_user_messages("authentication") + assert len(results) == 1 + assert "authentication" in results[0].user_message_text.lower() + + def test_search_user_messages_no_match(self, storage): + """Test search returns empty when no matches found.""" + storage.add_event( + Event( + id=None, + uuid="uuid-1", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="This is a test message", + ) + ) + + results = storage.search_user_messages("nonexistent") + assert len(results) == 0 + + def test_search_user_messages_phrase(self, storage): + """Test searching for exact phrases.""" + storage.add_event( + Event( + id=None, + uuid="uuid-1", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Run the unit tests", + ) + ) + storage.add_event( + Event( + id=None, + uuid="uuid-2", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Unit testing is important", + ) + ) + + # Search for phrase "unit tests" + results = storage.search_user_messages('"unit tests"') + assert len(results) == 1 + assert "unit tests" in results[0].user_message_text.lower() From 525ee6c57f7860f68d52573f08f1e75e189c7229 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 14:04:25 +0000 Subject: [PATCH 6/9] Fix exception types in get_col helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Catch both IndexError and KeyError so that a missing column falls back to the default regardless of which lookup error the row object raises (sqlite3.Row itself raises IndexError for missing column names).
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index f9b428f..8544f2b 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -573,7 +573,7 @@ def _row_to_event(self, row: sqlite3.Row) -> Event: def get_col(name: str, default=None): try: return row[name] - except IndexError: + except (IndexError, KeyError): return default return Event( From e8eb1258ee254dfc9ffd83fb96b97e08fd20f7c4 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 14:51:15 +0000 Subject: [PATCH 7/9] Fix fresh install schema: add missing index and FTS5 table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add idx_git_commits_project index to _init_db (was only in migration) - Add events_fts FTS5 table and sync triggers to _init_db - Ensures fresh installs have identical schema to migrated databases 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/storage.py | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index 8544f2b..35dd628 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -426,6 +426,46 @@ def _init_db(self): conn.execute( "CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)" ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)" + ) + + # FTS5 full-text search on user_message_text (RFC #17 Phase 1) + conn.execute(""" + CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5( + user_message_text, + content='events', + content_rowid='id' + ) + """) + + # Triggers to keep FTS in sync with events table + 
conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_insert AFTER INSERT ON events + WHEN NEW.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(rowid, user_message_text) VALUES (NEW.id, NEW.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_delete AFTER DELETE ON events + WHEN OLD.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + VALUES ('delete', OLD.id, OLD.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_update AFTER UPDATE OF user_message_text ON events + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + VALUES ('delete', OLD.id, OLD.user_message_text); + INSERT INTO events_fts(rowid, user_message_text) + SELECT NEW.id, NEW.user_message_text WHERE NEW.user_message_text IS NOT NULL; + END + """) # Run any pending migrations current_version = self._get_schema_version(conn) From 63ddddbd3e62b39a91f765a8dc3d6807bcafbcd7 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 14:59:55 +0000 Subject: [PATCH 8/9] Fix FTS trigger edge case and add partial index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix events_fts_update trigger to handle NULL->non-NULL transitions - Add partial index idx_events_has_user_message for efficient user message queries - Fix stale test comment (mid1/new1 -> bbb2222/ccc3333) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/storage.py | 18 ++++++++++++++++-- tests/test_storage.py | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index 35dd628..ee4bb70 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -224,14 +224,21 @@ def migrate_v3(conn): conn.execute(""" CREATE TRIGGER IF NOT EXISTS events_fts_update 
AFTER UPDATE OF user_message_text ON events + WHEN OLD.user_message_text IS NOT NULL OR NEW.user_message_text IS NOT NULL BEGIN INSERT INTO events_fts(events_fts, rowid, user_message_text) - VALUES ('delete', OLD.id, OLD.user_message_text); + SELECT 'delete', OLD.id, OLD.user_message_text WHERE OLD.user_message_text IS NOT NULL; INSERT INTO events_fts(rowid, user_message_text) SELECT NEW.id, NEW.user_message_text WHERE NEW.user_message_text IS NOT NULL; END """) + # Partial index for efficiently querying events with user messages + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_events_has_user_message + ON events(id) WHERE user_message_text IS NOT NULL + """) + class SQLiteStorage: """SQLite-backed storage for session analytics.""" @@ -459,14 +466,21 @@ def _init_db(self): conn.execute(""" CREATE TRIGGER IF NOT EXISTS events_fts_update AFTER UPDATE OF user_message_text ON events + WHEN OLD.user_message_text IS NOT NULL OR NEW.user_message_text IS NOT NULL BEGIN INSERT INTO events_fts(events_fts, rowid, user_message_text) - VALUES ('delete', OLD.id, OLD.user_message_text); + SELECT 'delete', OLD.id, OLD.user_message_text WHERE OLD.user_message_text IS NOT NULL; INSERT INTO events_fts(rowid, user_message_text) SELECT NEW.id, NEW.user_message_text WHERE NEW.user_message_text IS NOT NULL; END """) + # Partial index for efficiently querying events with user messages + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_events_has_user_message + ON events(id) WHERE user_message_text IS NOT NULL + """) + # Run any pending migrations current_version = self._get_schema_version(conn) if current_version < SCHEMA_VERSION: diff --git a/tests/test_storage.py b/tests/test_storage.py index 228e41a..6dc13fd 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -403,7 +403,7 @@ def test_get_git_commits_with_filters(self, storage): project_a_recent = storage.get_git_commits( project_path="project-a", start=yesterday - timedelta(hours=1), end=now ) - assert 
len(project_a_recent) == 2 # mid1 and new1 + assert len(project_a_recent) == 2 # bbb2222 and ccc3333 def test_git_commit_count(self, storage): """Test getting git commit count.""" From 0f8fc6a59b4c1316e0f0625e8cb21731cd007074 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Wed, 31 Dec 2025 15:08:52 +0000 Subject: [PATCH 9/9] Add schema duplication comment and FTS trigger tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document why schema is defined in both migrations and _init_db - Add 4 tests verifying FTS trigger behavior: - Insert trigger populates FTS - Update NULL→value adds to FTS - Update value→different removes old, adds new - Update value→NULL removes from FTS 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/storage.py | 7 ++ tests/test_storage.py | 110 +++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index ee4bb70..9c288d4 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -144,6 +144,13 @@ def __post_init__(self): # Migration functions: dict of version -> (migration_name, migration_func) # Each migration upgrades FROM version-1 TO version # e.g., MIGRATIONS[2] upgrades from version 1 to version 2 +# +# NOTE: Schema elements (tables, indexes, triggers) are defined in BOTH migrations +# AND _init_db(). This is intentional: +# - _init_db() defines the complete current schema for fresh installs +# - Migrations incrementally upgrade existing databases to the current schema +# Both paths must result in identical schemas. When adding new schema elements, +# add them to both places and use IF NOT EXISTS for idempotency. 
MIGRATIONS: dict[int, tuple[str, callable]] = {} diff --git a/tests/test_storage.py b/tests/test_storage.py index 6dc13fd..f9729be 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -575,3 +575,113 @@ def test_search_user_messages_phrase(self, storage): results = storage.search_user_messages('"unit tests"') assert len(results) == 1 assert "unit tests" in results[0].user_message_text.lower() + + +class TestFTSTriggers: + """Tests for FTS trigger behavior on insert/update/delete.""" + + def test_fts_trigger_on_insert(self, storage): + """Test that FTS index is updated on insert.""" + storage.add_event( + Event( + id=None, + uuid="insert-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="searchable insert content", + ) + ) + + # Verify FTS finds the inserted content + results = storage.search_user_messages("searchable") + assert len(results) == 1 + assert results[0].uuid == "insert-test" + + def test_fts_trigger_on_update_null_to_value(self, storage): + """Test FTS trigger handles NULL -> non-NULL update correctly.""" + # Insert event without user_message_text + storage.add_event( + Event( + id=None, + uuid="update-null-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text=None, + ) + ) + + # Verify not in FTS + results = storage.search_user_messages("updated") + assert len(results) == 0 + + # Update to add user_message_text + storage.execute_write( + "UPDATE events SET user_message_text = ? 
WHERE uuid = ?", + ("updated content here", "update-null-test"), + ) + + # Verify FTS now finds it + results = storage.search_user_messages("updated") + assert len(results) == 1 + assert results[0].uuid == "update-null-test" + + def test_fts_trigger_on_update_value_to_different(self, storage): + """Test FTS trigger handles value -> different value update correctly.""" + storage.add_event( + Event( + id=None, + uuid="update-value-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="original searchterm", + ) + ) + + # Verify original is searchable + results = storage.search_user_messages("original") + assert len(results) == 1 + + # Update to different value + storage.execute_write( + "UPDATE events SET user_message_text = ? WHERE uuid = ?", + ("replacement searchterm", "update-value-test"), + ) + + # Old value should not be found + results = storage.search_user_messages("original") + assert len(results) == 0 + + # New value should be found + results = storage.search_user_messages("replacement") + assert len(results) == 1 + assert results[0].uuid == "update-value-test" + + def test_fts_trigger_on_update_value_to_null(self, storage): + """Test FTS trigger handles non-NULL -> NULL update correctly.""" + storage.add_event( + Event( + id=None, + uuid="update-to-null-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="removable content", + ) + ) + + # Verify in FTS + results = storage.search_user_messages("removable") + assert len(results) == 1 + + # Update to NULL + storage.execute_write( + "UPDATE events SET user_message_text = NULL WHERE uuid = ?", + ("update-to-null-test",), + ) + + # Should no longer be in FTS + results = storage.search_user_messages("removable") + assert len(results) == 0