diff --git a/src/session_analytics/__init__.py b/src/session_analytics/__init__.py index affffca..210e118 100644 --- a/src/session_analytics/__init__.py +++ b/src/session_analytics/__init__.py @@ -10,6 +10,7 @@ # Re-export public API from session_analytics.storage import ( Event, + GitCommit, IngestionState, Pattern, Session, @@ -25,4 +26,5 @@ "Session", "Pattern", "IngestionState", + "GitCommit", ] diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py index 9e15170..716f7d5 100644 --- a/src/session_analytics/ingest.py +++ b/src/session_analytics/ingest.py @@ -12,6 +12,9 @@ # Default location for Claude Code session logs DEFAULT_LOGS_DIR = Path.home() / ".claude" / "projects" +# Maximum length for user message text to prevent DB bloat while preserving context +USER_MESSAGE_MAX_LENGTH = 2000 + def find_log_files( logs_dir: Path = DEFAULT_LOGS_DIR, @@ -211,6 +214,21 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: elif entry_type == "user": content = message.get("content", "") + # Extract user message text for user journey tracking + user_message_text = None + if isinstance(content, str): + user_message_text = content[:USER_MESSAGE_MAX_LENGTH] if content else None + elif isinstance(content, list): + # Extract text from text blocks in the content list + text_parts = [] + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text", "")) + elif isinstance(item, str): + text_parts.append(item) + if text_parts: + user_message_text = " ".join(text_parts)[:USER_MESSAGE_MAX_LENGTH] + # Check if content is a list with tool_result blocks if isinstance(content, list): tool_results = [ @@ -244,6 +262,7 @@ def parse_entry(raw: dict, project_path: str) -> list[Event]: session_id=session_id, project_path=project_path, entry_type="user", + user_message_text=user_message_text, git_branch=git_branch, cwd=cwd, ) @@ -258,6 +277,7 @@ def parse_entry(raw: dict, project_path: str) -> 
list[Event]: session_id=session_id, project_path=project_path, entry_type="user", + user_message_text=user_message_text, git_branch=git_branch, cwd=cwd, ) diff --git a/src/session_analytics/storage.py b/src/session_analytics/storage.py index dc2b440..9c288d4 100644 --- a/src/session_analytics/storage.py +++ b/src/session_analytics/storage.py @@ -62,6 +62,14 @@ class Event: git_branch: str | None = None cwd: str | None = None + # RFC #17 Phase 1 additions + user_message_text: str | None = None # For user journey tracking + # TODO(Phase 4): exit_code is not currently available in Claude Code JSONL format. + # The toolUseResult has stdout/stderr/interrupted but no exit code. + # This field is reserved for future extraction when format changes or + # we implement heuristic detection (e.g., stderr patterns, "Exit code: N" in output). + exit_code: int | None = None # For failure detection (Bash commands) + @dataclass class Session: @@ -103,15 +111,46 @@ class Pattern: computed_at: datetime | None = None +@dataclass(frozen=True) +class GitCommit: + """A git commit for correlation with session activity. + + Immutable dataclass representing a git commit. The SHA is validated + on construction to ensure it's a valid hexadecimal string. 
+ """ + + sha: str + timestamp: datetime | None = None + message: str | None = None + session_id: str | None = None # Inferred from timestamp proximity + project_path: str | None = None + + def __post_init__(self): + """Validate SHA format on construction.""" + if not self.sha: + raise ValueError("SHA cannot be empty") + if not (7 <= len(self.sha) <= 40): + raise ValueError(f"SHA must be 7-40 characters, got {len(self.sha)}") + if not all(c in "0123456789abcdefABCDEF" for c in self.sha): + raise ValueError(f"SHA must be hexadecimal, got '{self.sha}'") + + # Default database path DEFAULT_DB_PATH = Path.home() / ".claude" / "contrib" / "analytics" / "data.db" # Schema version for migrations -SCHEMA_VERSION = 1 +SCHEMA_VERSION = 3 # Migration functions: dict of version -> (migration_name, migration_func) # Each migration upgrades FROM version-1 TO version # e.g., MIGRATIONS[2] upgrades from version 1 to version 2 +# +# NOTE: Schema elements (tables, indexes, triggers) are defined in BOTH migrations +# AND _init_db(). This is intentional: +# - _init_db() defines the complete current schema for fresh installs +# - Migrations incrementally upgrade existing databases to the current schema +# Both paths must result in identical schemas. When adding new schema elements, +# add them to both places and use IF NOT EXISTS for idempotency. 
MIGRATIONS: dict[int, tuple[str, callable]] = {} @@ -125,11 +164,87 @@ def decorator(func: callable): return decorator -# Example migration (commented out, uncomment when needed): -# @migration(2, "add_example_column") -# def migrate_v2(conn): -# """Add example column to events table.""" -# conn.execute("ALTER TABLE events ADD COLUMN example TEXT") +@migration(2, "add_rfc17_phase1_columns") +def migrate_v2(conn): + """Add columns for RFC #17 Phase 1: user_message_text, exit_code, and git_commits table.""" + # Check if columns already exist (for fresh installs that already have them) + existing_cols = {row[1] for row in conn.execute("PRAGMA table_info(events)")} + + # Add user_message_text for user journey tracking + if "user_message_text" not in existing_cols: + conn.execute("ALTER TABLE events ADD COLUMN user_message_text TEXT") + # Add exit_code for failure detection + if "exit_code" not in existing_cols: + conn.execute("ALTER TABLE events ADD COLUMN exit_code INTEGER") + + # Create git_commits table for git correlation + conn.execute(""" + CREATE TABLE IF NOT EXISTS git_commits ( + sha TEXT PRIMARY KEY, + timestamp TIMESTAMP, + message TEXT, + session_id TEXT, + project_path TEXT + ) + """) + conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_timestamp ON git_commits(timestamp)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)") + + +@migration(3, "add_user_message_fts") +def migrate_v3(conn): + """Add FTS5 full-text search index on user_message_text for efficient text search.""" + # Create FTS5 virtual table (content= points to external events table) + # Using an external-content FTS table (no redundant storage) with events.id as rowid + conn.execute(""" + CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5( + user_message_text, + content='events', + content_rowid='id' + ) + """) + + # Populate FTS index from existing 
events with non-null user_message_text + conn.execute(""" + INSERT INTO events_fts(rowid, user_message_text) + SELECT id, user_message_text FROM events WHERE user_message_text IS NOT NULL + """) + + # Create triggers to keep FTS in sync with events table + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_insert AFTER INSERT ON events + WHEN NEW.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(rowid, user_message_text) VALUES (NEW.id, NEW.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_delete AFTER DELETE ON events + WHEN OLD.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + VALUES ('delete', OLD.id, OLD.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_update AFTER UPDATE OF user_message_text ON events + WHEN OLD.user_message_text IS NOT NULL OR NEW.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + SELECT 'delete', OLD.id, OLD.user_message_text WHERE OLD.user_message_text IS NOT NULL; + INSERT INTO events_fts(rowid, user_message_text) + SELECT NEW.id, NEW.user_message_text WHERE NEW.user_message_text IS NOT NULL; + END + """) + + # Partial index for efficiently querying events with user messages + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_events_has_user_message + ON events(id) WHERE user_message_text IS NOT NULL + """) class SQLiteStorage: @@ -254,6 +369,10 @@ def _init_db(self): git_branch TEXT, cwd TEXT, + -- RFC #17 Phase 1 additions + user_message_text TEXT, + exit_code INTEGER, + UNIQUE(session_id, uuid) ) """) @@ -305,6 +424,70 @@ def _init_db(self): ) """) + # Git commits for correlation (RFC #17 Phase 1) + conn.execute(""" + CREATE TABLE IF NOT EXISTS git_commits ( + sha TEXT PRIMARY KEY, + timestamp TIMESTAMP, + message TEXT, + session_id TEXT, + project_path TEXT + ) + """) + conn.execute( + "CREATE INDEX IF NOT EXISTS 
idx_git_commits_timestamp ON git_commits(timestamp)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_git_commits_session ON git_commits(session_id)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_git_commits_project ON git_commits(project_path)" + ) + + # FTS5 full-text search on user_message_text (RFC #17 Phase 1) + conn.execute(""" + CREATE VIRTUAL TABLE IF NOT EXISTS events_fts USING fts5( + user_message_text, + content='events', + content_rowid='id' + ) + """) + + # Triggers to keep FTS in sync with events table + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_insert AFTER INSERT ON events + WHEN NEW.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(rowid, user_message_text) VALUES (NEW.id, NEW.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_delete AFTER DELETE ON events + WHEN OLD.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + VALUES ('delete', OLD.id, OLD.user_message_text); + END + """) + + conn.execute(""" + CREATE TRIGGER IF NOT EXISTS events_fts_update AFTER UPDATE OF user_message_text ON events + WHEN OLD.user_message_text IS NOT NULL OR NEW.user_message_text IS NOT NULL + BEGIN + INSERT INTO events_fts(events_fts, rowid, user_message_text) + SELECT 'delete', OLD.id, OLD.user_message_text WHERE OLD.user_message_text IS NOT NULL; + INSERT INTO events_fts(rowid, user_message_text) + SELECT NEW.id, NEW.user_message_text WHERE NEW.user_message_text IS NOT NULL; + END + """) + + # Partial index for efficiently querying events with user messages + conn.execute(""" + CREATE INDEX IF NOT EXISTS idx_events_has_user_message + ON events(id) WHERE user_message_text IS NOT NULL + """) + # Run any pending migrations current_version = self._get_schema_version(conn) if current_version < SCHEMA_VERSION: @@ -322,8 +505,8 @@ def add_event(self, event: Event) -> Event: tool_name, tool_input_json, tool_id, is_error, command, 
command_args, file_path, skill_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model, - git_branch, cwd - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + git_branch, cwd, user_message_text, exit_code + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( event.uuid, @@ -346,6 +529,8 @@ def add_event(self, event: Event) -> Event: event.model, event.git_branch, event.cwd, + event.user_message_text, + event.exit_code, ), ) event.id = cursor.lastrowid @@ -361,8 +546,8 @@ def add_events_batch(self, events: list[Event]) -> int: tool_name, tool_input_json, tool_id, is_error, command, command_args, file_path, skill_name, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens, model, - git_branch, cwd - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + git_branch, cwd, user_message_text, exit_code + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, [ ( @@ -386,6 +571,8 @@ def add_events_batch(self, events: list[Event]) -> int: e.model, e.git_branch, e.cwd, + e.user_message_text, + e.exit_code, ) for e in events ], @@ -424,6 +611,7 @@ def get_events_in_range( conditions.append("project_path = ?") params.append(project_path) + # Safe: where_clause is built from hardcoded condition strings, not user input where_clause = " AND ".join(conditions) if conditions else "1=1" params.append(limit) @@ -441,6 +629,14 @@ def get_events_in_range( def _row_to_event(self, row: sqlite3.Row) -> Event: """Convert a database row to an Event object.""" + + # Helper to safely get column that might not exist in older schema + def get_col(name: str, default=None): + try: + return row[name] + except (IndexError, KeyError): + return default + return Event( id=row["id"], uuid=row["uuid"], @@ -463,6 +659,8 @@ def _row_to_event(self, row: sqlite3.Row) -> Event: model=row["model"], git_branch=row["git_branch"], cwd=row["cwd"], + 
user_message_text=get_col("user_message_text"), + exit_code=get_col("exit_code"), ) # Session operations @@ -626,6 +824,120 @@ def clear_patterns(self, pattern_type: str | None = None) -> int: cursor = conn.execute("DELETE FROM patterns") return cursor.rowcount + # Git commit operations (RFC #17 Phase 1) + + def add_git_commit(self, commit: GitCommit) -> None: + """Add a git commit for correlation.""" + with self._connect() as conn: + conn.execute( + """ + INSERT OR REPLACE INTO git_commits ( + sha, timestamp, message, session_id, project_path + ) VALUES (?, ?, ?, ?, ?) + """, + ( + commit.sha, + commit.timestamp, + commit.message, + commit.session_id, + commit.project_path, + ), + ) + + def add_git_commits_batch(self, commits: list[GitCommit]) -> int: + """Add multiple git commits in a single transaction. Returns count added.""" + with self._connect() as conn: + cursor = conn.executemany( + """ + INSERT OR REPLACE INTO git_commits ( + sha, timestamp, message, session_id, project_path + ) VALUES (?, ?, ?, ?, ?) 
+ """, + [(c.sha, c.timestamp, c.message, c.session_id, c.project_path) for c in commits], + ) + return cursor.rowcount + + def get_git_commits( + self, + project_path: str | None = None, + start: datetime | None = None, + end: datetime | None = None, + limit: int = 100, + ) -> list[GitCommit]: + """Get git commits, optionally filtered by project and time range.""" + with self._connect() as conn: + conditions = [] + params: list = [] + + if project_path: + conditions.append("project_path = ?") + params.append(project_path) + if start: + conditions.append("timestamp >= ?") + params.append(start) + if end: + conditions.append("timestamp <= ?") + params.append(end) + + # Safe: where_clause is built from hardcoded condition strings, not user input + where_clause = " AND ".join(conditions) if conditions else "1=1" + params.append(limit) + + rows = conn.execute( + f""" + SELECT sha, timestamp, message, session_id, project_path + FROM git_commits + WHERE {where_clause} + ORDER BY timestamp DESC + LIMIT ? + """, + params, + ).fetchall() + + return [ + GitCommit( + sha=row["sha"], + timestamp=row["timestamp"], + message=row["message"], + session_id=row["session_id"], + project_path=row["project_path"], + ) + for row in rows + ] + + def get_git_commit_count(self) -> int: + """Get total number of git commits.""" + with self._connect() as conn: + row = conn.execute("SELECT COUNT(*) as count FROM git_commits").fetchone() + return row["count"] + + # Full-text search operations + + def search_user_messages(self, query: str, limit: int = 100) -> list[Event]: + """Search user messages using full-text search. + + Args: + query: FTS5 query string (supports AND, OR, NOT, phrases, etc.) 
+ limit: Maximum number of results + + Returns: + List of Event objects matching the search query + """ + with self._connect() as conn: + # Use FTS5 MATCH to search, join back to events for full data + rows = conn.execute( + """ + SELECT events.* FROM events + INNER JOIN events_fts ON events.id = events_fts.rowid + WHERE events_fts MATCH ? + ORDER BY rank + LIMIT ? + """, + (query, limit), + ).fetchall() + + return [self._row_to_event(row) for row in rows] + # Utility operations def get_db_stats(self) -> dict: @@ -636,6 +948,12 @@ def get_db_stats(self) -> dict: pattern_count = conn.execute("SELECT COUNT(*) FROM patterns").fetchone()[0] file_count = conn.execute("SELECT COUNT(*) FROM ingestion_state").fetchone()[0] + # Git commit count (may not exist in older schemas) + try: + git_commit_count = conn.execute("SELECT COUNT(*) FROM git_commits").fetchone()[0] + except sqlite3.OperationalError: + git_commit_count = 0 + # Get date range date_range = conn.execute( "SELECT MIN(timestamp) as min_ts, MAX(timestamp) as max_ts FROM events" @@ -654,6 +972,7 @@ def to_iso(val): "event_count": event_count, "session_count": session_count, "pattern_count": pattern_count, + "git_commit_count": git_commit_count, "files_processed": file_count, "earliest_event": to_iso(date_range["min_ts"]), "latest_event": to_iso(date_range["max_ts"]), diff --git a/tests/test_ingest.py b/tests/test_ingest.py index b2503b5..a1ba199 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -229,6 +229,60 @@ def test_skip_malformed_entry(self): events = parse_entry(entry, "test-project") assert len(events) == 0 + def test_user_message_text_truncation_at_boundary(self): + """Test that user_message_text is truncated at USER_MESSAGE_MAX_LENGTH (2000 chars).""" + from session_analytics.ingest import USER_MESSAGE_MAX_LENGTH + + # Test content exactly at the limit - should not be truncated + exact_limit_content = "x" * USER_MESSAGE_MAX_LENGTH + entry_exact = { + "type": "user", + "uuid": "user-exact", 
+ "sessionId": "session-1", + "timestamp": "2025-01-01T12:00:00.000Z", + "message": {"role": "user", "content": exact_limit_content}, + } + events = parse_entry(entry_exact, "test-project") + assert len(events) == 1 + assert len(events[0].user_message_text) == USER_MESSAGE_MAX_LENGTH + + # Test content over the limit - should be truncated + over_limit_content = "y" * (USER_MESSAGE_MAX_LENGTH + 500) + entry_over = { + "type": "user", + "uuid": "user-over", + "sessionId": "session-1", + "timestamp": "2025-01-01T12:00:05.000Z", + "message": {"role": "user", "content": over_limit_content}, + } + events = parse_entry(entry_over, "test-project") + assert len(events) == 1 + assert len(events[0].user_message_text) == USER_MESSAGE_MAX_LENGTH + assert events[0].user_message_text == "y" * USER_MESSAGE_MAX_LENGTH + + def test_user_message_text_truncation_with_list_content(self): + """Test truncation when content is a list of text blocks.""" + from session_analytics.ingest import USER_MESSAGE_MAX_LENGTH + + # Create content with multiple text blocks that exceed limit when joined + text_block = "z" * 1500 + entry = { + "type": "user", + "uuid": "user-list", + "sessionId": "session-1", + "timestamp": "2025-01-01T12:00:00.000Z", + "message": { + "role": "user", + "content": [ + {"type": "text", "text": text_block}, + {"type": "text", "text": text_block}, # Combined: 3001 chars with space + ], + }, + } + events = parse_entry(entry, "test-project") + assert len(events) == 1 + assert len(events[0].user_message_text) == USER_MESSAGE_MAX_LENGTH + class TestIngestFile: """Tests for file ingestion.""" diff --git a/tests/test_storage.py b/tests/test_storage.py index 9c8519f..f9729be 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -1,13 +1,14 @@ """Tests for the SQLite storage layer.""" import tempfile -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path import pytest from session_analytics.storage import ( Event, + GitCommit, 
IngestionState, Pattern, Session, @@ -72,6 +73,12 @@ def test_add_events_batch(self, storage): assert count == 5 assert storage.get_event_count() == 5 + def test_add_events_batch_empty(self, storage): + """Test batch add with empty list.""" + count = storage.add_events_batch([]) + assert count == 0 + assert storage.get_event_count() == 0 + def test_get_events_in_range(self, storage): """Test filtering events by time range.""" # Add events across different times @@ -265,3 +272,416 @@ def test_get_db_stats(self, storage, sample_event): assert stats["session_count"] == 1 assert stats["pattern_count"] == 1 assert stats["db_path"] is not None + + +class TestGitCommitValidation: + """Tests for GitCommit validation (RFC #17 Phase 1).""" + + def test_valid_short_sha(self): + """Test that 7-character short SHA is valid.""" + commit = GitCommit(sha="abc1234") + assert commit.sha == "abc1234" + + def test_valid_full_sha(self): + """Test that 40-character full SHA is valid.""" + full_sha = "a" * 40 + commit = GitCommit(sha=full_sha) + assert commit.sha == full_sha + + def test_invalid_sha_empty(self): + """Test that empty SHA raises ValueError.""" + with pytest.raises(ValueError, match="cannot be empty"): + GitCommit(sha="") + + def test_invalid_sha_too_short(self): + """Test that SHA shorter than 7 chars raises ValueError.""" + with pytest.raises(ValueError, match="must be 7-40 characters"): + GitCommit(sha="abc123") + + def test_invalid_sha_too_long(self): + """Test that SHA longer than 40 chars raises ValueError.""" + with pytest.raises(ValueError, match="must be 7-40 characters"): + GitCommit(sha="a" * 41) + + def test_invalid_sha_non_hex(self): + """Test that non-hexadecimal SHA raises ValueError.""" + with pytest.raises(ValueError, match="must be hexadecimal"): + GitCommit(sha="ghijklm") + + def test_gitcommit_is_frozen(self): + """Test that GitCommit is immutable.""" + commit = GitCommit(sha="abc1234") + with pytest.raises(AttributeError): + commit.sha = "def5678" + + 
+class TestGitCommitOperations: + """Tests for git commit operations (RFC #17 Phase 1).""" + + def test_add_git_commit(self, storage): + """Test adding a git commit.""" + commit = GitCommit( + sha="abc1234", + timestamp=datetime.now(), + message="Test commit", + session_id="session-1", + project_path="test-project", + ) + storage.add_git_commit(commit) + + commits = storage.get_git_commits() + assert len(commits) == 1 + assert commits[0].sha == "abc1234" + assert commits[0].message == "Test commit" + assert commits[0].session_id == "session-1" + assert commits[0].project_path == "test-project" + + def test_add_git_commit_deduplication(self, storage): + """Test that duplicate SHA overwrites existing commit (INSERT OR REPLACE behavior).""" + # Add initial commit + storage.add_git_commit( + GitCommit(sha="abc1234", message="Original message", project_path="project-1") + ) + + # Add commit with same SHA but different data + storage.add_git_commit( + GitCommit(sha="abc1234", message="Updated message", project_path="project-2") + ) + + # Should still have only one commit, with updated data + commits = storage.get_git_commits() + assert len(commits) == 1 + assert commits[0].sha == "abc1234" + assert commits[0].message == "Updated message" + assert commits[0].project_path == "project-2" + + def test_add_git_commits_batch(self, storage): + """Test batch adding git commits.""" + commits = [ + GitCommit(sha="aaa1111", timestamp=datetime.now(), message="Commit 1"), + GitCommit(sha="bbb2222", timestamp=datetime.now(), message="Commit 2"), + GitCommit(sha="ccc3333", timestamp=datetime.now(), message="Commit 3"), + ] + count = storage.add_git_commits_batch(commits) + assert count == 3 + + stored = storage.get_git_commits() + assert len(stored) == 3 + + def test_add_git_commits_batch_empty(self, storage): + """Test batch add with empty list.""" + count = storage.add_git_commits_batch([]) + assert count == 0 + assert storage.get_git_commit_count() == 0 + + def 
test_get_git_commits_with_filters(self, storage): + """Test filtering git commits by project, start, and end time.""" + now = datetime.now() + yesterday = now - timedelta(days=1) + two_days_ago = now - timedelta(days=2) + commits = [ + GitCommit(sha="aaa1111", timestamp=two_days_ago, project_path="project-a"), + GitCommit(sha="bbb2222", timestamp=yesterday, project_path="project-a"), + GitCommit(sha="ccc3333", timestamp=now, project_path="project-a"), + GitCommit(sha="ddd4444", timestamp=now, project_path="project-b"), + ] + storage.add_git_commits_batch(commits) + + # Filter by project + project_a = storage.get_git_commits(project_path="project-a") + assert len(project_a) == 3 + + # Filter by start time + recent = storage.get_git_commits(start=now - timedelta(hours=1)) + assert len(recent) == 2 + + # Filter by end time + old = storage.get_git_commits(end=yesterday + timedelta(hours=1)) + assert len(old) == 2 + + # Combined filters: project AND time range + project_a_recent = storage.get_git_commits( + project_path="project-a", start=yesterday - timedelta(hours=1), end=now + ) + assert len(project_a_recent) == 2 # bbb2222 and ccc3333 + + def test_git_commit_count(self, storage): + """Test getting git commit count.""" + assert storage.get_git_commit_count() == 0 + + storage.add_git_commit(GitCommit(sha="abcdef1")) + assert storage.get_git_commit_count() == 1 + + +class TestNewEventFields: + """Tests for RFC #17 Phase 1 Event fields (user_message_text, exit_code).""" + + def test_event_with_user_message_text(self, storage): + """Test storing and retrieving user_message_text.""" + event = Event( + id=None, + uuid="test-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Hello, please help me with something", + ) + stored = storage.add_event(event) + assert stored.id is not None + + events = storage.get_events_in_range() + assert len(events) == 1 + assert events[0].user_message_text == "Hello, please help me with 
something" + + def test_event_with_exit_code(self, storage): + """Test storing and retrieving exit_code.""" + event = Event( + id=None, + uuid="bash-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="tool_result", + tool_name="Bash", + exit_code=1, + ) + storage.add_event(event) + + events = storage.get_events_in_range() + assert len(events) == 1 + assert events[0].exit_code == 1 + + def test_event_with_all_new_fields(self, storage): + """Test event with all new fields populated.""" + event = Event( + id=None, + uuid="full-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Run a command", + exit_code=0, + ) + storage.add_event(event) + + events = storage.get_events_in_range() + assert events[0].user_message_text == "Run a command" + assert events[0].exit_code == 0 + + def test_event_with_null_new_fields(self, storage): + """Test that events with NULL user_message_text and exit_code are handled correctly.""" + event = Event( + id=None, + uuid="null-fields-uuid", + timestamp=datetime.now(), + session_id="session-1", + entry_type="assistant", + # user_message_text and exit_code are None by default + ) + storage.add_event(event) + + events = storage.get_events_in_range() + assert len(events) == 1 + assert events[0].user_message_text is None + assert events[0].exit_code is None + + +class TestFullTextSearch: + """Tests for full-text search on user_message_text.""" + + def test_search_user_messages_basic(self, storage): + """Test basic full-text search on user messages.""" + # Add events with searchable text + storage.add_event( + Event( + id=None, + uuid="uuid-1", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Help me debug the authentication error", + ) + ) + storage.add_event( + Event( + id=None, + uuid="uuid-2", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Fix the database connection issue", + 
) + ) + storage.add_event( + Event( + id=None, + uuid="uuid-3", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Another error message to debug", + ) + ) + + # Search for "debug" + results = storage.search_user_messages("debug") + assert len(results) == 2 + assert all("debug" in r.user_message_text.lower() for r in results) + + # Search for "authentication" + results = storage.search_user_messages("authentication") + assert len(results) == 1 + assert "authentication" in results[0].user_message_text.lower() + + def test_search_user_messages_no_match(self, storage): + """Test search returns empty when no matches found.""" + storage.add_event( + Event( + id=None, + uuid="uuid-1", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="This is a test message", + ) + ) + + results = storage.search_user_messages("nonexistent") + assert len(results) == 0 + + def test_search_user_messages_phrase(self, storage): + """Test searching for exact phrases.""" + storage.add_event( + Event( + id=None, + uuid="uuid-1", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Run the unit tests", + ) + ) + storage.add_event( + Event( + id=None, + uuid="uuid-2", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="Unit testing is important", + ) + ) + + # Search for phrase "unit tests" + results = storage.search_user_messages('"unit tests"') + assert len(results) == 1 + assert "unit tests" in results[0].user_message_text.lower() + + +class TestFTSTriggers: + """Tests for FTS trigger behavior on insert/update/delete.""" + + def test_fts_trigger_on_insert(self, storage): + """Test that FTS index is updated on insert.""" + storage.add_event( + Event( + id=None, + uuid="insert-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="searchable insert content", + ) + ) + + # 
Verify FTS finds the inserted content + results = storage.search_user_messages("searchable") + assert len(results) == 1 + assert results[0].uuid == "insert-test" + + def test_fts_trigger_on_update_null_to_value(self, storage): + """Test FTS trigger handles NULL -> non-NULL update correctly.""" + # Insert event without user_message_text + storage.add_event( + Event( + id=None, + uuid="update-null-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text=None, + ) + ) + + # Verify not in FTS + results = storage.search_user_messages("updated") + assert len(results) == 0 + + # Update to add user_message_text + storage.execute_write( + "UPDATE events SET user_message_text = ? WHERE uuid = ?", + ("updated content here", "update-null-test"), + ) + + # Verify FTS now finds it + results = storage.search_user_messages("updated") + assert len(results) == 1 + assert results[0].uuid == "update-null-test" + + def test_fts_trigger_on_update_value_to_different(self, storage): + """Test FTS trigger handles value -> different value update correctly.""" + storage.add_event( + Event( + id=None, + uuid="update-value-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="original searchterm", + ) + ) + + # Verify original is searchable + results = storage.search_user_messages("original") + assert len(results) == 1 + + # Update to different value + storage.execute_write( + "UPDATE events SET user_message_text = ? 
WHERE uuid = ?", + ("replacement searchterm", "update-value-test"), + ) + + # Old value should not be found + results = storage.search_user_messages("original") + assert len(results) == 0 + + # New value should be found + results = storage.search_user_messages("replacement") + assert len(results) == 1 + assert results[0].uuid == "update-value-test" + + def test_fts_trigger_on_update_value_to_null(self, storage): + """Test FTS trigger handles non-NULL -> NULL update correctly.""" + storage.add_event( + Event( + id=None, + uuid="update-to-null-test", + timestamp=datetime.now(), + session_id="session-1", + entry_type="user", + user_message_text="removable content", + ) + ) + + # Verify in FTS + results = storage.search_user_messages("removable") + assert len(results) == 1 + + # Update to NULL + storage.execute_write( + "UPDATE events SET user_message_text = NULL WHERE uuid = ?", + ("update-to-null-test",), + ) + + # Should no longer be in FTS + results = storage.search_user_messages("removable") + assert len(results) == 0