diff --git a/src/session_analytics/__init__.py b/src/session_analytics/__init__.py index 3107bd5..7a29f7a 100644 --- a/src/session_analytics/__init__.py +++ b/src/session_analytics/__init__.py @@ -8,7 +8,7 @@ __version__ = "0.1.0" # Fallback for development # Re-export public API -from session_analytics.queries import build_where_clause, get_cutoff +from session_analytics.queries import build_where_clause, get_cutoff, normalize_datetime from session_analytics.storage import ( Event, GitCommit, @@ -31,4 +31,5 @@ # Query helpers "build_where_clause", "get_cutoff", + "normalize_datetime", ] diff --git a/src/session_analytics/ingest.py b/src/session_analytics/ingest.py index 830d6b7..2f9fcfb 100644 --- a/src/session_analytics/ingest.py +++ b/src/session_analytics/ingest.py @@ -5,7 +5,7 @@ from datetime import datetime, timedelta from pathlib import Path -from session_analytics.queries import get_cutoff +from session_analytics.queries import get_cutoff, normalize_datetime from session_analytics.storage import Event, GitCommit, IngestionState, Session, SQLiteStorage logger = logging.getLogger("session-analytics") @@ -635,6 +635,9 @@ def correlate_git_with_sessions( start = datetime.fromisoformat(start) if isinstance(end, str): end = datetime.fromisoformat(end) + # Normalize to naive datetime for consistent comparison with git commits + start = normalize_datetime(start) + end = normalize_datetime(end) session_ranges.append( { "session_id": s["session_id"], @@ -663,6 +666,8 @@ def correlate_git_with_sessions( commit_time = commit.timestamp if isinstance(commit_time, str): commit_time = datetime.fromisoformat(commit_time) + # Normalize to naive datetime for consistent comparison with session times + commit_time = normalize_datetime(commit_time) # Find matching session (commit within session window ± 5 min buffer) for sr in session_ranges: diff --git a/src/session_analytics/queries.py b/src/session_analytics/queries.py index 611fc54..258061a 100644 --- a/src/session_analytics/queries.py +++ b/src/session_analytics/queries.py @@ -67,6 +67,27 @@ def get_cutoff(days: int | float = 7, hours: float = 0) -> datetime: return datetime.now() - timedelta(hours=total_hours) +def normalize_datetime(dt: datetime) -> datetime: + """Normalize a datetime to naive (no timezone) for comparison. + + Git commits may have timezone info while session timestamps from SQLite + may not. This strips timezone info to enable safe comparisons. + + Args: + dt: datetime that may or may not have timezone info + + Returns: + Naive datetime (tzinfo=None) + """ + if dt.tzinfo is not None: + # Strip timezone info, preserving local time values. + # We intentionally don't convert to UTC because session timestamps + # in SQLite are naive local time, and git commits represent the same + # local time just with timezone info attached. + return dt.replace(tzinfo=None) + return dt + + def ensure_fresh_data( storage: SQLiteStorage, max_age_minutes: int = 5, diff --git a/tests/test_cli.py b/tests/test_cli.py index f37a169..f32db43 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,8 +3,6 @@ from datetime import datetime from unittest.mock import patch -import pytest - from session_analytics.cli import ( cmd_classify, cmd_commands, @@ -623,11 +621,7 @@ class Args: days = 7 with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage): - try: - cmd_git_correlate(Args()) - except TypeError: - # Known issue: timezone-aware vs naive datetime comparison - pytest.skip("Timezone comparison issue in correlate_git_with_sessions") + cmd_git_correlate(Args()) captured = capsys.readouterr() assert "correlat" in captured.out.lower() or "commit" in captured.out.lower() diff --git a/tests/test_ingest.py b/tests/test_ingest.py index 3fc6fb5..e34513e 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -766,6 +766,66 @@ def test_commit_before_session_outside_pre_buffer(self, storage): assert result["commits_correlated"] == 0 + def test_timezone_aware_commit_correlates_correctly(self, storage): + """Test that timezone-aware git commits correlate with naive session timestamps. + + Regression test for issue #34: TypeError when comparing timezone-aware and + naive datetime objects. + """ + from datetime import datetime, timedelta, timezone + + from session_analytics.ingest import correlate_git_with_sessions + from session_analytics.storage import Event, GitCommit + + now = datetime.now() + session_start = now - timedelta(hours=1) + session_end = now - timedelta(minutes=30) + + # Session events with naive timestamps (no timezone) + storage.add_events_batch( + [ + Event( + id=None, + uuid="tz-aware-e1", + timestamp=session_start, # naive + session_id="tz-aware-session", + project_path="/test/repo", + entry_type="tool_use", + tool_name="Read", + ), + Event( + id=None, + uuid="tz-aware-e2", + timestamp=session_end, # naive + session_id="tz-aware-session", + project_path="/test/repo", + entry_type="tool_use", + tool_name="Edit", + ), + ] + ) + + # Git commit with timezone-aware timestamp (typical of git log output) + commit_time = session_start + timedelta(minutes=10) + commit_time_aware = commit_time.replace(tzinfo=timezone.utc) + + commit = GitCommit( + sha="f" * 40, + message="Commit with timezone", + timestamp=commit_time_aware, # timezone-aware + project_path="/test/repo", + session_id=None, + ) + storage.add_git_commit(commit) + + # Should not raise TypeError + result = correlate_git_with_sessions(storage, days=7) + + # Should successfully correlate + assert result["commits_correlated"] == 1 + commits = storage.get_git_commits() + assert commits[0].session_id == "tz-aware-session" + class TestBatchCorrelationErrorHandling: """Tests for batch correlation error handling.""" diff --git a/tests/test_queries.py b/tests/test_queries.py index ef483d0..f0ed296 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1453,3 +1453,72 @@ def test_cutoff_default_values(self): cutoff = get_cutoff() expected = datetime.now() - timedelta(days=7) assert abs((cutoff - expected).total_seconds()) < 1 + + +class TestNormalizeDatetime: + """Tests for normalize_datetime() helper function.""" + + def test_naive_datetime_unchanged(self): + """Test that naive datetime is returned unchanged.""" + from session_analytics.queries import normalize_datetime + + naive_dt = datetime(2024, 1, 15, 12, 30, 45) + result = normalize_datetime(naive_dt) + assert result == naive_dt + assert result.tzinfo is None + + def test_utc_timezone_stripped(self): + """Test that UTC timezone is stripped.""" + from datetime import timezone + + from session_analytics.queries import normalize_datetime + + aware_dt = datetime(2024, 1, 15, 12, 30, 45, tzinfo=timezone.utc) + result = normalize_datetime(aware_dt) + + assert result.tzinfo is None + assert result.year == 2024 + assert result.month == 1 + assert result.day == 15 + assert result.hour == 12 + assert result.minute == 30 + assert result.second == 45 + + def test_non_utc_timezone_stripped(self): + """Test that non-UTC timezone is stripped, preserving local time values. + + We intentionally preserve the time values (hour/minute/second) rather + than converting to UTC. This is correct because session timestamps in + SQLite are stored as naive local time - a commit at 12:30 local time + should correlate with sessions running at 12:00-13:00 local time, + regardless of the timezone offset attached to the commit timestamp. + """ + from datetime import timezone + + from session_analytics.queries import normalize_datetime + + # Create a timezone offset (e.g., +05:30) + tz_offset = timezone(timedelta(hours=5, minutes=30)) + aware_dt = datetime(2024, 1, 15, 12, 30, 45, tzinfo=tz_offset) + result = normalize_datetime(aware_dt) + + assert result.tzinfo is None + # The time values should remain the same (we strip, not convert) + assert result.hour == 12 + assert result.minute == 30 + + def test_comparison_after_normalization(self): + """Test that normalized datetimes can be compared safely.""" + from datetime import timezone + + from session_analytics.queries import normalize_datetime + + naive_dt = datetime(2024, 1, 15, 12, 30, 45) + aware_dt = datetime(2024, 1, 15, 12, 30, 45, tzinfo=timezone.utc) + + # Direct comparison would raise TypeError + # After normalization, comparison should work + normalized_aware = normalize_datetime(aware_dt) + normalized_naive = normalize_datetime(naive_dt) + + assert normalized_aware == normalized_naive diff --git a/tests/test_server.py b/tests/test_server.py index 1ee1284..cdf00b6 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -224,18 +224,10 @@ def test_ingest_git_history(): def test_correlate_git_with_sessions(): """Test that correlate_git_with_sessions links commits to sessions.""" - # Note: This may fail with timezone-aware commits - known issue - # Just verify it returns expected structure without erroring - try: - result = correlate_git_with_sessions.fn(days=7) - assert result["status"] == "ok" - assert "days" in result - assert "commits_correlated" in result - except TypeError: - # Known issue: timezone-aware vs naive datetime comparison - import pytest - - pytest.skip("Timezone comparison issue in correlate_git_with_sessions") + result = correlate_git_with_sessions.fn(days=7) + assert result["status"] == "ok" + assert "days" in result + assert "commits_correlated" in result def test_get_session_signals():