Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/session_analytics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
__version__ = "0.1.0" # Fallback for development

# Re-export public API
from session_analytics.queries import build_where_clause, get_cutoff
from session_analytics.queries import build_where_clause, get_cutoff, normalize_datetime
from session_analytics.storage import (
Event,
GitCommit,
Expand All @@ -31,4 +31,5 @@
# Query helpers
"build_where_clause",
"get_cutoff",
"normalize_datetime",
]
7 changes: 6 additions & 1 deletion src/session_analytics/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime, timedelta
from pathlib import Path

from session_analytics.queries import get_cutoff
from session_analytics.queries import get_cutoff, normalize_datetime
from session_analytics.storage import Event, GitCommit, IngestionState, Session, SQLiteStorage

logger = logging.getLogger("session-analytics")
Expand Down Expand Up @@ -635,6 +635,9 @@ def correlate_git_with_sessions(
start = datetime.fromisoformat(start)
if isinstance(end, str):
end = datetime.fromisoformat(end)
# Normalize to naive datetime for consistent comparison with git commits
start = normalize_datetime(start)
end = normalize_datetime(end)
session_ranges.append(
{
"session_id": s["session_id"],
Expand Down Expand Up @@ -663,6 +666,8 @@ def correlate_git_with_sessions(
commit_time = commit.timestamp
if isinstance(commit_time, str):
commit_time = datetime.fromisoformat(commit_time)
# Normalize to naive datetime for consistent comparison with session times
commit_time = normalize_datetime(commit_time)

# Find matching session (commit within session window ± 5 min buffer)
for sr in session_ranges:
Expand Down
21 changes: 21 additions & 0 deletions src/session_analytics/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,27 @@ def get_cutoff(days: int | float = 7, hours: float = 0) -> datetime:
return datetime.now() - timedelta(hours=total_hours)


def normalize_datetime(dt: datetime) -> datetime:
"""Normalize a datetime to naive (no timezone) for comparison.

Git commits may have timezone info while session timestamps from SQLite
may not. This strips timezone info to enable safe comparisons.

Args:
dt: datetime that may or may not have timezone info

Returns:
Naive datetime (tzinfo=None)
"""
if dt.tzinfo is not None:
# Strip timezone info, preserving local time values.
# We intentionally don't convert to UTC because session timestamps
# in SQLite are naive local time, and git commits represent the same
# local time just with timezone info attached.
return dt.replace(tzinfo=None)
return dt


def ensure_fresh_data(
storage: SQLiteStorage,
max_age_minutes: int = 5,
Expand Down
8 changes: 1 addition & 7 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from datetime import datetime
from unittest.mock import patch

import pytest

from session_analytics.cli import (
cmd_classify,
cmd_commands,
Expand Down Expand Up @@ -623,11 +621,7 @@ class Args:
days = 7

with patch("session_analytics.cli.SQLiteStorage", return_value=populated_storage):
try:
cmd_git_correlate(Args())
except TypeError:
# Known issue: timezone-aware vs naive datetime comparison
pytest.skip("Timezone comparison issue in correlate_git_with_sessions")
cmd_git_correlate(Args())

captured = capsys.readouterr()
assert "correlat" in captured.out.lower() or "commit" in captured.out.lower()
Expand Down
60 changes: 60 additions & 0 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,66 @@ def test_commit_before_session_outside_pre_buffer(self, storage):

assert result["commits_correlated"] == 0

def test_timezone_aware_commit_correlates_correctly(self, storage):
"""Test that timezone-aware git commits correlate with naive session timestamps.

Regression test for issue #34: TypeError when comparing timezone-aware and
naive datetime objects.
"""
from datetime import datetime, timedelta, timezone

from session_analytics.ingest import correlate_git_with_sessions
from session_analytics.storage import Event, GitCommit

now = datetime.now()
session_start = now - timedelta(hours=1)
session_end = now - timedelta(minutes=30)

# Session events with naive timestamps (no timezone)
storage.add_events_batch(
[
Event(
id=None,
uuid="tz-aware-e1",
timestamp=session_start, # naive
session_id="tz-aware-session",
project_path="/test/repo",
entry_type="tool_use",
tool_name="Read",
),
Event(
id=None,
uuid="tz-aware-e2",
timestamp=session_end, # naive
session_id="tz-aware-session",
project_path="/test/repo",
entry_type="tool_use",
tool_name="Edit",
),
]
)

# Git commit with timezone-aware timestamp (typical of git log output)
commit_time = session_start + timedelta(minutes=10)
commit_time_aware = commit_time.replace(tzinfo=timezone.utc)

commit = GitCommit(
sha="f" * 40,
message="Commit with timezone",
timestamp=commit_time_aware, # timezone-aware
project_path="/test/repo",
session_id=None,
)
storage.add_git_commit(commit)

# Should not raise TypeError
result = correlate_git_with_sessions(storage, days=7)

# Should successfully correlate
assert result["commits_correlated"] == 1
commits = storage.get_git_commits()
assert commits[0].session_id == "tz-aware-session"


class TestBatchCorrelationErrorHandling:
"""Tests for batch correlation error handling."""
Expand Down
69 changes: 69 additions & 0 deletions tests/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,3 +1453,72 @@ def test_cutoff_default_values(self):
cutoff = get_cutoff()
expected = datetime.now() - timedelta(days=7)
assert abs((cutoff - expected).total_seconds()) < 1


class TestNormalizeDatetime:
"""Tests for normalize_datetime() helper function."""

def test_naive_datetime_unchanged(self):
"""Test that naive datetime is returned unchanged."""
from session_analytics.queries import normalize_datetime

naive_dt = datetime(2024, 1, 15, 12, 30, 45)
result = normalize_datetime(naive_dt)
assert result == naive_dt
assert result.tzinfo is None

def test_utc_timezone_stripped(self):
"""Test that UTC timezone is stripped."""
from datetime import timezone

from session_analytics.queries import normalize_datetime

aware_dt = datetime(2024, 1, 15, 12, 30, 45, tzinfo=timezone.utc)
result = normalize_datetime(aware_dt)

assert result.tzinfo is None
assert result.year == 2024
assert result.month == 1
assert result.day == 15
assert result.hour == 12
assert result.minute == 30
assert result.second == 45

def test_non_utc_timezone_stripped(self):
"""Test that non-UTC timezone is stripped, preserving local time values.

We intentionally preserve the time values (hour/minute/second) rather
than converting to UTC. This is correct because session timestamps in
SQLite are stored as naive local time - a commit at 12:30 local time
should correlate with sessions running at 12:00-13:00 local time,
regardless of the timezone offset attached to the commit timestamp.
"""
from datetime import timezone

from session_analytics.queries import normalize_datetime

# Create a timezone offset (e.g., +05:30)
tz_offset = timezone(timedelta(hours=5, minutes=30))
aware_dt = datetime(2024, 1, 15, 12, 30, 45, tzinfo=tz_offset)
result = normalize_datetime(aware_dt)

assert result.tzinfo is None
# The time values should remain the same (we strip, not convert)
assert result.hour == 12
assert result.minute == 30

def test_comparison_after_normalization(self):
"""Test that normalized datetimes can be compared safely."""
from datetime import timezone

from session_analytics.queries import normalize_datetime

naive_dt = datetime(2024, 1, 15, 12, 30, 45)
aware_dt = datetime(2024, 1, 15, 12, 30, 45, tzinfo=timezone.utc)

# Direct comparison would raise TypeError
# After normalization, comparison should work
normalized_aware = normalize_datetime(aware_dt)
normalized_naive = normalize_datetime(naive_dt)

assert normalized_aware == normalized_naive
16 changes: 4 additions & 12 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,18 +224,10 @@ def test_ingest_git_history():

def test_correlate_git_with_sessions():
"""Test that correlate_git_with_sessions links commits to sessions."""
# Note: This may fail with timezone-aware commits - known issue
# Just verify it returns expected structure without erroring
try:
result = correlate_git_with_sessions.fn(days=7)
assert result["status"] == "ok"
assert "days" in result
assert "commits_correlated" in result
except TypeError:
# Known issue: timezone-aware vs naive datetime comparison
import pytest

pytest.skip("Timezone comparison issue in correlate_git_with_sessions")
result = correlate_git_with_sessions.fn(days=7)
assert result["status"] == "ok"
assert "days" in result
assert "commits_correlated" in result


def test_get_session_signals():
Expand Down