From 64b5d27e67c13b777521b94ad0b5b2ec7e074730 Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Sun, 4 Jan 2026 04:17:18 +0000 Subject: [PATCH 1/2] feat: Close drill-down gaps in MCP API (RFC #49) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-play testing revealed that LLMs could see aggregate counts but couldn't drill down to actionable specifics. This closes those gaps: - **error_examples in analyze_failures**: When errors_by_tool shows "Bash: 5 errors", error_examples now reveals WHICH commands failed (top 5 per tool with counts) - **classification_factors in classify_sessions**: Sessions now include the trigger threshold and relevant metrics explaining WHY they were categorized (e.g., "error_rate > 15%", error_rate: 33.2%) - **fnmatch for permission_gaps**: Patterns like Bash(make*) now correctly match commands using glob patterns, not just exact matches - **Clearer sample_sequences error**: Message now says "must be alphanumeric or underscores" (was misleading about underscores) Closes #45, #46, #48, #49 Supersedes #47 (was already working via tool_id joins) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/session_analytics/guide.md | 34 ++++-- src/session_analytics/patterns.py | 166 ++++++++++++++++++++++++------ src/session_analytics/queries.py | 47 ++++++++- tests/test_patterns.py | 137 ++++++++++++++++++++++-- tests/test_queries.py | 61 +++++++++++ 5 files changed, 389 insertions(+), 56 deletions(-) diff --git a/src/session_analytics/guide.md b/src/session_analytics/guide.md index c233f14..b33eab0 100644 --- a/src/session_analytics/guide.md +++ b/src/session_analytics/guide.md @@ -38,20 +38,29 @@ identify permission gaps. 
|------|---------| | `get_tool_sequences(days?, min_count?, length?)` | Common tool chains (e.g., Read → Edit → Bash) | | `sample_sequences(pattern, limit?, context_events?)` | Random samples of a pattern with surrounding context | -| `get_permission_gaps(days?, min_count?)` | Commands that should be in settings.json | +| `get_permission_gaps(days?, min_count?)` | Commands not covered by settings.json (supports glob patterns) | | `get_insights(days?, refresh?)` | Pre-computed patterns for /improve-workflow | ### Failure Analysis | Tool | Purpose | |------|---------| -| `analyze_failures(days?, project?)` | Failure patterns, rework, and correlations | +| `analyze_failures(days?, project?)` | Failure patterns with drill-down to specific commands | + +Returns: +- `errors_by_tool`: Count of errors per tool +- `error_examples`: Top failing commands (Bash) or files (Edit/Read/Write) for drill-down +- `rework_patterns`: Files edited 3+ times within 10 minutes ### Session Classification | Tool | Purpose | |------|---------| -| `classify_sessions(days?, project?)` | Categorize sessions (debugging, development, research, maintenance) | +| `classify_sessions(days?, project?)` | Categorize sessions with explanation of why | + +Each session includes `classification_factors` explaining WHY it was categorized: +- `trigger`: The threshold that was exceeded (e.g., "error_rate > 15%") +- Relevant metrics (error_rate, edit_rate, etc.) 
### Trend Analysis @@ -168,15 +177,15 @@ analyze_trends() → "Usage is increasing/decreasing" ### Session Categories -`classify_sessions()` returns one of these categories: +`classify_sessions()` returns one of these categories, with `classification_factors` explaining why: -| Category | Criteria | -|----------|----------| -| **debugging** | High error rate (>15%) or 5+ errors | -| **development** | Heavy editing (>30% edits or 3+ writes) | -| **maintenance** | Git/build focus without much editing | -| **research** | Mostly reading/searching codebase | -| **mixed** | No dominant pattern | +| Category | Criteria | Trigger Example | +|----------|----------|-----------------| +| **debugging** | High error rate (>15%) or more than 5 errors | `"error_rate > 15%"` | +| **development** | Heavy editing (>30% edits or more than 3 writes) | `"edit_rate > 30%"` | +| **maintenance** | Git/build focus without much editing | `"git_build_rate > 30%"` | +| **research** | Mostly reading/searching codebase | `"read_search_rate > 50%"` | +| **mixed** | No dominant pattern | `"no_dominant_pattern"` | ### Permission Gaps @@ -189,6 +198,9 @@ get_permission_gaps(min_count=5) Add suggestions to `permissions.allow` in your settings. +**Note:** Supports glob pattern matching. Patterns like `Bash(make*)` will correctly +match commands `make`, `make-test`, etc. using fnmatch. 
+ ### Git Integration Git correlation requires two steps: diff --git a/src/session_analytics/patterns.py b/src/session_analytics/patterns.py index f3e9384..935a989 100644 --- a/src/session_analytics/patterns.py +++ b/src/session_analytics/patterns.py @@ -1,5 +1,6 @@ """Pattern detection and insight generation for session analytics.""" +import fnmatch import json import logging import random @@ -241,12 +242,12 @@ def sample_sequences( else: target_tools = [t.strip() for t in pattern.split(",")] - # Validate individual tool names (alphanumeric + underscore only) + # Validate individual tool names (alphanumeric and underscores only) for tool in target_tools: if not tool or not all(c.isalnum() or c == "_" for c in tool): return { "pattern": pattern, - "error": f"Invalid tool name: '{tool}' (must be alphanumeric)", + "error": f"Invalid tool name: '{tool}' (must be alphanumeric or underscores)", "total_occurrences": 0, "samples": [], } @@ -389,6 +390,7 @@ def analyze_failures( Identifies: - Tool errors (is_error=True in tool_result) + - Error examples showing top failing commands/files per tool - Rework patterns (same file edited multiple times quickly) - Error clustering by tool/command @@ -398,7 +400,10 @@ def analyze_failures( rework_window_minutes: Time window for detecting rework (default: 10) Returns: - Dict with failure analysis including error counts, rework patterns, recovery times + Dict with: + - errors_by_tool: Count of errors per tool + - error_examples: Top failing commands (Bash) or files (Edit/Read/Write) per tool + - rework_patterns: Instances of same file edited 3+ times quickly """ cutoff = get_cutoff(days=days) @@ -452,6 +457,54 @@ def analyze_failures( if row["tool_name"] ] + # Get error examples: top failing commands/files for drill-down + # For Bash, group by command; for file tools, group by file_path + error_examples_rows = storage.execute_query( + """ + SELECT + e2.tool_name, + e2.command, + e2.file_path, + COUNT(*) as error_count + FROM events 
e1 + JOIN events e2 ON e1.tool_id = e2.tool_id AND e2.entry_type = 'tool_use' + WHERE e1.timestamp >= ? + AND e1.is_error = 1 + AND e1.entry_type = 'tool_result' + GROUP BY e2.tool_name, e2.command, e2.file_path + ORDER BY e2.tool_name, error_count DESC + """, + (cutoff,), + ) + + # Organize error examples by tool with top 5 examples each + error_examples: dict[str, list[dict]] = {} + tool_example_counts: dict[str, int] = {} + + for row in error_examples_rows: + tool = row["tool_name"] + if not tool: + continue + + # Limit to 5 examples per tool + if tool_example_counts.get(tool, 0) >= 5: + continue + + if tool not in error_examples: + error_examples[tool] = [] + tool_example_counts[tool] = 0 + + # Build example based on tool type + if tool == "Bash" and row["command"]: + error_examples[tool].append({"command": row["command"], "count": row["error_count"]}) + elif row["file_path"]: + error_examples[tool].append({"file": row["file_path"], "count": row["error_count"]}) + else: + # Generic fallback + error_examples[tool].append({"count": row["error_count"]}) + + tool_example_counts[tool] += 1 + # Detect rework patterns: same file edited multiple times in quick succession rework_window = timedelta(minutes=rework_window_minutes) @@ -542,6 +595,7 @@ def analyze_failures( "sessions_with_errors": sessions_with_errors, "avg_errors_per_session": round(avg_errors_per_session, 2), "errors_by_tool": errors_by_tool[:10], + "error_examples": error_examples, "rework_patterns": { "instances_detected": len(rework_instances), "rework_window_minutes": rework_window_minutes, @@ -550,50 +604,101 @@ def analyze_failures( } -def load_allowed_commands(settings_path: Path = DEFAULT_SETTINGS_PATH) -> set[str]: - """Load allowed base commands from Claude Code settings.json. 
- - Parses Bash permission patterns and extracts base commands: - - Bash(gh:*) → gh - - Bash(gh pr view:*) → gh - - Bash(git status:*) → git - - This means a command like `gh` won't be reported as a permission gap - if ANY pattern for `gh` exists (e.g., `Bash(gh pr view:*)`). +def load_allowed_commands( + settings_path: Path = DEFAULT_SETTINGS_PATH, +) -> tuple[set[str], list[str]]: + """Load allowed base commands and glob patterns from Claude Code settings.json. + + Parses Bash permission patterns and extracts: + 1. Base commands for simple matching: + - Bash(gh:*) → gh + - Bash(gh pr view:*) → gh + - Bash(git status:*) → git + 2. Glob patterns for fnmatch matching: + - Bash(make*) → make* + - Bash(./scripts/*.sh:*) → ./scripts/*.sh Args: settings_path: Path to settings.json Returns: - Set of base commands that have any configured pattern + Tuple of (base_commands set, glob_patterns list for fnmatch) """ if not settings_path.exists(): - return set() + return set(), [] try: with open(settings_path) as f: settings = json.load(f) - base_commands = set() + base_commands: set[str] = set() + glob_patterns: list[str] = [] permissions = settings.get("permissions", {}) for pattern in permissions.get("allow", []): - if pattern.startswith("Bash(") and ":*)" in pattern: - # Extract full command from "Bash(command args:*)" - # Find the position of ":*)" to handle patterns correctly - start = 5 # len("Bash(") - end = pattern.find(":*)") - if end > start: - full_cmd = pattern[start:end] - # Extract base command (first word) - base_cmd = full_cmd.split()[0] if full_cmd else None + if not pattern.startswith("Bash(") or not pattern.endswith(")"): + continue + + # Extract content from Bash(...) 
+ content = pattern[5:-1] # Remove "Bash(" and ")" + if not content: + continue + + # Handle different formats + if ":*" in content: + # Standard format: Bash(cmd:*) or Bash(cmd args:*) + full_cmd = content.split(":*")[0] + # Extract base command (first word) + base_cmd = full_cmd.split()[0] if full_cmd else None + if base_cmd: + base_commands.add(base_cmd) + # Also store as glob pattern for fnmatch + glob_patterns.append(base_cmd) + elif "*" in content or "?" in content or "[" in content: + # Glob pattern: Bash(make*), Bash(./scripts/*.sh) + # Extract base command (remove glob chars for base matching) + base = content.rstrip("*").rstrip() + if base: + # For patterns like "make*", base is "make" + base_cmd = base.split()[0] if base else None if base_cmd: base_commands.add(base_cmd) + # Store full pattern for fnmatch + glob_patterns.append(content) + else: + # Exact match: Bash(cmd) + base_cmd = content.split()[0] if content else None + if base_cmd: + base_commands.add(base_cmd) + glob_patterns.append(base_cmd) - return base_commands + return base_commands, glob_patterns except (json.JSONDecodeError, OSError) as e: logger.warning(f"Could not load settings.json: {e}") - return set() + return set(), [] + + +def _command_matches_patterns(cmd: str, base_commands: set[str], glob_patterns: list[str]) -> bool: + """Check if a command is covered by allowed patterns. 
+ + Args: + cmd: The base command to check (e.g., "git", "make") + base_commands: Set of allowed base commands + glob_patterns: List of glob patterns for fnmatch + + Returns: + True if command is allowed by any pattern + """ + # First check simple base command membership + if cmd in base_commands: + return True + + # Then check glob patterns using fnmatch + for pattern in glob_patterns: + if fnmatch.fnmatch(cmd, pattern): + return True + + return False def compute_permission_gaps( @@ -604,6 +709,9 @@ def compute_permission_gaps( ) -> list[Pattern]: """Find commands that are frequently used but not in settings.json. + Uses fnmatch for glob pattern matching, so patterns like Bash(make*) + will correctly match commands like 'make', 'make-test', etc. + Args: storage: Storage instance days: Number of days to analyze @@ -616,7 +724,7 @@ def compute_permission_gaps( cutoff = get_cutoff(days=days) now = datetime.now() - allowed_commands = load_allowed_commands(settings_path) + base_commands, glob_patterns = load_allowed_commands(settings_path) rows = storage.execute_query( """ @@ -633,7 +741,7 @@ def compute_permission_gaps( patterns = [] for row in rows: cmd = row["command"] - if cmd not in allowed_commands: + if not _command_matches_patterns(cmd, base_commands, glob_patterns): patterns.append( Pattern( id=None, diff --git a/src/session_analytics/queries.py b/src/session_analytics/queries.py index 57a041a..d413c51 100644 --- a/src/session_analytics/queries.py +++ b/src/session_analytics/queries.py @@ -1052,13 +1052,18 @@ def classify_sessions( - research: Read/search heavy, exploring codebase - maintenance: CI/git heavy, infrastructure work + Each session includes `classification_factors` explaining WHY it was + categorized, including the trigger threshold and relevant metrics. 
+ Args: storage: Storage instance days: Number of days to analyze (default: 7) project: Optional project filter Returns: - Dict with session classifications and category distribution + Dict with: + - sessions: List with category, confidence, classification_factors, and stats + - category_distribution: Count of sessions per category """ cutoff = get_cutoff(days=days) @@ -1123,21 +1128,52 @@ def classify_sessions( # - Maintenance: Git/build focus without editing (>30% combined) # - Research: Mostly reading/searching codebase (>50% combined) # - Mixed: No dominant pattern, balanced activity - if error_pct > 0.15 or (row["error_count"] or 0) > 5: + error_count = row["error_count"] or 0 + write_count = row["write_count"] or 0 + + if error_pct > 0.15 or error_count > 5: category = "debugging" confidence = min(1.0, error_pct * 3) - elif edit_pct > 0.3 or (row["write_count"] or 0) > 3: + classification_factors = { + "trigger": "error_rate > 15%" if error_pct > 0.15 else "error_count > 5", + "error_rate": round(error_pct * 100, 1), + "error_count": error_count, + } + elif edit_pct > 0.3 or write_count > 3: category = "development" - confidence = min(1.0, (edit_pct + (row["write_count"] or 0) / total) * 2) + confidence = min(1.0, (edit_pct + write_count / total) * 2) + classification_factors = { + "trigger": "edit_rate > 30%" if edit_pct > 0.3 else "write_count > 3", + "edit_rate": round(edit_pct * 100, 1), + "write_count": write_count, + } elif git_pct + build_pct > 0.3: category = "maintenance" confidence = min(1.0, (git_pct + build_pct) * 2) + classification_factors = { + "trigger": "git_build_rate > 30%", + "git_rate": round(git_pct * 100, 1), + "build_rate": round(build_pct * 100, 1), + } elif read_pct + search_pct > 0.5: category = "research" confidence = min(1.0, (read_pct + search_pct) * 1.5) + classification_factors = { + "trigger": "read_search_rate > 50%", + "read_rate": round(read_pct * 100, 1), + "search_rate": round(search_pct * 100, 1), + } else: category = 
"mixed" confidence = 0.5 + classification_factors = { + "trigger": "no_dominant_pattern", + "top_activities": { + "edit_rate": round(edit_pct * 100, 1), + "read_rate": round(read_pct * 100, 1), + "search_rate": round(search_pct * 100, 1), + }, + } category_counts[category] += 1 @@ -1147,13 +1183,14 @@ def classify_sessions( "project": row["project_path"], "category": category, "confidence": round(confidence, 2), + "classification_factors": classification_factors, "stats": { "total_events": row["total_events"], "edit_count": row["edit_count"] or 0, "read_count": row["read_count"] or 0, "search_count": row["search_count"] or 0, "git_count": row["git_count"] or 0, - "error_count": row["error_count"] or 0, + "error_count": error_count, }, "first_seen": _format_timestamp(row["first_seen"]), "last_seen": _format_timestamp(row["last_seen"]), diff --git a/tests/test_patterns.py b/tests/test_patterns.py index 1e29466..e82c0e2 100644 --- a/tests/test_patterns.py +++ b/tests/test_patterns.py @@ -92,17 +92,18 @@ def test_load_allowed_commands_missing_file(self): """Test loading allowed commands from non-existent file.""" with tempfile.TemporaryDirectory() as tmpdir: missing_path = Path(tmpdir) / "nonexistent.json" - allowed = load_allowed_commands(missing_path) - assert allowed == set() + base_commands, glob_patterns = load_allowed_commands(missing_path) + assert base_commands == set() + assert glob_patterns == [] def test_load_allowed_commands(self): """Test loading allowed commands from settings.json.""" with tempfile.TemporaryDirectory() as tmpdir: settings_path = Path(tmpdir) / "settings.json" settings_path.write_text('{"permissions": {"allow": ["Bash(git:*)", "Bash(make:*)"]}}') - allowed = load_allowed_commands(settings_path) - assert "git" in allowed - assert "make" in allowed + base_commands, glob_patterns = load_allowed_commands(settings_path) + assert "git" in base_commands + assert "make" in base_commands def test_compute_permission_gaps(self, pattern_storage): 
"""Test computing permission gaps.""" @@ -150,16 +151,16 @@ def test_load_allowed_commands_extracts_base_from_subcommands(self): '"Bash(cargo build:*)"' "]}}" ) - allowed = load_allowed_commands(settings_path) + base_commands, glob_patterns = load_allowed_commands(settings_path) # Should extract base commands, not full subcommands - assert "gh" in allowed - assert "git" in allowed - assert "cargo" in allowed + assert "gh" in base_commands + assert "git" in base_commands + assert "cargo" in base_commands # Should NOT contain full subcommand strings - assert "gh pr view" not in allowed - assert "git status" not in allowed + assert "gh pr view" not in base_commands + assert "git status" not in base_commands def test_permission_gaps_filters_subcommand_patterns(self, pattern_storage): """Test that gaps are filtered when subcommand patterns exist. @@ -184,6 +185,52 @@ def test_permission_gaps_filters_subcommand_patterns(self, pattern_storage): # make has no patterns, should still be a gap assert "make" in pattern_keys + def test_load_allowed_commands_handles_glob_patterns(self): + """Test that glob patterns (without :*) are handled correctly. + + Patterns like Bash(make*) should be recognized and used for + fnmatch-based matching. + """ + with tempfile.TemporaryDirectory() as tmpdir: + settings_path = Path(tmpdir) / "settings.json" + settings_path.write_text( + '{"permissions": {"allow": [' + '"Bash(make*)", ' + '"Bash(./scripts/*.sh:*)", ' + '"Bash(cargo)"' + "]}}" + ) + base_commands, glob_patterns = load_allowed_commands(settings_path) + + # Should extract base commands + assert "make" in base_commands + assert "cargo" in base_commands + + # Glob patterns should be stored for fnmatch + assert "make*" in glob_patterns + assert "cargo" in glob_patterns + + def test_permission_gaps_uses_fnmatch(self, pattern_storage): + """Test that permission gaps uses fnmatch for glob pattern matching. 
+ + If settings has Bash(make*), then 'make' should NOT be reported + as a permission gap because it matches the glob pattern. + """ + with tempfile.TemporaryDirectory() as tmpdir: + settings_path = Path(tmpdir) / "settings.json" + # Use glob pattern without :* + settings_path.write_text('{"permissions": {"allow": ["Bash(make*)"]}}') + + patterns = compute_permission_gaps( + pattern_storage, days=7, threshold=1, settings_path=settings_path + ) + + pattern_keys = {p.pattern_key for p in patterns} + # make should be filtered out by fnmatch against "make*" + assert "make" not in pattern_keys + # git has no matching pattern, should still be a gap + assert "git" in pattern_keys + class TestComputeAllPatterns: """Tests for computing all patterns.""" @@ -503,6 +550,74 @@ def test_rework_not_detected_different_files(self, storage): # Different files shouldn't count as rework assert result["rework_patterns"]["instances_detected"] == 0 + def test_analyze_failures_error_examples(self, storage): + """Test that error_examples provides drill-down to specific failing commands/files. + + RFC #49: When errors_by_tool shows 'Bash: 5 errors', error_examples should + reveal WHICH commands failed, enabling actionable diagnosis. 
+ """ + from session_analytics.patterns import analyze_failures + + now = datetime.now() + events = [ + # Bash error with command + Event( + id=None, + uuid="bash-use-1", + timestamp=now - timedelta(hours=1), + session_id="s1", + entry_type="tool_use", + tool_name="Bash", + tool_id="bash-1", + command="make test", + ), + Event( + id=None, + uuid="bash-result-1", + timestamp=now - timedelta(hours=1, minutes=-1), + session_id="s1", + entry_type="tool_result", + tool_id="bash-1", + is_error=True, + ), + # Read error with file_path + Event( + id=None, + uuid="read-use-1", + timestamp=now - timedelta(hours=2), + session_id="s1", + entry_type="tool_use", + tool_name="Read", + tool_id="read-1", + file_path="/nonexistent/file.py", + ), + Event( + id=None, + uuid="read-result-1", + timestamp=now - timedelta(hours=2, minutes=-1), + session_id="s1", + entry_type="tool_result", + tool_id="read-1", + is_error=True, + ), + ] + storage.add_events_batch(events) + + result = analyze_failures(storage, days=7) + + # Verify error_examples exists + assert "error_examples" in result + + # Bash errors should include the failing command + bash_examples = result["error_examples"].get("Bash", []) + assert len(bash_examples) >= 1 + assert any(ex.get("command") == "make test" for ex in bash_examples) + + # Read errors should include the failing file + read_examples = result["error_examples"].get("Read", []) + assert len(read_examples) >= 1 + assert any(ex.get("file") == "/nonexistent/file.py" for ex in read_examples) + class TestAnalyzeTrends: """Tests for the analyze_trends function (Phase 7: Trend Analysis).""" diff --git a/tests/test_queries.py b/tests/test_queries.py index 10ae3f0..9119316 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -1106,6 +1106,67 @@ def test_min_event_threshold(self, storage): # Session with only 3 events should be excluded assert result["session_count"] == 0 + def test_classification_factors_included(self, storage): + """Test that 
classification_factors explains WHY sessions were categorized. + + RFC #49: Without classification_factors, an LLM seeing 'category: debugging' + cannot explain to the user why it was classified that way. + """ + from session_analytics.queries import classify_sessions + + now = datetime.now() + events = [] + # Create session with >15% error rate to trigger debugging classification + for i in range(6): + events.append( + Event( + id=None, + uuid=f"factors-tool-{i}", + timestamp=now - timedelta(hours=1, minutes=i), + session_id="factors-session", + project_path="/factors/project", + entry_type="tool_use", + tool_name="Bash", + tool_id=f"tool-{i}", + ) + ) + # Add 2 error results (33% error rate) + for i in range(2): + events.append( + Event( + id=None, + uuid=f"factors-error-{i}", + timestamp=now - timedelta(hours=1, minutes=i + 10), + session_id="factors-session", + project_path="/factors/project", + entry_type="tool_result", + tool_id=f"tool-{i}", + is_error=True, + ) + ) + storage.add_events_batch(events) + + result = classify_sessions(storage, days=7) + + session = next( + (s for s in result["sessions"] if s["session_id"] == "factors-session"), + None, + ) + assert session is not None + assert session["category"] == "debugging" + + # Verify classification_factors exists and explains WHY + assert "classification_factors" in session + factors = session["classification_factors"] + + # Should include the trigger that caused this classification + assert "trigger" in factors + assert "error_rate" in factors["trigger"] or "error_count" in factors["trigger"] + + # Should include the relevant metrics + assert "error_rate" in factors + assert factors["error_rate"] > 15 # Should be ~33% + class TestGetUserJourneyIncludeProjects: """Test for get_user_journey with include_projects=False.""" From 3cceb366f4150d4ec6584e0f1279b040010dfa7f Mon Sep 17 00:00:00 2001 From: Evan Senter Date: Sun, 4 Jan 2026 04:28:22 +0000 Subject: [PATCH 2/2] style: Clarify confusing timedelta in 
test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address reviewer feedback: use timedelta(minutes=59) instead of timedelta(hours=1, minutes=-1) for clarity in test timestamps. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/test_patterns.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_patterns.py b/tests/test_patterns.py index e82c0e2..7f707b2 100644 --- a/tests/test_patterns.py +++ b/tests/test_patterns.py @@ -574,7 +574,7 @@ def test_analyze_failures_error_examples(self, storage): Event( id=None, uuid="bash-result-1", - timestamp=now - timedelta(hours=1, minutes=-1), + timestamp=now - timedelta(minutes=59), # Shortly after bash-use-1 session_id="s1", entry_type="tool_result", tool_id="bash-1", @@ -594,7 +594,7 @@ def test_analyze_failures_error_examples(self, storage): Event( id=None, uuid="read-result-1", - timestamp=now - timedelta(hours=2, minutes=-1), + timestamp=now - timedelta(minutes=119), # Shortly after read-use-1 session_id="s1", entry_type="tool_result", tool_id="read-1",