diff --git a/src/copilot_usage/vscode_parser.py b/src/copilot_usage/vscode_parser.py index b275556..fb418a6 100644 --- a/src/copilot_usage/vscode_parser.py +++ b/src/copilot_usage/vscode_parser.py @@ -492,30 +492,43 @@ class _SummaryAccumulator: def _update_vscode_summary( acc: _SummaryAccumulator, requests: Sequence[VSCodeRequest] ) -> None: - """Merge *requests* into *acc* in-place, then discard.""" + """Merge *requests* into *acc* in-place, then discard. + + Accumulator dict fields and repeated request attributes are bound to + locals before the loop to replace ``LOAD_ATTR`` with ``LOAD_FAST``. + """ + rbm = acc.requests_by_model + dbm = acc.duration_by_model + rbc = acc.requests_by_category + rbd = acc.requests_by_date last_date_key: str = "" last_date_val: date | None = None for req in requests: acc.total_requests += 1 - acc.total_duration_ms += req.duration_ms + dur = req.duration_ms + acc.total_duration_ms += dur - acc.requests_by_model[req.model] += 1 - acc.duration_by_model[req.model] += req.duration_ms - acc.requests_by_category[req.category] += 1 + model = req.model + rbm[model] += 1 + dbm[model] += dur + rbc[req.category] += 1 - ts_date = req.timestamp.date() + ts = req.timestamp + ts_date = ts.date() if last_date_val is None or ts_date != last_date_val: - last_date_key = req.timestamp.strftime("%Y-%m-%d") + last_date_key = ts_date.isoformat() last_date_val = ts_date - acc.requests_by_date[last_date_key] += 1 + rbd[last_date_key] += 1 # Timestamp bounds: full min/max scan so callers (especially # build_vscode_summary) need not pre-sort their input. 
- if acc.first_timestamp is None or req.timestamp < acc.first_timestamp: - acc.first_timestamp = req.timestamp - if acc.last_timestamp is None or req.timestamp > acc.last_timestamp: - acc.last_timestamp = req.timestamp + first = acc.first_timestamp + if first is None or ts < first: + acc.first_timestamp = ts + last_ts = acc.last_timestamp + if last_ts is None or ts > last_ts: + acc.last_timestamp = ts def _merge_partial(acc: _SummaryAccumulator, partial: VSCodeLogSummary) -> None: diff --git a/tests/copilot_usage/test_vscode_parser.py b/tests/copilot_usage/test_vscode_parser.py index a33d224..11178da 100644 --- a/tests/copilot_usage/test_vscode_parser.py +++ b/tests/copilot_usage/test_vscode_parser.py @@ -2336,3 +2336,103 @@ def spy(root: Path) -> frozenset[tuple[str, tuple[int, int]]]: monkeypatch.setattr(_mod, "_scan_child_ids", spy) _cached_discover_vscode_logs(tmp_path) assert scan_calls == [], "child scan must be skipped on root_id cache hit" + + +# --------------------------------------------------------------------------- +# Correctness-equivalence test for the optimised _update_vscode_summary loop +# --------------------------------------------------------------------------- + + +class TestUpdateVscodeSummaryLargeScale: + """Verify _update_vscode_summary produces correct aggregations at scale. + + Builds a synthetic list of 10 000+ VSCodeRequest objects spanning + multiple models, categories, and dates and asserts the accumulated + result is bit-for-bit identical to a hand-computed reference. + No wall-clock timing — only deterministic correctness checks. 
# ---------------------------------------------------------------------------
# Correctness-equivalence test for the optimised _update_vscode_summary loop
# ---------------------------------------------------------------------------


class TestUpdateVscodeSummaryLargeScale:
    """Verify _update_vscode_summary produces correct aggregations at scale.

    Builds a synthetic list of 10 000+ VSCodeRequest objects spanning
    multiple models, categories, and dates and asserts the accumulated
    result equals an independently computed reference.
    No wall-clock timing — only deterministic correctness checks.
    """

    # Single source of truth for the generator and the reference
    # computations, so the two cannot silently drift apart.
    _MODELS: tuple[str, ...] = (
        "gpt-4o",
        "gpt-4o-mini",
        "claude-opus-4.6",
        "o3-mini",
    )
    _CATEGORIES: tuple[str, ...] = (
        "inline",
        "panel/editAgent",
        "copilotLanguageModelWrapper",
    )

    @classmethod
    def _build_requests(cls, n: int = 10_000) -> list[VSCodeRequest]:
        """Build *n* synthetic requests across several models/categories/dates.

        Request ``i`` gets model ``_MODELS[i % 4]``, category
        ``_CATEGORIES[i % 3]``, duration ``50 + i`` and a March 2026
        timestamp whose day, hour, minute and second all cycle with ``i``,
        so the resulting list is deliberately not sorted by time.
        """
        models = cls._MODELS
        categories = cls._CATEGORIES
        base = datetime(2026, 3, 1, 0, 0, 0)
        requests: list[VSCodeRequest] = []
        for i in range(n):
            ts = base.replace(
                day=1 + (i % 28),
                hour=i % 24,
                minute=i % 60,
                second=i % 60,
            )
            requests.append(
                VSCodeRequest(
                    timestamp=ts,
                    request_id=f"req{i:06d}",
                    model=models[i % len(models)],
                    duration_ms=50 + i,
                    category=categories[i % len(categories)],
                )
            )
        return requests

    def test_aggregation_matches_reference(self) -> None:
        """Accumulated totals match a manually computed reference."""
        n = 10_500
        requests = self._build_requests(n)
        acc = _SummaryAccumulator()
        _update_vscode_summary(acc, requests)

        assert acc.total_requests == n

        # Total duration: every request i contributes 50 + i.
        assert acc.total_duration_ms == sum(50 + i for i in range(n))

        # Per-model counts and durations: model idx receives exactly the
        # requests i with i % len(models) == idx, i.e. range(idx, n, step).
        model_step = len(self._MODELS)
        for idx, m in enumerate(self._MODELS):
            owned = range(idx, n, model_step)
            assert acc.requests_by_model[m] == len(owned)
            assert acc.duration_by_model[m] == sum(50 + i for i in owned)

        # Per-category counts: same cycling argument with the category list.
        category_step = len(self._CATEGORIES)
        for idx, c in enumerate(self._CATEGORIES):
            assert acc.requests_by_category[c] == len(
                range(idx, n, category_step)
            )

        # Per-date counts: compute the exact expected mapping from input
        # requests so we verify the full distribution, not just the total.
        expected_requests_by_date: dict[str, int] = {}
        for request in requests:
            date_key = request.timestamp.date().isoformat()
            expected_requests_by_date[date_key] = (
                expected_requests_by_date.get(date_key, 0) + 1
            )
        assert acc.requests_by_date == expected_requests_by_date

    def test_timestamp_bounds(self) -> None:
        """first_timestamp and last_timestamp are correct min/max."""
        requests = self._build_requests(10_000)
        acc = _SummaryAccumulator()
        _update_vscode_summary(acc, requests)

        expected_first = min(r.timestamp for r in requests)
        expected_last = max(r.timestamp for r in requests)
        assert acc.first_timestamp == expected_first
        assert acc.last_timestamp == expected_last

    def test_empty_input(self) -> None:
        """Passing an empty sequence leaves the accumulator unchanged."""
        acc = _SummaryAccumulator()
        _update_vscode_summary(acc, [])
        assert acc.total_requests == 0
        assert acc.total_duration_ms == 0
        assert acc.first_timestamp is None
        assert acc.last_timestamp is None
        # "Unchanged" includes the per-key mappings: binding them to locals
        # must not create any entries.
        assert not acc.requests_by_model
        assert not acc.duration_by_model
        assert not acc.requests_by_category
        assert not acc.requests_by_date