diff --git a/src/copilot_usage/models.py b/src/copilot_usage/models.py index efa2255..6d177ba 100644 --- a/src/copilot_usage/models.py +++ b/src/copilot_usage/models.py @@ -219,20 +219,25 @@ class AssistantMessageData(BaseModel): @field_validator("outputTokens", mode="before") @classmethod def _sanitize_non_numeric_tokens(cls, v: object) -> object: - """Map non-positive and non-numeric token counts to ``0``. + """Map non-positive, non-numeric, and non-whole-float token counts to ``0``. - JSON ``true``/``false``, numeric strings like ``"100"``, and - non-positive numeric values (int/float) are not valid token counts. - Returning ``0`` preserves parsing of the rest of the assistant - message payload while preventing these values from being lax-coerced - into token counts. + JSON ``true``/``false``, numeric strings like ``"100"``, + non-positive numeric values, and non-integer floats (e.g. ``1.5``) + are not valid token counts. Returning ``0`` preserves parsing of + the rest of the assistant message payload while preventing these + values from being lax-coerced into token counts. This aligns with ``_extract_output_tokens`` in the parser fast path: - both paths agree that only positive numeric values contribute tokens. + both paths agree that only positive whole-number values contribute + tokens. """ if isinstance(v, (bool, str)): return 0 - if isinstance(v, (int, float)) and v <= 0: + if isinstance(v, float): + if not v.is_integer() or v <= 0: + return 0 + return int(v) + if isinstance(v, int) and v <= 0: return 0 return v diff --git a/src/copilot_usage/parser.py b/src/copilot_usage/parser.py index 2d32e6c..c459016 100644 --- a/src/copilot_usage/parser.py +++ b/src/copilot_usage/parser.py @@ -276,13 +276,11 @@ def _extract_output_tokens(ev: SessionEvent) -> int | None: """Extract ``outputTokens`` from an ``assistant.message`` event via direct dict access. 
Mirrors the domain intent of :class:`AssistantMessageData`'s - ``_sanitize_non_numeric_tokens`` field-validator: only positive numeric - values contribute tokens. When ``AssistantMessageData.model_validate(...)`` - succeeds, both paths agree on whether a value contributes tokens; the - representation differs for non-contributing values — this function returns - ``None``, whereas the Pydantic model stores ``0``. Inputs rejected by - model validation should likewise be treated as non-contributing when - comparing behaviors. + ``_sanitize_non_numeric_tokens`` field-validator: only positive + whole-number values contribute tokens. Both paths agree on whether a + value contributes tokens; the representation differs for + non-contributing values — this function returns ``None``, whereas the + Pydantic model stores ``0``. Specifically: diff --git a/tests/copilot_usage/test_models.py b/tests/copilot_usage/test_models.py index 761410c..3e48ebe 100644 --- a/tests/copilot_usage/test_models.py +++ b/tests/copilot_usage/test_models.py @@ -231,6 +231,21 @@ def test_large_negative_maps_to_zero(self) -> None: d = AssistantMessageData.model_validate({"outputTokens": -100_000}) assert d.outputTokens == 0 + def test_non_whole_float_maps_to_zero(self) -> None: + """Non-integer float like ``1.5`` maps to ``0``, matching ``_extract_output_tokens``.""" + d = AssistantMessageData.model_validate({"outputTokens": 1.5}) + assert d.outputTokens == 0 + + def test_non_whole_float_large_maps_to_zero(self) -> None: + """A second non-integer float above ``1.5`` (``2.3``) also maps to ``0``.""" + d = AssistantMessageData.model_validate({"outputTokens": 2.3}) + assert d.outputTokens == 0 + + def test_whole_positive_float_coerced_to_int(self) -> None: + """Whole-number float like ``100.0`` is coerced to ``100``.""" + d = AssistantMessageData.model_validate({"outputTokens": 100.0}) + assert d.outputTokens == 100 + def test_session_shutdown_data() -> None: d = SessionShutdownData.model_validate(RAW_SHUTDOWN["data"]) diff 
--git a/tests/copilot_usage/test_parser.py b/tests/copilot_usage/test_parser.py index 6f150d7..56f2deb 100644 --- a/tests/copilot_usage/test_parser.py +++ b/tests/copilot_usage/test_parser.py @@ -4838,15 +4838,9 @@ def test_positive_contribution_agreement( _ = label fast_path_result = _extract_output_tokens(_make_assistant_event(raw_value)) - # Pydantic rejects non-whole floats with a ValidationError; the model - # path treats those as non-contributing, same as the fast path. - try: - model = AssistantMessageData.model_validate({"outputTokens": raw_value}) - model_contributes = model.outputTokens > 0 - model_result = repr(model.outputTokens) - except ValidationError as exc: - model_contributes = False - model_result = f"ValidationError({exc})" + model = AssistantMessageData.model_validate({"outputTokens": raw_value}) + model_contributes = model.outputTokens > 0 + model_result = repr(model.outputTokens) fast_path_contributes = fast_path_result is not None assert fast_path_contributes == model_contributes, ( @@ -5041,6 +5035,45 @@ def test_mixed_valid_float_null_tokens(self, tmp_path: Path) -> None: summary = build_session_summary(events) assert summary.active_output_tokens == 150 + def test_fractional_float_consistent_summary_and_detail( + self, tmp_path: Path + ) -> None: + """outputTokens=1.5 yields 0 from both session summary and detail view. + + Regression test: before the fix, ``_extract_output_tokens`` returned + ``None`` (→ 0 in the summary) while + ``AssistantMessageData.outputTokens`` truncated to ``1`` via Pydantic + lax coercion. Both paths must now agree. 
+ """ + frac_msg = json.dumps( + { + "type": "assistant.message", + "data": { + "messageId": "m-frac", + "content": "fractional", + "toolRequests": [], + "interactionId": "int-frac", + "outputTokens": 1.5, + }, + "id": "ev-frac", + "timestamp": "2026-03-07T10:01:00.000Z", + "parentId": "ev-user1", + } + ) + p = tmp_path / "s" / "events.jsonl" + _write_events(p, _START_EVENT, _USER_MSG, frac_msg, _TOOL_EXEC) + events = parse_events(p) + summary = build_session_summary(events) + + # Summary path (via _extract_output_tokens / _first_pass) + assert summary.active_output_tokens == 0 + + # Detail path (via AssistantMessageData.outputTokens) + asst_events = [e for e in events if e.type == EventType.ASSISTANT_MESSAGE] + assert len(asst_events) == 1 + detail_tokens = asst_events[0].as_assistant_message().outputTokens + assert detail_tokens == 0 + # --------------------------------------------------------------------------- # Three shutdown cycles with mixed models