Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions src/copilot_usage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,20 +219,25 @@ class AssistantMessageData(BaseModel):
@field_validator("outputTokens", mode="before")
@classmethod
def _sanitize_non_numeric_tokens(cls, v: object) -> object:
    """Map non-positive, non-numeric, and non-whole-float token counts to ``0``.

    JSON ``true``/``false``, numeric strings like ``"100"``,
    non-positive numeric values, and non-integer floats (e.g. ``1.5``)
    are not valid token counts. Returning ``0`` keeps the rest of the
    assistant message payload parseable while keeping these values out
    of the token count.

    This aligns with ``_extract_output_tokens`` in the parser fast path:
    both paths agree that only positive whole-number values contribute
    tokens.

    Args:
        v: Raw ``outputTokens`` value, seen before Pydantic's own
            coercion (the validator is registered with ``mode="before"``).

    Returns:
        ``0`` for bools, strings, non-positive numbers, and floats that
        are not whole numbers; ``int(v)`` for positive whole floats;
        otherwise ``v`` unchanged so regular field validation still runs.
    """
    # bool must be tested before int: ``True``/``False`` are ``int`` instances.
    if isinstance(v, (bool, str)):
        return 0
    if isinstance(v, float):
        # ``is_integer()`` is False for NaN and +/-inf, so they map to 0 too.
        return int(v) if v.is_integer() and v > 0 else 0
    if isinstance(v, int) and v <= 0:
        return 0
    return v

Expand Down
12 changes: 5 additions & 7 deletions src/copilot_usage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,11 @@ def _extract_output_tokens(ev: SessionEvent) -> int | None:
"""Extract ``outputTokens`` from an ``assistant.message`` event via direct dict access.

Mirrors the domain intent of :class:`AssistantMessageData`'s
``_sanitize_non_numeric_tokens`` field-validator: only positive numeric
values contribute tokens. When ``AssistantMessageData.model_validate(...)``
succeeds, both paths agree on whether a value contributes tokens; the
representation differs for non-contributing values — this function returns
``None``, whereas the Pydantic model stores ``0``. Inputs rejected by
model validation should likewise be treated as non-contributing when
comparing behaviors.
``_sanitize_non_numeric_tokens`` field-validator: only positive
whole-number values contribute tokens. Both paths agree on whether a
value contributes tokens; the representation differs for
non-contributing values — this function returns ``None``, whereas the
Pydantic model stores ``0``.

Specifically:

Expand Down
15 changes: 15 additions & 0 deletions tests/copilot_usage/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,21 @@ def test_large_negative_maps_to_zero(self) -> None:
d = AssistantMessageData.model_validate({"outputTokens": -100_000})
assert d.outputTokens == 0

def test_non_whole_float_maps_to_zero(self) -> None:
    """A fractional count such as ``1.5`` validates to ``0``, matching ``_extract_output_tokens``."""
    payload = {"outputTokens": 1.5}
    parsed = AssistantMessageData.model_validate(payload)
    assert parsed.outputTokens == 0

def test_non_whole_float_large_maps_to_zero(self) -> None:
    """Large non-integer float like ``1_000_000.5`` maps to ``0``.

    Uses a genuinely large magnitude so this case is distinct from the
    small-fraction test (``1.5``): size must not rescue a fractional value.
    """
    d = AssistantMessageData.model_validate({"outputTokens": 1_000_000.5})
    assert d.outputTokens == 0

def test_whole_positive_float_coerced_to_int(self) -> None:
    """A positive whole-number float (``100.0``) ends up as the count ``100``."""
    payload = {"outputTokens": 100.0}
    parsed = AssistantMessageData.model_validate(payload)
    assert parsed.outputTokens == 100


def test_session_shutdown_data() -> None:
d = SessionShutdownData.model_validate(RAW_SHUTDOWN["data"])
Expand Down
51 changes: 42 additions & 9 deletions tests/copilot_usage/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4838,15 +4838,9 @@ def test_positive_contribution_agreement(
_ = label
fast_path_result = _extract_output_tokens(_make_assistant_event(raw_value))

# Pydantic rejects non-whole floats with a ValidationError; the model
# path treats those as non-contributing, same as the fast path.
try:
model = AssistantMessageData.model_validate({"outputTokens": raw_value})
model_contributes = model.outputTokens > 0
model_result = repr(model.outputTokens)
except ValidationError as exc:
model_contributes = False
model_result = f"ValidationError({exc})"
model = AssistantMessageData.model_validate({"outputTokens": raw_value})
model_contributes = model.outputTokens > 0
model_result = repr(model.outputTokens)

fast_path_contributes = fast_path_result is not None
assert fast_path_contributes == model_contributes, (
Expand Down Expand Up @@ -5041,6 +5035,45 @@ def test_mixed_valid_float_null_tokens(self, tmp_path: Path) -> None:
summary = build_session_summary(events)
assert summary.active_output_tokens == 150

def test_fractional_float_consistent_summary_and_detail(
    self, tmp_path: Path
) -> None:
    """A fractional ``outputTokens`` (``1.5``) counts as 0 in summary and detail.

    Regression guard: the session summary (fed by
    ``_extract_output_tokens``) and the per-message detail view (fed by
    ``AssistantMessageData.outputTokens``) must report the same
    contribution for a non-whole float, namely none.
    """
    fractional_event = {
        "type": "assistant.message",
        "data": {
            "messageId": "m-frac",
            "content": "fractional",
            "toolRequests": [],
            "interactionId": "int-frac",
            "outputTokens": 1.5,
        },
        "id": "ev-frac",
        "timestamp": "2026-03-07T10:01:00.000Z",
        "parentId": "ev-user1",
    }
    events_file = tmp_path / "s" / "events.jsonl"
    _write_events(
        events_file,
        _START_EVENT,
        _USER_MSG,
        json.dumps(fractional_event),
        _TOOL_EXEC,
    )
    events = parse_events(events_file)
    summary = build_session_summary(events)

    # Summary path: aggregated output tokens for the active session.
    assert summary.active_output_tokens == 0

    # Detail path: the single assistant message, viewed through the model.
    assistant_events = [
        ev for ev in events if ev.type == EventType.ASSISTANT_MESSAGE
    ]
    assert len(assistant_events) == 1
    only_message = assistant_events[0].as_assistant_message()
    assert only_message.outputTokens == 0


# ---------------------------------------------------------------------------
# Three shutdown cycles with mixed models
Expand Down
Loading