Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/google/adk/models/lite_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2413,6 +2413,16 @@ async def generate_content_async(
usage_metadata = None
grounding_metadata = None
fallback_index = 0
# Track the latest finish_reason + model so we can surface an
# explicit error event when the stream completes with neither text
# nor tool calls (silent-empty pattern). Without this guard the
# aggregator simply yields nothing and the downstream Runner sees
# zero events — the user-visible "empty turn that succeeded" bug
# tracked across #5394 / #5006 / #3618 / user reports against
# anthropic, gemini, and other providers under content_filter,
# safety, model_not_found, and 0-token-completion conditions.
last_finish_reason: str | None = None
last_model_version: str | None = None

def _finalize_tool_call_response(
*, model_version: str, finish_reason: str
Expand Down Expand Up @@ -2502,7 +2512,11 @@ def _reset_stream_buffers() -> None:
part_grounding = _extract_grounding_metadata(part)
if part_grounding:
grounding_metadata = part_grounding
if getattr(part, "model", None):
last_model_version = part.model
for chunk, finish_reason in _model_response_to_chunk(part):
if finish_reason:
last_finish_reason = finish_reason
if isinstance(chunk, FunctionChunk):
index = chunk.index or fallback_index
if index not in function_calls:
Expand Down Expand Up @@ -2616,6 +2630,41 @@ def _reset_stream_buffers() -> None:
)
yield aggregated_llm_response_with_tool_call

# If we observed a finish_reason but produced no aggregated response
# (no text, no tool calls), surface it as an explicit error event so
# downstream consumers see actionable signal instead of a silent
# zero-yield stream. This is the "empty completion" pattern:
# provider returns 200 OK with a finish_reason (content_filter,
# safety, length-without-content, model_not_found, 0-token
# response, ...) but no text or tool deltas. Tracked across:
# #5394 — AnthropicLlm never populates finish_reason
# #5006 — retry with resume message when model returns empty
# #5636 — surface error when model returns STOP with empty content
# #3618 — Handle empty message in LiteLLM response
# Multiple stalled PRs (#5512, #5636, #3699) attempted variants of
# this fix. This guard keeps the change minimal: ONLY fires when
# the entire stream produced nothing meaningful AND the provider
# told us why via finish_reason. Mapped error_code lets the
# downstream agent loop react (retry, surface, escalate) without
# losing the provider's actual signal.
if (
last_finish_reason
and aggregated_llm_response is None
and aggregated_llm_response_with_tool_call is None
):
mapped_empty_finish = _map_finish_reason(last_finish_reason)
empty_response = LlmResponse(
error_code=mapped_empty_finish,
error_message=_finish_reason_to_error_message(mapped_empty_finish),
finish_reason=mapped_empty_finish,
model_version=last_model_version,
)
if usage_metadata:
empty_response.usage_metadata = usage_metadata
if grounding_metadata:
empty_response.grounding_metadata = grounding_metadata
yield empty_response

else:
response = await self.llm_client.acompletion(**completion_args)
yield _model_response_to_generate_content_response(response)
Expand Down
144 changes: 144 additions & 0 deletions tests/unittests/models/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5275,3 +5275,147 @@ def test_redact_file_uri_for_log_http_url_keeps_scheme_and_tail():
_redact_file_uri_for_log("https://example.com/path/file.pdf")
== "https://<redacted>/file.pdf"
)


# ---------------------------------------------------------------------------
# Empty-completion guard (fixes #5394 / #5006 / #5636 / #3618 family)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_streaming_empty_completion_with_content_filter_surfaces_error(
mock_completion, lite_llm_instance
):
"""A stream that ends with finish_reason but no text/tool deltas must
surface as an explicit error event, not a silent zero-yield stream.

Reproduces the "empty completion" pattern reported across
#5394 / #5006 / #5636 / #3618: provider returns 200 OK with a
finish_reason (content_filter / safety / model_not_found / etc.)
but no content. Pre-fix the aggregator simply yielded nothing and
the downstream Runner saw zero events — silent successful empty
turn from the user's perspective.
"""
stream_chunks = [
ModelResponseStream(
model="anthropic/claude-opus-4-8",
choices=[
StreamingChoices(finish_reason="content_filter", delta=Delta())
],
),
]
mock_completion.return_value = iter(stream_chunks)

responses = [
response
async for response in lite_llm_instance.generate_content_async(
LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
)
]

assert len(responses) == 1, (
"empty completion with finish_reason must surface exactly one error "
f"LlmResponse, got {len(responses)}: {responses}"
)
empty_response = responses[0]
assert empty_response.error_code == types.FinishReason.SAFETY
assert empty_response.finish_reason == types.FinishReason.SAFETY
assert empty_response.error_message
# The model name must propagate so downstream consumers know what failed.
assert empty_response.model_version == "anthropic/claude-opus-4-8"


@pytest.mark.asyncio
async def test_streaming_empty_completion_with_stop_finish_reason_still_surfaces(
mock_completion, lite_llm_instance
):
"""A stream that ends with finish_reason='stop' but no content also
surfaces — the model said "I'm done" but produced nothing. This is the
most common shape of the silent-empty bug (#5636 specifically called
out gemini-2.5-flash-lite returning STOP with empty content after a
tool call). Pre-fix, the (text or reasoning_parts) guard skipped
finalization for this case and the aggregator yielded nothing.
"""
stream_chunks = [
ModelResponseStream(
model="google/gemini-3.5-flash",
choices=[StreamingChoices(finish_reason="stop", delta=Delta())],
),
]
mock_completion.return_value = iter(stream_chunks)

responses = [
response
async for response in lite_llm_instance.generate_content_async(
LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
)
]

assert len(responses) == 1, (
"STOP-with-empty stream must surface exactly one response, got "
f"{len(responses)}: {responses}"
)
empty_response = responses[0]
# The finish_reason is preserved so callers see the model's signal
# ("I stopped cleanly") + the empty body is also visible (no text
# content). error_message names the failure mode so the operator
# does not have to dig.
assert empty_response.finish_reason == types.FinishReason.STOP
assert empty_response.error_message
assert empty_response.model_version == "google/gemini-3.5-flash"


@pytest.mark.asyncio
async def test_streaming_text_response_does_not_trigger_empty_guard(
mock_completion, lite_llm_instance
):
"""Regression guard: a normal text completion must NOT trigger the
empty-guard — the existing aggregated_llm_response handles it and
exactly one response is yielded with the actual text."""
stream_chunks = [
ModelResponseStream(
choices=[
StreamingChoices(
finish_reason=None,
delta=Delta(role="assistant", content="hello"),
)
]
),
ModelResponseStream(
choices=[StreamingChoices(finish_reason="stop", delta=Delta())]
),
]
mock_completion.return_value = iter(stream_chunks)

responses = [
response
async for response in lite_llm_instance.generate_content_async(
LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
)
]

# The text deltas yield partial responses + one final aggregated
# response; the empty-guard MUST NOT fire because text was produced.
assert len(responses) >= 2
final = responses[-1]
assert final.error_code is None
assert final.finish_reason == types.FinishReason.STOP


@pytest.mark.asyncio
async def test_streaming_no_chunks_and_no_finish_reason_is_byte_identical(
mock_completion, lite_llm_instance
):
"""The empty-guard ONLY fires when there's a finish_reason. A stream
that yields literally nothing (test doubles, etc.) must stay
byte-identical to pre-fix — zero responses, no synthesized error."""
mock_completion.return_value = iter([])

responses = [
response
async for response in lite_llm_instance.generate_content_async(
LLM_REQUEST_WITH_FUNCTION_DECLARATION, stream=True
)
]

assert responses == []