diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 7d13696c96..c5d7634436 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -811,6 +811,7 @@ async def _content_to_message_param( follow_up = await _content_to_message_param( types.Content(role=content.role, parts=non_tool_parts), provider=provider, + model=model, ) follow_up_messages = ( follow_up if isinstance(follow_up, list) else [follow_up] @@ -1081,24 +1082,17 @@ async def _get_content( }) continue - # Determine MIME type: use explicit value, infer from URI, or use default. + # Determine MIME type: use explicit value, infer from URI, or fail. mime_type = part.file_data.mime_type if not mime_type: mime_type = _infer_mime_type_from_uri(part.file_data.file_uri) if not mime_type and part.file_data.display_name: guessed_mime_type, _ = mimetypes.guess_type(part.file_data.display_name) mime_type = guessed_mime_type - if not mime_type: - # LiteLLM's Vertex AI backend requires format for GCS URIs. - mime_type = _DEFAULT_MIME_TYPE - logger.debug( - "Could not determine MIME type for file_uri %s, using default: %s", - part.file_data.file_uri, - mime_type, - ) - mime_type = _normalize_mime_type(mime_type) + if mime_type: + mime_type = _normalize_mime_type(mime_type) - if provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url( + if mime_type and provider in _FILE_ID_REQUIRED_PROVIDERS and _is_http_url( part.file_data.file_uri ): url_content_type = _media_url_content_type(mime_type) @@ -1125,6 +1119,13 @@ async def _get_content( }) continue + if not mime_type: + raise ValueError( + f"Cannot determine MIME type for file_uri" + f" '{part.file_data.file_uri}'. Set file_data.mime_type" + f" explicitly." + ) + file_object: ChatCompletionFileUrlObject = { "file_id": part.file_data.file_uri, } diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index ace08ad997..4d167db928 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1882,14 +1882,13 @@ async def test_content_to_message_param_user_message_file_uri_only( @pytest.mark.asyncio async def test_content_to_message_param_user_message_file_uri_without_mime_type(): - """Test handling of file_data without mime_type (GcsArtifactService scenario). + """Test that file_data without determinable MIME type raises ValueError. When using GcsArtifactService, artifacts may have file_uri (gs://...) but - without mime_type set. LiteLLM's Vertex AI backend requires the format - field to be present, so we infer MIME type from the URI extension or use - a default fallback to ensure compatibility. + without mime_type set and no recognizable extension. A clear error should + be raised so the developer can set file_data.mime_type explicitly. - See: https://github.com/google/adk-python/issues/3787 + See: https://github.com/google/adk-python/issues/5184 """ file_part = types.Part( file_data=types.FileData( @@ -1904,22 +1903,8 @@ async def test_content_to_message_param_user_message_file_uri_without_mime_type( ], ) - message = await _content_to_message_param(content) - assert message == { - "role": "user", - "content": [ - {"type": "text", "text": "Analyze this file."}, - { - "type": "file", - "file": { - "file_id": ( - "gs://agent-artifact-bucket/app/user/session/artifact/0" - ), - "format": "application/octet-stream", - }, - }, - ], - } + with pytest.raises(ValueError, match="Cannot determine MIME type"): + await _content_to_message_param(content) @pytest.mark.asyncio @@ -3097,27 +3082,125 @@ async def test_get_content_file_uri_infers_from_display_name(): @pytest.mark.asyncio async def test_get_content_file_uri_default_mime_type(): - """Test that file_uri without extension uses default MIME type. + """Test that file_uri without extension raises ValueError. When file_data has a file_uri without a recognizable extension and no explicit - mime_type, a default MIME type should be used to ensure compatibility with - LiteLLM backends. + mime_type, a ValueError should be raised to prevent silent misconfiguration. - See: https://github.com/google/adk-python/issues/3787 + See: https://github.com/google/adk-python/issues/5184 """ - # Use Part constructor directly to create file_data without mime_type - # (types.Part.from_uri requires a valid mime_type when it can't infer) parts = [ types.Part(file_data=types.FileData(file_uri="gs://bucket/artifact/0")) ] - content = await _get_content(parts) - assert content[0] == { - "type": "file", - "file": { - "file_id": "gs://bucket/artifact/0", - "format": "application/octet-stream", - }, - } + with pytest.raises(ValueError, match="Cannot determine MIME type"): + await _get_content(parts) + + +@pytest.mark.asyncio +async def test_get_content_file_uri_no_mime_text_fallback_still_works(): + """Text fallback for unsupported providers works without MIME type. + + When a provider requires text fallback (e.g., anthropic), file_data + without a determinable MIME type should produce a text reference + rather than raising a ValueError. + + See: https://github.com/google/adk-python/issues/5184 + """ + parts = [ + types.Part( + file_data=types.FileData( + file_uri="gs://bucket/artifact/0", + display_name="my_artifact", + ) + ) + ] + content = await _get_content(parts, provider="anthropic") + assert content == [ + {"type": "text", "text": '[File reference: "my_artifact"]'}, + ] + + +@pytest.mark.asyncio +async def test_content_to_message_param_recursive_model_propagation(): + """Recursive _content_to_message_param calls propagate model parameter. + + When a Content has mixed function_response + file parts, the recursive + call for non-tool parts must forward model= so provider-specific behavior + (e.g., Vertex AI Gemini file block support) works correctly. + + See: https://github.com/google/adk-python/issues/5184 + """ + tool_part = types.Part.from_function_response( + name="fetch_file", + response={"status": "ok"}, + ) + tool_part.function_response.id = "call_1" + + file_part = types.Part( + file_data=types.FileData( + file_uri="gs://bucket/data.csv", + mime_type="text/csv", + ) + ) + + content = types.Content( + role="user", + parts=[tool_part, file_part], + ) + + # vertex_ai + gemini model should keep the file block (not text fallback) + messages = await _content_to_message_param( + content, provider="vertex_ai", model="vertex_ai/gemini-1.5-pro" + ) + assert isinstance(messages, list) + assert len(messages) == 2 + assert messages[0]["role"] == "tool" + # The follow-up user message must contain a file block, not text fallback + user_msg = messages[1] + assert user_msg["role"] == "user" + file_content = user_msg["content"] + assert any( + item.get("type") == "file" for item in file_content + ), "file block expected when model is propagated for vertex_ai/gemini" + + +@pytest.mark.asyncio +async def test_content_to_message_param_recursive_model_propagation_fallback(): + """Without model propagation, vertex_ai non-gemini would use text fallback. + + Verify that vertex_ai with a non-gemini model correctly falls back to text. + """ + tool_part = types.Part.from_function_response( + name="fetch_file", + response={"status": "ok"}, + ) + tool_part.function_response.id = "call_1" + + file_part = types.Part( + file_data=types.FileData( + file_uri="gs://bucket/data.csv", + mime_type="text/csv", + ) + ) + + content = types.Content( + role="user", + parts=[tool_part, file_part], + ) + + # vertex_ai + non-gemini model should use text fallback + messages = await _content_to_message_param( + content, provider="vertex_ai", model="vertex_ai/claude-3-sonnet" + ) + assert isinstance(messages, list) + user_msg = messages[1] + assert user_msg["role"] == "user" + file_content = user_msg["content"] + assert any( + item.get("type") == "text" + and "File reference" in item.get("text", "") + for item in file_content + ), "text fallback expected for vertex_ai non-gemini model" @pytest.mark.asyncio