diff --git a/eval_protocol/pytest/default_agent_rollout_processor.py b/eval_protocol/pytest/default_agent_rollout_processor.py index 997d229a..c2a10ba2 100644 --- a/eval_protocol/pytest/default_agent_rollout_processor.py +++ b/eval_protocol/pytest/default_agent_rollout_processor.py @@ -133,8 +133,11 @@ async def call_agent(self) -> Optional[Union[str, List[ChatCompletionContentPart async def _call_model(self, messages: list[Message], tools: Optional[List[dict[str, Any]]]) -> Message: # Convert Message models to plain dicts for LLM call + # Filter out fields that are not supported by OpenAI/LiteLLM APIs (e.g., weight, control_plane_step, reasoning_content) messages_payload: List[Dict[str, Any]] = [ - message.model_dump() if hasattr(message, "model_dump") else message # type: ignore[misc] + message.dump_mdoel_for_chat_completion_request() + if hasattr(message, "dump_mdoel_for_chat_completion_request") + else (message.model_dump() if hasattr(message, "model_dump") else message) # type: ignore[misc] for message in messages ] # Normalize tool definitions into OpenAI-compatible dicts diff --git a/eval_protocol/pytest/default_single_turn_rollout_process.py b/eval_protocol/pytest/default_single_turn_rollout_process.py index 27fe2559..ffa34804 100644 --- a/eval_protocol/pytest/default_single_turn_rollout_process.py +++ b/eval_protocol/pytest/default_single_turn_rollout_process.py @@ -48,7 +48,9 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: while messages_for_request and messages_for_request[-1].role == "assistant": messages_for_request.pop() - messages_payload = [message.model_dump() for message in messages_for_request] + # Filter out fields that are not supported by OpenAI/LiteLLM APIs (e.g., weight, control_plane_step, reasoning_content) + # Use the Message class method that excludes unsupported fields + messages_payload = [message.dump_mdoel_for_chat_completion_request() for message in messages_for_request] request_params = {"messages": messages_payload, 
**config.completion_params} # Ensure caching is disabled only for this request (review feedback) diff --git a/eval_protocol/pytest/tracing_utils.py b/eval_protocol/pytest/tracing_utils.py index 6ea69371..0382ba40 100644 --- a/eval_protocol/pytest/tracing_utils.py +++ b/eval_protocol/pytest/tracing_utils.py @@ -101,11 +101,14 @@ def build_init_request( completion_params_base_url: Optional[str] = completion_params_dict.get("base_url") # Strip non-OpenAI fields from messages - allowed_message_fields = {"role", "content", "tool_calls", "tool_call_id", "name"} + # Use dump_mdoel_for_chat_completion_request() to automatically exclude unsupported fields (weight, control_plane_step, reasoning_content) clean_messages = [] for m in row.messages: md: Dict[str, Any] - if hasattr(m, "model_dump"): + if hasattr(m, "dump_mdoel_for_chat_completion_request"): + # Use the Message method that automatically filters unsupported fields + md = m.dump_mdoel_for_chat_completion_request() + elif hasattr(m, "model_dump"): md = m.model_dump() elif isinstance(m, dict): md = m @@ -118,6 +121,8 @@ def build_init_request( "tool_call_id": getattr(m, "tool_call_id", None), "name": getattr(m, "name", None), } + # Additional filtering to ensure only allowed fields are kept (already handled by dump_mdoel_for_chat_completion_request for Message objects) + allowed_message_fields = {"role", "content", "tool_calls", "tool_call_id", "name"} clean_messages.append({k: v for k, v in md.items() if k in allowed_message_fields and v is not None}) # Build final model base URL with tracing metadata diff --git a/tests/test_message_field_filtering.py b/tests/test_message_field_filtering.py new file mode 100644 index 00000000..8e48aa86 --- /dev/null +++ b/tests/test_message_field_filtering.py @@ -0,0 +1,64 @@ +""" +Test to verify that message fields are properly filtered before sending to API. 
+
+This test verifies that unsupported fields like 'weight', 'control_plane_step',
+and 'reasoning_content' are excluded from messages when preparing API requests.
+"""
+
+from eval_protocol.models import Message
+
+
+def test_dump_model_excludes_unsupported_fields():
+    """Test that dump_mdoel_for_chat_completion_request excludes unsupported fields."""
+    # Build a message that sets the three unsupported fields next to supported ones
+    message = Message(
+        role="user",
+        content="Hello",
+        weight=0,
+        control_plane_step={"step": 1},
+        reasoning_content="Some reasoning",
+        name="test_user",
+    )
+
+    # Dump the message the same way the rollout processors build their payload
+    filtered = message.dump_mdoel_for_chat_completion_request()
+
+    # Verify unsupported fields are excluded
+    assert "weight" not in filtered, "weight field should be excluded"
+    assert "control_plane_step" not in filtered, "control_plane_step field should be excluded"
+    assert "reasoning_content" not in filtered, "reasoning_content field should be excluded"
+
+    # Verify supported fields are included
+    assert "role" in filtered, "role field should be included"
+    assert "content" in filtered, "content field should be included"
+    assert filtered["role"] == "user"
+    assert filtered["content"] == "Hello"
+
+    # Verify name survives filtering (it is not one of the excluded fields)
+    assert "name" in filtered
+    assert filtered["name"] == "test_user"
+
+
+def test_dump_model_with_only_supported_fields():
+    """Test that supported fields are preserved and unsupported ones stay absent."""
+    message = Message(
+        role="assistant",
+        content="I can help you",
+        tool_calls=None,
+        tool_call_id=None,
+    )
+
+    filtered = message.dump_mdoel_for_chat_completion_request()
+
+    # Supported fields keep the values they were given
+    assert filtered["role"] == "assistant"
+    assert filtered["content"] == "I can help you"
+
+    # None of the unsupported fields may appear, even though none of them was set
+    assert not {"weight", "control_plane_step", "reasoning_content"} & filtered.keys()
+
+
+if __name__ == "__main__":
+    import pytest
+
+    pytest.main([__file__, "-v"])