|
12 | 12 | from openai.types.chat.chat_completion_message_tool_call import ( |
13 | 13 | ChatCompletionMessageToolCall, |
14 | 14 | ) |
15 | | -from pydantic import BaseModel, ConfigDict, Field |
| 15 | +from pydantic import BaseModel, ConfigDict, Field, field_validator |
16 | 16 |
|
17 | 17 | from eval_protocol.get_pep440_version import get_pep440_version |
18 | 18 | from eval_protocol.human_id import generate_id |
@@ -595,7 +595,7 @@ class EvaluationRow(BaseModel): |
595 | 595 | supporting both row-wise batch evaluation and trajectory-based RL evaluation. |
596 | 596 | """ |
597 | 597 |
|
598 | | - model_config = ConfigDict(extra="allow") |
| 598 | + model_config = ConfigDict(extra="allow", validate_assignment=True) |
599 | 599 |
|
600 | 600 | # Core OpenAI ChatCompletion compatible conversation data |
601 | 601 | messages: List[Message] = Field(description="List of messages in the conversation. Also known as a trajectory.") |
@@ -626,6 +626,17 @@ class EvaluationRow(BaseModel): |
626 | 626 | default=None, description="The evaluation result for this row/trajectory." |
627 | 627 | ) |
628 | 628 |
|
@field_validator("evaluation_result", mode="before")
@classmethod
def _coerce_evaluation_result(
    cls, value: EvaluateResult | dict[str, Any] | None
) -> EvaluateResult | None:
    """Build an ``EvaluateResult`` from a plain dict given for ``evaluation_result``.

    Registered as a ``mode="before"`` validator on the ``evaluation_result``
    field, so it receives the raw incoming value.

    Args:
        value: The raw value supplied for the field. ``None`` and existing
            ``EvaluateResult`` instances are returned untouched; a ``dict`` is
            expanded as keyword arguments to ``EvaluateResult``.

    Returns:
        An ``EvaluateResult`` built from the dict, or the input value
        unchanged for every other kind of input (including ``None``).
    """
    # Only a dict that is not already an EvaluateResult needs converting;
    # anything else is returned as-is.
    should_build = isinstance(value, dict) and not isinstance(value, EvaluateResult)
    return EvaluateResult(**value) if should_build else value
| 639 | + |
629 | 640 | execution_metadata: ExecutionMetadata = Field( |
630 | 641 | default_factory=lambda: ExecutionMetadata(run_id=None), |
631 | 642 | description="Metadata about the execution of the evaluation.", |
|
0 commit comments