From b4da39d69f094b0be186c3bb4f575b563fb5b60f Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 12:09:59 -0700 Subject: [PATCH 1/9] add --- eval_protocol/models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 2804db59..616a9380 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -256,7 +256,6 @@ def __iter__(self): class Message(BaseModel): """Chat message model with trajectory evaluation support.""" - role: str # assistant, user, system, tool content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]] = Field( default="", description="The content of the message." @@ -269,7 +268,12 @@ class Message(BaseModel): tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None function_call: Optional[FunctionCall] = None control_plane_step: Optional[Dict[str, Any]] = None + weight: Optional[int] = None + def dump_mdoel_for_chat_completion_request(self): + """Only keep chat completion accepted fields""" + return self.model_dump(exclude_none=True, exclude={"control_plane_step", "weight", "reasoning_content"}) + @classmethod def model_validate(cls, obj, *args, **kwargs): if isinstance(obj, dict): From e99a258c8a23a1fe93174ab4cd23237356637b09 Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 12:15:12 -0700 Subject: [PATCH 2/9] format --- eval_protocol/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 616a9380..6e809f7a 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -256,6 +256,7 @@ def __iter__(self): class Message(BaseModel): """Chat message model with trajectory evaluation support.""" + role: str # assistant, user, system, tool content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]] = Field( default="", description="The content of the message." @@ -273,7 +274,7 @@ class Message(BaseModel): def dump_mdoel_for_chat_completion_request(self): """Only keep chat completion accepted fields""" return self.model_dump(exclude_none=True, exclude={"control_plane_step", "weight", "reasoning_content"}) - + @classmethod def model_validate(cls, obj, *args, **kwargs): if isinstance(obj, dict): From 1129e532c3798639867c7a20403483ff981eafe1 Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 12:19:31 -0700 Subject: [PATCH 3/9] add ut --- tests/test_models.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_models.py b/tests/test_models.py index 9e0f09f9..7e7a5dca 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -694,3 +694,28 @@ def test_evaluation_row_extra_fields(): assert "eval" in dictionary assert "accuracy" in dictionary["eval_details"]["metrics"] assert "test" in dictionary["extra_fields"] + +def test_message_with_weight_dump(): + example = { + "role": "user", + "content": "Hello, how are you?", + "weight": 0, + } + + message = Message(**example) + dictionary = message.model_dump() + assert "weight" in dictionary + assert dictionary["weight"] == 0 + +def test_message_dump_for_chat_completion_request(): + example = { + "role": "user", + "content": "Hello, how are you?", + "weight": 0, + "reasoning_content": "I am thinking about the user's question", + } + message = Message(**example) + dictionary = message.dump_mdoel_for_chat_completion_request() + assert "weight" not in dictionary + assert "reasoning_content" not in dictionary + assert dictionary["content"] == "Hello, how are you?" \ No newline at end of file From 90063c71d214c0aa324c43e6697b9563f79c9597 Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 12:24:16 -0700 Subject: [PATCH 4/9] add --- tests/test_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index 7e7a5dca..723685b8 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -695,6 +695,7 @@ def test_evaluation_row_extra_fields(): assert "accuracy" in dictionary["eval_details"]["metrics"] assert "test" in dictionary["extra_fields"] + def test_message_with_weight_dump(): example = { "role": "user", @@ -707,6 +708,7 @@ def test_message_with_weight_dump(): assert "weight" in dictionary assert dictionary["weight"] == 0 + def test_message_dump_for_chat_completion_request(): example = { "role": "user", @@ -718,4 +720,4 @@ def test_message_dump_for_chat_completion_request(): dictionary = message.dump_mdoel_for_chat_completion_request() assert "weight" not in dictionary assert "reasoning_content" not in dictionary - assert dictionary["content"] == "Hello, how are you?" \ No newline at end of file + assert dictionary["content"] == "Hello, how are you?" From 6e505c1a5b804934354664c24875291fa817782d Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 13:21:05 -0700 Subject: [PATCH 5/9] add --- eval_protocol/models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 6e809f7a..0673dfda 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -1,6 +1,7 @@ import os from datetime import datetime from enum import Enum +from re import A from typing import Any, ClassVar, Dict, List, Literal, Optional, TypedDict, Union JSONType = Union[Dict[str, Any], List[Any], str, int, float, bool, None] @@ -256,7 +257,8 @@ def __iter__(self): class Message(BaseModel): """Chat message model with trajectory evaluation support.""" - + + model_config = ConfigDict(extra="allow") role: str # assistant, user, system, tool content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]] = Field( default="", description="The content of the message." @@ -269,11 +271,10 @@ class Message(BaseModel): tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None function_call: Optional[FunctionCall] = None control_plane_step: Optional[Dict[str, Any]] = None - weight: Optional[int] = None - def dump_mdoel_for_chat_completion_request(self): """Only keep chat completion accepted fields""" - return self.model_dump(exclude_none=True, exclude={"control_plane_step", "weight", "reasoning_content"}) + exclude_fields = {"control_plane_step", "reasoning_content"} | set(self.model_extra.keys()) if self.model_extra else set() + return self.model_dump(exclude_none=True, exclude=exclude_fields) @classmethod def model_validate(cls, obj, *args, **kwargs): From f1289c0feed16308486f2413dd45819fb83d75c2 Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 13:22:49 -0700 Subject: [PATCH 6/9] format --- eval_protocol/models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 0673dfda..fb600333 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -257,7 +257,7 @@ def __iter__(self): class Message(BaseModel): """Chat message model with trajectory evaluation support.""" - + model_config = ConfigDict(extra="allow") role: str # assistant, user, system, tool content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]] = Field( @@ -271,9 +271,12 @@ class Message(BaseModel): tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None function_call: Optional[FunctionCall] = None control_plane_step: Optional[Dict[str, Any]] = None + def dump_mdoel_for_chat_completion_request(self): """Only keep chat completion accepted fields""" - exclude_fields = {"control_plane_step", "reasoning_content"} | set(self.model_extra.keys()) if self.model_extra else set() + exclude_fields = ( + {"control_plane_step", "reasoning_content"} | set(self.model_extra.keys()) if self.model_extra else set() + ) return self.model_dump(exclude_none=True, exclude=exclude_fields) @classmethod From 806fb0f1f20f06bbfb49f3e1691e1f13275efd58 Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 14:38:57 -0700 Subject: [PATCH 7/9] fix --- eval_protocol/models.py | 7 ++----- tests/adapters/test_openai_responses_adapter.py | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index fb600333..a0068d99 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -258,7 +258,6 @@ def __iter__(self): class Message(BaseModel): """Chat message model with trajectory evaluation support.""" - model_config = ConfigDict(extra="allow") role: str # assistant, user, system, tool content: Optional[Union[str, List[ChatCompletionContentPartTextParam]]] = Field( default="", description="The content of the message." @@ -271,13 +270,11 @@ class Message(BaseModel): tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None function_call: Optional[FunctionCall] = None control_plane_step: Optional[Dict[str, Any]] = None + weight: Optional[int] = None def dump_mdoel_for_chat_completion_request(self): """Only keep chat completion accepted fields""" - exclude_fields = ( - {"control_plane_step", "reasoning_content"} | set(self.model_extra.keys()) if self.model_extra else set() - ) - return self.model_dump(exclude_none=True, exclude=exclude_fields) + return self.model_dump(exclude_none=True, exclude={"control_plane_step", "reasoning_content", "weight"}) @classmethod def model_validate(cls, obj, *args, **kwargs): diff --git a/tests/adapters/test_openai_responses_adapter.py b/tests/adapters/test_openai_responses_adapter.py index bf9edd98..add73340 100644 --- a/tests/adapters/test_openai_responses_adapter.py +++ b/tests/adapters/test_openai_responses_adapter.py @@ -22,7 +22,7 @@ def test_openai_responses_adapter_with_real_response_simple(snapshot: SnapshotAs assert len(eval_rows) == 1 # Convert to dict for snapshot testing - eval_rows_dict = [row.model_dump(exclude={"created_at", "execution_metadata"}) for row in eval_rows] + eval_rows_dict = [row.model_dump(exclude={"created_at": True, "execution_metadata": True, "messages": {"__all__": {"weight"}}}) for row in eval_rows] # Assert against snapshot assert eval_rows_dict == snapshot @@ -42,7 +42,7 @@ def test_openai_responses_adapter_with_real_response_parallel_tool_calls(snapsho assert len(eval_rows) == 1 # Convert to dict for snapshot testing - eval_rows_dict = [row.model_dump(exclude={"created_at", "execution_metadata"}) for row in eval_rows] + eval_rows_dict = [row.model_dump(exclude={"created_at": True, "execution_metadata": True, "messages": {"__all__": {"weight"}}}) for row in eval_rows] # Assert against snapshot assert eval_rows_dict == snapshot From c9a7ac7507c778381efdecbdce60e71c8e7ab31a Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 16:04:18 -0700 Subject: [PATCH 8/9] ad --- eval_protocol/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index a0068d99..e2bf355f 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -1,7 +1,6 @@ import os from datetime import datetime from enum import Enum -from re import A from typing import Any, ClassVar, Dict, List, Literal, Optional, TypedDict, Union JSONType = Union[Dict[str, Any], List[Any], str, int, float, bool, None] From 086fc6a77a0f7c0bee3d980de0e48ea1ad0cdf75 Mon Sep 17 00:00:00 2001 From: Yinghan Ma Date: Mon, 22 Sep 2025 16:04:53 -0700 Subject: [PATCH 9/9] format --- tests/adapters/test_openai_responses_adapter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/adapters/test_openai_responses_adapter.py b/tests/adapters/test_openai_responses_adapter.py index add73340..6091e52f 100644 --- a/tests/adapters/test_openai_responses_adapter.py +++ b/tests/adapters/test_openai_responses_adapter.py @@ -22,7 +22,10 @@ def test_openai_responses_adapter_with_real_response_simple(snapshot: SnapshotAs assert len(eval_rows) == 1 # Convert to dict for snapshot testing - eval_rows_dict = [row.model_dump(exclude={"created_at": True, "execution_metadata": True, "messages": {"__all__": {"weight"}}}) for row in eval_rows] + eval_rows_dict = [ + row.model_dump(exclude={"created_at": True, "execution_metadata": True, "messages": {"__all__": {"weight"}}}) + for row in eval_rows + ] # Assert against snapshot assert eval_rows_dict == snapshot @@ -42,7 +45,10 @@ def test_openai_responses_adapter_with_real_response_parallel_tool_calls(snapsho assert len(eval_rows) == 1 # Convert to dict for snapshot testing - eval_rows_dict = [row.model_dump(exclude={"created_at": True, "execution_metadata": True, "messages": {"__all__": {"weight"}}}) for row in eval_rows] + eval_rows_dict = [ + row.model_dump(exclude={"created_at": True, "execution_metadata": True, "messages": {"__all__": {"weight"}}}) + for row in eval_rows + ] # Assert against snapshot assert eval_rows_dict == snapshot