From f29110db8734f6db2eda3262a39c312605107320 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 5 Nov 2025 01:28:19 -0800 Subject: [PATCH 1/3] export ep params --- eval_protocol/pytest/evaluation_test.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 857765d3..980cd58d 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -704,11 +704,22 @@ async def _collect_result(config, lst): ) pytest_wrapper = pytest.mark.asyncio(pytest_wrapper) + ep_params: dict[str, Any] = { + "rollout_processor": rollout_processor, + "server_script_path": server_script_path, + "mcp_config_path": mcp_config_path, + "rollout_processor_kwargs": rollout_processor_kwargs, + "mode": mode, + } + + print(f"ep_params: {ep_params}") + # Create the dual mode wrapper dual_mode_wrapper = create_dual_mode_wrapper( test_func, mode, max_concurrent_rollouts, max_concurrent_evaluations, pytest_wrapper ) + setattr(dual_mode_wrapper, "__ep_params__", ep_params) return dual_mode_wrapper # pyright: ignore[reportReturnType, reportUnknownVariableType] return decorator From 6b9f1331a8ab25d0f057d3426a77849cb1b0a2f0 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 5 Nov 2025 01:57:48 -0800 Subject: [PATCH 2/3] fix server path --- eval_protocol/benchmarks/test_frozen_lake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval_protocol/benchmarks/test_frozen_lake.py b/eval_protocol/benchmarks/test_frozen_lake.py index c3b1684f..ac5c998a 100644 --- a/eval_protocol/benchmarks/test_frozen_lake.py +++ b/eval_protocol/benchmarks/test_frozen_lake.py @@ -46,7 +46,7 @@ def frozen_lake_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evaluation num_runs=1, max_concurrent_rollouts=3, mode="pointwise", - server_script_path="examples/frozen_lake_mcp/server.py", + server_script_path="eval_protocol/mcp_servers/frozen_lake/server.py", ) def test_frozen_lake_evaluation(row: EvaluationRow) -> EvaluationRow: """ From bf126c18b0695d11b5a0c7b89bea9336886b67eb Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 5 Nov 2025 02:45:58 -0800 Subject: [PATCH 3/3] remove print statement --- eval_protocol/pytest/evaluation_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index 980cd58d..0293cbfc 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -712,8 +712,6 @@ async def _collect_result(config, lst): "mode": mode, } - print(f"ep_params: {ep_params}") - # Create the dual mode wrapper dual_mode_wrapper = create_dual_mode_wrapper( test_func, mode, max_concurrent_rollouts, max_concurrent_evaluations, pytest_wrapper