eval-protocol · xzrderek · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025
diff --git a/eval_protocol/benchmarks/test_frozen_lake.py b/eval_protocol/benchmarks/test_frozen_lake.py
@@ -46,7 +46,7 @@ def frozen_lake_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evaluation
     num_runs=1,
     max_concurrent_rollouts=3,
     mode="pointwise",
-    server_script_path="examples/frozen_lake_mcp/server.py",
+    server_script_path="eval_protocol/mcp_servers/frozen_lake/server.py",
 )
 def test_frozen_lake_evaluation(row: EvaluationRow) -> EvaluationRow:
     """

diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
@@ -704,11 +704,20 @@ async def _collect_result(config, lst):
         )
         pytest_wrapper = pytest.mark.asyncio(pytest_wrapper)
 
+        ep_params: dict[str, Any] = {
+            "rollout_processor": rollout_processor,
+            "server_script_path": server_script_path,
+            "mcp_config_path": mcp_config_path,
+            "rollout_processor_kwargs": rollout_processor_kwargs,
+            "mode": mode,
+        }
+
         # Create the dual mode wrapper
         dual_mode_wrapper = create_dual_mode_wrapper(
             test_func, mode, max_concurrent_rollouts, max_concurrent_evaluations, pytest_wrapper
         )
 
+        setattr(dual_mode_wrapper, "__ep_params__", ep_params)
         return dual_mode_wrapper  # pyright: ignore[reportReturnType, reportUnknownVariableType]
 
     return decorator