diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py index a2b3882c..47d98eb6 100644 --- a/eval_protocol/pytest/evaluation_test.py +++ b/eval_protocol/pytest/evaluation_test.py @@ -733,16 +733,18 @@ async def _collect_result(config, lst): r.eval_metadata.status = Status.eval_finished() active_logger.log(r) - tasks = [] - for i in range(num_runs): - tasks.append(asyncio.create_task(execute_run(i, config))) - # if rollout_processor is McpGymRolloutProcessor, we execute runs sequentially since McpGym does not support concurrent runs # else, we execute runs in parallel if isinstance(rollout_processor, MCPGymRolloutProcessor): - for task in tasks: + # For MCPGymRolloutProcessor, create and execute tasks one at a time to avoid port conflicts + for i in range(num_runs): + task = asyncio.create_task(execute_run(i, config)) await task else: + # For other processors, create all tasks at once and run in parallel + tasks = [] + for i in range(num_runs): + tasks.append(asyncio.create_task(execute_run(i, config))) await asyncio.gather(*tasks) # for groupwise mode, the result contains eval otuput from multiple completion_params, we need to differentiate them