Skip to content

Commit d951083

Browse files
author
Dylan Huang
authored
execute runs in parallel when possible (#117)
1 parent 395de24 commit d951083

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
Status,
3131
)
3232
from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter
33+
from eval_protocol.pytest.default_mcp_gym_rollout_processor import MCPGymRolloutProcessor
3334
from eval_protocol.pytest.default_no_op_rollout_processor import NoOpRolloutProcessor
3435
from eval_protocol.pytest.rollout_processor import RolloutProcessor
3536
from eval_protocol.pytest.types import (
@@ -585,7 +586,9 @@ def _log_eval_error(status: Status, rows: Optional[List[EvaluationRow]] | None,
585586
exception_handler_config=exception_handler_config,
586587
)
587588

588-
for i in range(num_runs):
589+
async def execute_run(i: int, config: RolloutProcessorConfig):
590+
nonlocal all_results
591+
589592
# Regenerate outputs each run by deep-copying the pristine dataset
590593
# so model responses are not reused across runs.
591594
run_id = generate_id()
@@ -728,6 +731,18 @@ async def _collect_result(config, lst):
728731
r.eval_metadata.status = Status.eval_finished()
729732
active_logger.log(r)
730733

734+
tasks = []
735+
for i in range(num_runs):
736+
tasks.append(asyncio.create_task(execute_run(i, config)))
737+
738+
# if rollout_processor is an MCPGymRolloutProcessor, we execute runs sequentially since MCPGym does not support concurrent runs
739+
# else, we execute runs in parallel
740+
if isinstance(rollout_processor, MCPGymRolloutProcessor):
741+
for task in tasks:
742+
await task
743+
else:
744+
await asyncio.gather(*tasks)
745+
731746
# for groupwise mode, the result contains eval output from multiple completion_params, so we need to differentiate them
732747
# rollout_id is used to differentiate the result from different completion_params
733748
if mode == "groupwise":

0 commit comments

Comments
 (0)