@@ -236,7 +236,7 @@ def make(
236236 return mcp_envs
237237
238238
239- async def rollout (
239+ def rollout (
240240 envs : GeneralMCPVectorEnv ,
241241 policy : Union [FireworksPolicy , LLMBasePolicy , Callable ],
242242 * ,
@@ -246,7 +246,7 @@ async def rollout(
246246 steps : int = 512 ,
247247 openai_format_log_file : Optional [str ] = None ,
248248 max_concurrent_rollouts : int = 8 ,
249- ) -> AsyncIterator [ EvaluationRow ]:
249+ ) -> List [ asyncio . Task [ EvaluationRow ] ]:
250250 """
251251 Execute general rollouts using tool calling interface with automatic record/playback.
252252
@@ -274,14 +274,14 @@ async def rollout(
274274 - Set and file exists: Playback mode (uses recorded data)
275275
276276 Returns:
277- List of EvaluationRow objects
277+ List of asyncio.Task objects for external handling
278278
279279 Example:
280280 # Live mode
281- evaluation_rows = await ep.rollout(envs, policy)
281+ tasks = ep.rollout(envs, policy)
282282
283283 # Create environments automatically
284- trajectories = await ep.rollout(
284+ tasks = ep.rollout(
285285 "http://localhost:8000/mcp/",
286286 policy,
287287 evaluation_rows=my_evaluation_rows,
@@ -290,26 +290,26 @@ async def rollout(
290290
291291 # Recording mode
292292 os.environ["EP_PLAYBACK_FILE"] = "record.jsonl"
293- evaluation_rows = await ep.rollout(envs, policy, openai_format_log_file="sft_data.jsonl")
293+ tasks = ep.rollout(envs, policy, openai_format_log_file="sft_data.jsonl")
294294
295295 # Playback mode (after recording file exists)
296- evaluation_rows = await ep.rollout(envs, policy)
296+ tasks = ep.rollout(envs, policy)
297297 """
298298 # Automatically create environments if a base URL is provided
299299 if isinstance (envs , str ):
300300 if evaluation_rows is None and dataset is None :
301301 raise ValueError ("Either 'evaluation_rows' or 'dataset' must be provided when envs is a URL" )
302302
303303 auto_model_id = model_id or getattr (policy , "model_id" , "unknown" )
304- envs = await make (envs , evaluation_rows = evaluation_rows , dataset = dataset , model_id = auto_model_id )
304+ envs = make (envs , evaluation_rows = evaluation_rows , dataset = dataset , model_id = auto_model_id )
305305
306306 # Use the new ExecutionManager for execution
307307 execution_manager = ExecutionManager ()
308308
309- async for evaluation_row in execution_manager .execute_rollouts (
309+ tasks = execution_manager .execute_rollouts (
310310 envs , policy , steps , openai_format_log_file , max_concurrent_rollouts , evaluation_rows
311- ):
312- yield evaluation_row
311+ )
312+ return tasks
313313
314314
315315async def test_mcp (base_url : str , seeds : List [int ]) -> Dict [str , Any ]:
@@ -336,7 +336,7 @@ async def test_mcp(base_url: str, seeds: List[int]) -> Dict[str, Any]:
336336 policy = FireworksPolicy ("test-model" )
337337
338338 # Run short rollout
339- evaluation_rows = await rollout (envs , policy = policy , steps = 10 )
339+ evaluation_rows = rollout (envs , policy = policy , steps = 10 )
340340
341341 if evaluation_rows and len (evaluation_rows [0 ].messages ) > 1 :
342342 results ["successful" ] += 1