@@ -418,37 +418,42 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
418418 else :
419419 output_buffer = None
420420
421- priority_results = await execute_priority_rollouts (
422- dataset = data ,
423- num_runs = num_runs ,
424- rollout_processor = rollout_processor ,
425- config = config ,
426- max_concurrent_rollouts = max_concurrent_rollouts ,
427- active_logger = active_logger ,
428- eval_executor = test_func ,
429- max_concurrent_evaluations = max_concurrent_evaluations ,
430- mode = mode ,
431- micro_batch_data_buffer = output_buffer ,
432- evaluation_test_kwargs = kwargs .get ("evaluation_test_kwargs" ) or {},
433- )
421+ try :
422+ priority_results = await execute_priority_rollouts (
423+ dataset = data ,
424+ num_runs = num_runs ,
425+ rollout_processor = rollout_processor ,
426+ config = config ,
427+ max_concurrent_rollouts = max_concurrent_rollouts ,
428+ active_logger = active_logger ,
429+ eval_executor = test_func ,
430+ max_concurrent_evaluations = max_concurrent_evaluations ,
431+ mode = mode ,
432+ micro_batch_data_buffer = output_buffer ,
433+ evaluation_test_kwargs = kwargs .get ("evaluation_test_kwargs" ) or {},
434+ )
435+ finally :
436+ if output_buffer :
437+ await output_buffer .close ()
434438
435439 for res in priority_results :
436440 run_idx = (res .execution_metadata .extra or {}).get ("run_index" , 0 )
437441 if run_idx < len (all_results ):
438442 all_results [run_idx ].append (res )
439443
440444 processed_rows_in_run .append (res )
441- postprocess (
442- all_results ,
443- aggregation_method ,
444- passed_threshold ,
445- active_logger ,
446- mode ,
447- completion_params , # pyright: ignore[reportArgumentType]
448- test_func .__name__ ,
449- num_runs ,
450- time .perf_counter () - experiment_start_time ,
451- )
445+
446+ postprocess (
447+ all_results ,
448+ aggregation_method ,
449+ passed_threshold ,
450+ active_logger ,
451+ mode ,
452+ completion_params , # pyright: ignore[reportArgumentType]
453+ test_func .__name__ ,
454+ num_runs ,
455+ time .perf_counter () - experiment_start_time ,
456+ )
452457
453458 else :
454459 async def execute_run (run_idx : int , config : RolloutProcessorConfig ):
0 commit comments