@@ -379,7 +379,38 @@ async def _execute_groupwise_eval_with_semaphore(
379379 pointwise_tasks .append (
380380 asyncio .create_task (_execute_pointwise_eval_with_semaphore (row = row ))
381381 )
382- results = await asyncio .gather (* pointwise_tasks )
382+
383+ # Show a tqdm progress bar for the evaluations; the `with` block closes it on exit, even on error
384+ eval_position = run_idx + 2 # Position after rollout progress bar
385+ with tqdm (
386+ total = len (pointwise_tasks ),
387+ desc = f" Eval { run_idx + 1 } " ,
388+ unit = "eval" ,
389+ file = sys .__stderr__ ,
390+ leave = False ,
391+ position = eval_position ,
392+ dynamic_ncols = True ,
393+ miniters = 1 ,
394+ mininterval = 0.1 ,
395+ bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]" ,
396+ ) as eval_pbar :
397+
398+ async def task_with_progress (task ):
399+ try :
400+ result = await task
401+ return result
402+ finally :
403+ eval_pbar .update (1 )
404+
405+ wrapped_tasks = [task_with_progress (task ) for task in pointwise_tasks ]
406+ try :
407+ results = await asyncio .gather (* wrapped_tasks )
408+ except Exception :
409+ # gather raised (an eval failed): cancel the underlying tasks still running and wait for them to finish before re-raising
410+ for task in pointwise_tasks :
411+ task .cancel ()
412+ await asyncio .gather (* pointwise_tasks , return_exceptions = True )
413+ raise
383414
384415 all_results [run_idx ] = results
385416 elif mode == "groupwise" :
@@ -510,14 +541,23 @@ async def _collect_result(config, lst): # pyright: ignore[reportUnknownParamete
510541 ) as run_pbar :
511542
512543 async def execute_run_with_progress (run_idx : int , config ):
513- result = await execute_run (run_idx , config )
514- run_pbar .update (1 )
515- return result
544+ try :
545+ result = await execute_run (run_idx , config )
546+ return result
547+ finally :
548+ run_pbar .update (1 )
516549
517550 tasks = []
518551 for run_idx in range (num_runs ):
519552 tasks .append (asyncio .create_task (execute_run_with_progress (run_idx , config )))
520- await asyncio .gather (* tasks ) # pyright: ignore[reportUnknownArgumentType]
553+ try :
554+ await asyncio .gather (* tasks )
555+ except Exception :
556+ # gather raised (a run failed): cancel the tasks still running and wait for them to finish before re-raising
557+ for task in tasks :
558+ task .cancel ()
559+ await asyncio .gather (* tasks , return_exceptions = True )
560+ raise
521561
522562 experiment_duration_seconds = time .perf_counter () - experiment_start_time
523563
0 commit comments