88from eval_protocol .pytest .types import RolloutProcessorConfig , TestFunction
99from eval_protocol .pytest .rollout_processor import RolloutProcessor
1010from eval_protocol .pytest .evaluation_test_utils import rollout_processor_with_retry , add_cost_metrics
11- from eval_protocol .pytest .buffer import MiniBatchDataBuffer
11+ from eval_protocol .pytest .buffer import MicroBatchDataBuffer
1212from eval_protocol .dataset_logger .dataset_logger import DatasetLogger
1313from eval_protocol .human_id import generate_id
1414from eval_protocol .log_utils .rollout_context import rollout_logging_context
@@ -49,10 +49,10 @@ def __init__(
4949 active_logger : DatasetLogger ,
5050 max_concurrent_evaluations : int ,
5151 eval_executor : TestFunction , # Callback to run evaluation
52- output_buffer : Optional [MiniBatchDataBuffer ] = None ,
52+ output_buffer : Optional [MicroBatchDataBuffer ] = None ,
5353 rollout_n : int = 0 ,
5454 mode : str = "pointwise" ,
55- in_group_microbatch_size : int = 0 , # for one sample, how many runs to execute at the same time
55+ in_group_minibatch_size : int = 0 , # for one sample, how many runs to execute at the same time
5656 evaluation_test_kwargs : Dict [str , Any ] = {},
5757 ):
5858 self .rollout_processor = rollout_processor
@@ -77,7 +77,7 @@ def __init__(
7777 self .background_tasks = set () # run evaluations in the background asynchronously
7878
7979 self .rollout_n = rollout_n
80- self .in_group_microbatch_size = in_group_microbatch_size if in_group_microbatch_size > 0 else rollout_n
80+ self .in_group_minibatch_size = in_group_minibatch_size if in_group_minibatch_size > 0 else rollout_n
8181 self .evaluation_test_kwargs = evaluation_test_kwargs
8282
8383 async def schedule_dataset (
@@ -91,7 +91,7 @@ async def schedule_dataset(
9191 for i , row in enumerate (dataset ):
9292 # Calculate ranges for the first in-group minibatch
9393 batch_start = 0
94- batch_end = min (self .in_group_microbatch_size , self .rollout_n )
94+ batch_end = min (self .in_group_minibatch_size , self .rollout_n )
9595 run_indices = list (range (batch_start , batch_end ))
9696
9797 # Initial priority: Low (1), ordered by dataset index
@@ -243,7 +243,7 @@ async def _run_eval(rows_to_eval: Union[EvaluationRow, List[EvaluationRow]]):
243243 next_start = last_run_idx + 1
244244
245245 if next_start < self .rollout_n :
246- next_end = min (next_start + self .in_group_microbatch_size , self .rollout_n )
246+ next_end = min (next_start + self .in_group_minibatch_size , self .rollout_n )
247247 next_indices = list (range (next_start , next_end ))
248248 new_history = task .history + current_batch_history_updates
249249
@@ -327,27 +327,26 @@ async def run(self, dataset: List[EvaluationRow], num_runs: int, micro_batch_siz
327327async def execute_priority_rollouts (
328328 dataset : List [EvaluationRow ],
329329 num_runs : int ,
330- micro_batch_size : int ,
331330 rollout_processor : RolloutProcessor ,
332331 config : RolloutProcessorConfig ,
333332 max_concurrent_rollouts : int ,
334333 active_logger : DatasetLogger ,
335334 eval_executor : TestFunction ,
336335 max_concurrent_evaluations : int = 96 ,
337336 mode : str = "pointwise" ,
338- mini_batch_data_buffer : Optional [MiniBatchDataBuffer ] = None ,
337+ micro_batch_data_buffer : Optional [MicroBatchDataBuffer ] = None ,
339338 evaluation_test_kwargs : Dict [str , Any ] = {},
340339):
341340 scheduler = PriorityRolloutScheduler (
342341 rollout_processor = rollout_processor ,
343342 max_concurrent_rollouts = max_concurrent_rollouts ,
344343 active_logger = active_logger ,
345344 eval_executor = eval_executor ,
346- output_buffer = mini_batch_data_buffer ,
345+ output_buffer = micro_batch_data_buffer ,
347346 max_concurrent_evaluations = max_concurrent_evaluations ,
348347 rollout_n = num_runs ,
349348 mode = mode ,
350- in_group_microbatch_size = micro_batch_size ,
349+ in_group_minibatch_size = ( num_runs // 2 ) ,
351350 evaluation_test_kwargs = evaluation_test_kwargs ,
352351 )
353352 return await scheduler .run (dataset , num_runs , micro_batch_size , config )
0 commit comments