Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 39 additions & 12 deletions megatron/rl/agent/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,22 @@ class RolloutRequest(Request):


class GroupedRolloutRequest(Request):
"""Request to agent to generate grouped Rollouts."""
"""Request to agent to generate grouped Rollouts.

Attributes:
num_groups: Number of rollout groups to generate per batch.
rollouts_per_group: Number of rollouts within each group.
inference_interface: Interface used for model inference during rollout generation.
validation: Whether this is a validation (not training) request.
filter_groups_with_same_reward: Drop groups where all rollouts have identical rewards.
streaming: If True, generate groups indefinitely until the consumer stops iterating.
If False, generate exactly `num_groups` groups and stop.
enforce_order: If True, yield groups in staleness-preserving order for forced-lag
training. In the steady state, complete batches are yielded in strict sequential order
so that the consumer always trains on the oldest available rollouts first.
        During warmup (the first `num_workers` batches), all batches are equally stale,
so this parameter is ignored and groups are yielded immediately as they complete.
"""

num_groups: int
rollouts_per_group: int
Expand Down Expand Up @@ -271,24 +286,36 @@ async def shutdown_queue_when_done():
shutdown_task = asyncio.create_task(shutdown_queue_when_done())

try:
next_batch_id = 0
# Forced lag involves strict ordering at steady-state.
# However, the initial conditions do not require (and are harmed by) strict ordering.
warmup_groups_until_release = groups_per_worker
next_batch_id = num_workers
pending: dict[int, GroupedRollouts] = {}
while True:
try:
group = await grouped_rollouts.get()
except asyncio_QueueShutDown:
break
if request.enforce_order:
# Accumulate groups and enforce submission order across batches.
pending.setdefault(group.batch_id, []).append(group)
while (l := len(pending.get(next_batch_id, []))) >= groups_per_worker:
assert l == groups_per_worker
batch = pending.pop(next_batch_id)
batch.sort(key=lambda g: g.index_in_batch)
next_batch_id += 1
for g in batch:
yield g
submission_gate.release()
if group.batch_id < num_workers:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this change the semantics of enforce_order? So, now enforce_order preserves the staleness order, not the submission order?

Regardless of the answer, we need to have a docstring for GroupedRolloutRequest to describe what this actually means, right now it is unclear.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regardless of the answer, we need to have a docstring for GroupedRolloutRequest to describe what this actually means, right now it is unclear.

Done! I believe my docstring also reflects the discussion below; let me know if not.

Doesn't this change the semantics of enforce_order? So, now enforce_order preserves the staleness order, not the submission order?

It changes the semantics of enforce_order, so that enforce_order is completely ignored during warm-up (the first num_workers batches).

The goal of enforce_order is to guarantee that each training batch is composed of rollouts of a specific, pre-determined, staleness. During "warmup", all rollouts consumed by training were started on iteration 0; this is just how warmup needs to work. Because all rollouts have equal staleness, it makes no sense to preserve their submission order; we get no benefit out of it, only drawbacks.

So this is an optimization at warmup, to make it smoother. And because forced lag really struggles to smooth out its per-step behavior after warmup (unlike the smooth behavior you are used to seeing from unforced lag, where it's only the first few steps that are bumpy, with forced lag the bumpiness of the first few steps causes the entire run to show a saw-tooth pattern on throughput numbers), this optimization makes the entire run slightly smoother.

It is just a free win. The only drawback is that other RL projects that exist in the open-source community are not taking this free win, so our forced lag simulations will be a steel-man of how forced lag is actually implemented by others.

# Warmup: initial batches all have equal staleness;
# yield immediately without waiting for batch completion.
yield group
warmup_groups_until_release -= 1
if warmup_groups_until_release == 0:
submission_gate.release()
warmup_groups_until_release = groups_per_worker
else:
# Steady state: accumulate and enforce strict batch order.
pending.setdefault(group.batch_id, []).append(group)
while (l := len(pending.get(next_batch_id, []))) >= groups_per_worker:
assert l == groups_per_worker
batch = pending.pop(next_batch_id)
batch.sort(key=lambda g: g.index_in_batch)
next_batch_id += 1
for g in batch:
yield g
submission_gate.release()
else:
# Yield groups as soon as they're completed.
yield group
Expand Down
24 changes: 22 additions & 2 deletions tests/unit_tests/rl/test_grouped_rollouts.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ async def group_rollout(self, request):
idx = self._call_count
self._call_count += 1
if idx < self.num_slow_calls:
await asyncio.sleep(0.03)
await asyncio.Event().wait() # Block forever; cancelled when test completes
return [
Rollout(
trajectory=[f"t{idx}"],
Expand All @@ -54,7 +54,8 @@ class TestGroupedRollouts:
[
pytest.param(0, False, 8, 8, None, id="non_batched"),
pytest.param(0, False, 4, 4, None, id="non_streaming_fewer_than_parallel"),
pytest.param(4, True, 2, 8, [0, 0, 1, 1, 2, 2, 3, 3], id="batched_submission_order"),
pytest.param(4, True, 2, 8, None, id="streaming_batched"),
pytest.param(0, True, 2, 16, None, id="streaming_steady_state_order"),
pytest.param(0, True, 1, 10, None, id="streaming"),
],
)
Expand All @@ -78,6 +79,25 @@ async def test_get_grouped_rollouts(
assert len(groups) == expected_count
if expected_batch_ids is not None:
assert [g.batch_id for g in groups] == expected_batch_ids
if num_slow_calls > 0 and streaming:
# Warmup should not block on slow batches.
batch_ids = [g.batch_id for g in groups]
num_slow_batches = num_slow_calls // num_groups
slow_batches = set(range(num_slow_batches))
assert (
batch_ids[0] not in slow_batches
), f"Expected first group from a fast batch, got batch_id={batch_ids[0]}"
if streaming and num_groups > 1:
# Verify steady-state batches arrive in sequential order.
num_workers = gen.parallel_generation_tasks // num_groups
steady = [g for g in groups if g.batch_id >= num_workers]
if steady:
batch_order = [steady[0].batch_id]
for g in steady[1:]:
if g.batch_id != batch_order[-1]:
batch_order.append(g.batch_id)
expected = list(range(num_workers, num_workers + len(batch_order)))
assert batch_order == expected, f"Steady-state batches out of order: {batch_order}"

@pytest.mark.asyncio
async def test_weighted_multi_task(self):
Expand Down
Loading