Merged
5 changes: 0 additions & 5 deletions transformer_engine/pytorch/graph.py
@@ -507,11 +507,6 @@ def hook_fn(
         else:
             grad_inputs = None
         del outputs, grad_inputs
-        # The following code is added specifically for MCore's special requirements,
-        # aimed at preventing warmup from altering the control flow.
-        for module in func.modules():
-            if hasattr(module, "is_first_microbatch"):
-                module.is_first_microbatch = True
         torch.cuda.synchronize()
Contributor
Check that MCore modules no longer rely on the `is_first_microbatch` attribute being reset after warmup. The removed code reset a module attribute, while the PR description discusses a function parameter; these are different mechanisms. Verify that removing this reset doesn't break MCore integration in cases where a module carries `is_first_microbatch` as an instance attribute that affects control flow.
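To illustrate the concern above, here is a minimal pure-Python sketch of the attribute-based mechanism the deleted loop handled. `ToyModule`, its `modules()` method, and `reset_after_warmup` are hypothetical stand-ins (not Transformer Engine or MCore code); only the attribute name and the shape of the removed loop come from the diff. The point is that a warmup pass can flip a control-flow flag on a module instance, and the removed code restored it before real execution.

```python
# Hypothetical sketch: ToyModule mimics the relevant surface of
# torch.nn.Module (an is_first_microbatch instance attribute and a
# modules() iterator). Not real Transformer Engine code.
class ToyModule:
    def __init__(self, children=()):
        self.children = list(children)
        self.is_first_microbatch = True

    def modules(self):
        # Yield self and all submodules, like torch.nn.Module.modules().
        yield self
        for child in self.children:
            yield from child.modules()

    def forward(self):
        if self.is_first_microbatch:          # control flow gated by the flag
            self.is_first_microbatch = False  # warmup mutates module state

def reset_after_warmup(func):
    # Reconstruction of the loop this PR removes: restore the flag on
    # every submodule so the first real microbatch sees the same state
    # the warmup run started from.
    for module in func.modules():
        if hasattr(module, "is_first_microbatch"):
            module.is_first_microbatch = True

root = ToyModule(children=[ToyModule()])
root.forward()                     # warmup pass flips the flag
assert root.is_first_microbatch is False
reset_after_warmup(root)           # the behavior the diff deletes
assert root.is_first_microbatch is True
```

This is distinct from passing `is_first_microbatch` as a `forward()` argument: a function parameter is supplied fresh on every call, while an instance attribute persists across calls and is therefore vulnerable to being permanently flipped by warmup replays.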


# All captures here share a mempool. To avoid replays corrupting each other's memory,