From 25af05ac37ebc96cb8f03f2bbdf5d8e374955012 Mon Sep 17 00:00:00 2001 From: Robin Zhang Date: Fri, 27 Feb 2026 00:26:40 -0800 Subject: [PATCH] Remove is_first_microbatch setting after warmup Signed-off-by: Robin Zhang --- transformer_engine/pytorch/graph.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/transformer_engine/pytorch/graph.py b/transformer_engine/pytorch/graph.py index f4b1fb23ae..d3320fd70f 100644 --- a/transformer_engine/pytorch/graph.py +++ b/transformer_engine/pytorch/graph.py @@ -507,11 +507,6 @@ def hook_fn( else: grad_inputs = None del outputs, grad_inputs - # The following code is added specifically for MCore's special requirements, - # aimed at preventing warmup from altering the control flow. - for module in func.modules(): - if hasattr(module, "is_first_microbatch"): - module.is_first_microbatch = True torch.cuda.synchronize() # All captures here share a mempool. To avoid replays corrupting each other's memory,