Skip to content

Commit d14726e

Browse files
committed
step cache for Wan 2.2 T2V
Signed-off-by: James Huang <syhuang1201@gmail.com>
1 parent 98c3449 commit d14726e

File tree

2 files changed

+37
-32
lines changed

2 files changed

+37
-32
lines changed

src/maxdiffusion/configs/base_wan_27b.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,8 +304,8 @@ boundary_ratio: 0.875
304304

305305
# Diffusion CFG cache (FasterCache-style)
306306
use_cfg_cache: False
307-
# SenCache: sensitivity-aware adaptive caching (Haghighi & Alahi, 2026)
308-
use_sen_cache: False
307+
# StepCache: output-stability step caching — skip forward pass when output is stable
308+
use_step_cache: False
309309

310310
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
311311
guidance_rescale: 0.0

src/maxdiffusion/pipelines/wan/wan_pipeline_2_2.py

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,10 @@ def __call__(
111111
negative_prompt_embeds: jax.Array = None,
112112
vae_only: bool = False,
113113
use_cfg_cache: bool = False,
114-
use_sen_cache: bool = False,
114+
use_step_cache: bool = False,
115115
):
116-
if use_cfg_cache and use_sen_cache:
117-
raise ValueError("use_cfg_cache and use_sen_cache are mutually exclusive. Enable only one.")
116+
if use_cfg_cache and use_step_cache:
117+
raise ValueError("use_cfg_cache and use_step_cache are mutually exclusive. Enable only one.")
118118

119119
if use_cfg_cache and (guidance_scale_low <= 1.0 or guidance_scale_high <= 1.0):
120120
raise ValueError(
@@ -123,11 +123,11 @@ def __call__(
123123
"CFG cache accelerates classifier-free guidance, which must be enabled for both transformer phases."
124124
)
125125

126-
if use_sen_cache and (guidance_scale_low <= 1.0 or guidance_scale_high <= 1.0):
126+
if use_step_cache and (guidance_scale_low <= 1.0 or guidance_scale_high <= 1.0):
127127
raise ValueError(
128-
f"use_sen_cache=True requires both guidance_scale_low > 1.0 and guidance_scale_high > 1.0 "
128+
f"use_step_cache=True requires both guidance_scale_low > 1.0 and guidance_scale_high > 1.0 "
129129
f"(got {guidance_scale_low}, {guidance_scale_high}). "
130-
"SenCache requires classifier-free guidance to be enabled for both transformer phases."
130+
"StepCache requires classifier-free guidance to be enabled for both transformer phases."
131131
)
132132

133133
latents, prompt_embeds, negative_prompt_embeds, scheduler_state, num_frames = self._prepare_model_inputs(
@@ -159,7 +159,7 @@ def __call__(
159159
scheduler=self.scheduler,
160160
scheduler_state=scheduler_state,
161161
use_cfg_cache=use_cfg_cache,
162-
use_sen_cache=use_sen_cache,
162+
use_step_cache=use_step_cache,
163163
height=height,
164164
)
165165

@@ -196,7 +196,7 @@ def run_inference_2_2(
196196
scheduler: FlaxUniPCMultistepScheduler,
197197
scheduler_state,
198198
use_cfg_cache: bool = False,
199-
use_sen_cache: bool = False,
199+
use_step_cache: bool = False,
200200
height: int = 480,
201201
):
202202
"""Denoising loop for WAN 2.2 T2V with optional caching acceleration.
@@ -206,38 +206,37 @@ def run_inference_2_2(
206206
1. CFG-Cache (use_cfg_cache=True) — FasterCache-style:
207207
Caches the unconditional branch and uses FFT frequency-domain compensation.
208208
209-
2. SenCache (use_sen_cache=True) — Sensitivity-aware caching:
210-
Measures output sensitivity after each full forward pass. When sensitivity
211-
is low (model output is stable), skips the entire transformer and reuses
212-
the cached noise prediction. Naturally handles MoE expert boundaries by
213-
detecting high sensitivity at transition points.
209+
2. StepCache (use_step_cache=True) — Output-stability step caching:
210+
After each forward pass, measures relative output change. If small, skips
211+
the next step and reuses the cached noise prediction. Forces execution at
212+
MoE expert boundaries to prevent cross-expert cache reuse.
214213
"""
215214
do_classifier_free_guidance = guidance_scale_low > 1.0 or guidance_scale_high > 1.0
216215
bsz = latents.shape[0]
217216

218-
# ── SenCache path ──
219-
if use_sen_cache and do_classifier_free_guidance:
217+
# ── StepCache path ──
218+
if use_step_cache and do_classifier_free_guidance:
220219
timesteps_np = np.array(scheduler_state.timesteps, dtype=np.int32)
221220
step_uses_high = [bool(timesteps_np[s] >= boundary) for s in range(num_inference_steps)]
222221

223-
# Resolution-dependent SenCache config
222+
# Resolution-dependent StepCache config
224223
if height >= 720:
225-
sen_threshold = 0.06 # tighter for higher resolution
226-
warmup_ratio = 0.10
227-
max_consecutive_cache = 2
224+
step_threshold = 0.08 # tighter for higher resolution
225+
warmup_ratio = 0.08
226+
max_consecutive_cache = 3
228227
else:
229-
sen_threshold = 0.08
228+
step_threshold = 0.08
230229
warmup_ratio = 0.08
231230
max_consecutive_cache = 3
232231

233232
warmup_steps = max(2, int(num_inference_steps * warmup_ratio))
234233

235234
prompt_embeds_combined = jnp.concatenate([prompt_embeds, negative_prompt_embeds], axis=0)
236235

237-
# SenCache state
238-
prev_noise_pred = None # last full-computation noise prediction
239-
sensitivity = float('inf') # measured relative output change
240-
consecutive_cached = 0 # consecutive steps using cache
236+
# StepCache state
237+
prev_noise_pred = None # last full-computation noise prediction
238+
sensitivity = float("inf") # measured relative output change
239+
consecutive_cached = 0 # consecutive steps using cache
241240
cache_count = 0
242241

243242
for step in range(num_inference_steps):
@@ -258,7 +257,7 @@ def run_inference_2_2(
258257
not is_warmup
259258
and not is_boundary
260259
and prev_noise_pred is not None
261-
and sensitivity < sen_threshold
260+
and sensitivity < step_threshold
262261
and consecutive_cached < max_consecutive_cache
263262
)
264263

@@ -272,8 +271,12 @@ def run_inference_2_2(
272271
latents_doubled = jnp.concatenate([latents] * 2)
273272
timestep = jnp.broadcast_to(t, bsz * 2)
274273
noise_pred, _, _ = transformer_forward_pass_full_cfg(
275-
graphdef, state, rest,
276-
latents_doubled, timestep, prompt_embeds_combined,
274+
graphdef,
275+
state,
276+
rest,
277+
latents_doubled,
278+
timestep,
279+
prompt_embeds_combined,
277280
guidance_scale=guidance_scale,
278281
)
279282

@@ -283,15 +286,17 @@ def run_inference_2_2(
283286
output_magnitude = jnp.mean(jnp.abs(noise_pred)) + 1e-8
284287
sensitivity = float(output_diff / output_magnitude)
285288
else:
286-
sensitivity = float('inf')
289+
sensitivity = float("inf")
287290

288291
prev_noise_pred = noise_pred
289292
consecutive_cached = 0
290293

291294
latents, scheduler_state = scheduler.step(scheduler_state, noise_pred, t, latents).to_tuple()
292295

293-
print(f"[SenCache] Cached {cache_count}/{num_inference_steps} steps "
294-
f"({100*cache_count/num_inference_steps:.1f}% cache ratio)")
296+
print(
297+
f"[StepCache] Cached {cache_count}/{num_inference_steps} steps "
298+
f"({100*cache_count/num_inference_steps:.1f}% cache ratio)"
299+
)
295300
return latents
296301

297302
# ── CFG cache path ──

0 commit comments

Comments (0)