Commit aec2c90

add SP deny list instead of allow
Signed-off-by: Kashif Rasul <kashif.rasul@gmail.com>
1 parent 4dba1e2 commit aec2c90

1 file changed: deepspeed/runtime/sequence_parallel/ulysses_sp.py

Lines changed: 5 additions & 7 deletions
@@ -411,15 +411,13 @@ def register_with_transformers(
         # if we don't have the model yet at this stage
         hf_model_config = AutoConfig.from_pretrained(model_name_or_path)
 
-        supported_attn_implementation = ["flash_attention_2", "flash_attention_3", "sdpa"]
-        if core_attn_implementation not in supported_attn_implementation:
-            # notes on the excluded ones:
-            # - eager: The problem is that `eager` wants an attention_mask and it creates the wrong attention mask it seems if we don't provide one - it's possible that we could somehow solve this, but it's also unlikely someone will want to use the slow eager attention with sequence parallelism
-            # - flex_attention: haven't tried
-
+        # eager requires attention_mask which SP doesn't support; flex_attention is untested
+        unsupported_attn_implementation = ["eager", "flex_attention"]
+        if core_attn_implementation in unsupported_attn_implementation:
             raise ValueError(
                 f"{core_attn_implementation} attn_implementation isn't currently supported by Ulysses sequence"
-                f" parallelism. Set core_attn_implementation arg to one of {supported_attn_implementation}.")
+                f" parallelism. Use 'flash_attention_2', 'flash_attention_3', 'sdpa',"
+                f" or a hub-hosted kernel (e.g. 'kernels-community/flash-attn2').")
 
         if core_attn_implementation not in ALL_ATTENTION_FUNCTIONS:
             raise ValueError(
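
For illustration, a minimal standalone sketch of the deny-list check this commit introduces. The helper name and the module-level constant are hypothetical, invented for this sketch (in DeepSpeed the check lives inline inside register_with_transformers); only the list contents and the error message mirror the diff:

    # Hypothetical standalone sketch of the deny-list validation added here.
    # The helper name is invented for illustration; it is not DeepSpeed API.

    # Implementations known not to work with Ulysses SP: eager builds its own
    # (wrong) attention_mask, flex_attention is untested.
    UNSUPPORTED_ATTN_IMPLEMENTATIONS = ["eager", "flex_attention"]

    def validate_attn_implementation(core_attn_implementation: str) -> str:
        """Reject only known-bad implementations; let everything else through."""
        if core_attn_implementation in UNSUPPORTED_ATTN_IMPLEMENTATIONS:
            raise ValueError(
                f"{core_attn_implementation} attn_implementation isn't currently supported by Ulysses"
                f" sequence parallelism. Use 'flash_attention_2', 'flash_attention_3', 'sdpa',"
                f" or a hub-hosted kernel (e.g. 'kernels-community/flash-attn2').")
        return core_attn_implementation

    validate_attn_implementation("sdpa")                           # passes
    validate_attn_implementation("kernels-community/flash-attn2")  # passes: not on the deny list
    # validate_attn_implementation("eager")                        # raises ValueError

The design point of the change: with the old allow list, anything outside ["flash_attention_2", "flash_attention_3", "sdpa"] was rejected, so a hub-hosted kernel could never pass validation. With a deny list, only the two known-incompatible implementations are blocked, and new backends work without a DeepSpeed code change; the subsequent check against ALL_ATTENTION_FUNCTIONS (unchanged by this commit) still rejects names transformers hasn't registered.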
