Commit aec2c90

add SP deny list instead of allow
Signed-off-by: Kashif Rasul <kashif.rasul@gmail.com>
1 parent 4dba1e2 commit aec2c90

1 file changed: deepspeed/runtime/sequence_parallel/ulysses_sp.py

Lines changed: 5 additions & 7 deletions
@@ -411,15 +411,13 @@ def register_with_transformers(
         # if we don't have the model yet at this stage
         hf_model_config = AutoConfig.from_pretrained(model_name_or_path)
 
-        supported_attn_implementation = ["flash_attention_2", "flash_attention_3", "sdpa"]
-        if core_attn_implementation not in supported_attn_implementation:
-            # notes on the excluded ones:
-            # - eager: The problem is that `eager` wants an attention_mask and it creates the wrong attention mask it seems if we don't provide one - it's possible that we could somehow solve this, but it's also unlikely someone will want to use the slow eager attention with sequence parallelism
-            # - flex_attention: haven't tried
-
+        # eager requires attention_mask which SP doesn't support; flex_attention is untested
+        unsupported_attn_implementation = ["eager", "flex_attention"]
+        if core_attn_implementation in unsupported_attn_implementation:
             raise ValueError(
                 f"{core_attn_implementation} attn_implementation isn't currently supported by Ulysses sequence"
-                f" parallelism. Set core_attn_implementation arg to one of {supported_attn_implementation}.")
+                f" parallelism. Use 'flash_attention_2', 'flash_attention_3', 'sdpa',"
+                f" or a hub-hosted kernel (e.g. 'kernels-community/flash-attn2').")
 
         if core_attn_implementation not in ALL_ATTENTION_FUNCTIONS:
             raise ValueError(
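
For illustration, a minimal standalone sketch of the deny-list check this commit introduces. The helper name and the module-level constant are hypothetical, invented for this sketch (in DeepSpeed the check lives inline inside register_with_transformers); only the list contents and the error message mirror the diff:

    # Hypothetical standalone sketch of the deny-list validation added here.
    # The helper name is invented for illustration; it is not DeepSpeed API.

    # Implementations known not to work with Ulysses SP: eager builds its own
    # (wrong) attention_mask, flex_attention is untested.
    UNSUPPORTED_ATTN_IMPLEMENTATIONS = ["eager", "flex_attention"]

    def validate_attn_implementation(core_attn_implementation: str) -> str:
        """Reject only known-bad implementations; let everything else through."""
        if core_attn_implementation in UNSUPPORTED_ATTN_IMPLEMENTATIONS:
            raise ValueError(
                f"{core_attn_implementation} attn_implementation isn't currently supported by Ulysses"
                f" sequence parallelism. Use 'flash_attention_2', 'flash_attention_3', 'sdpa',"
                f" or a hub-hosted kernel (e.g. 'kernels-community/flash-attn2').")
        return core_attn_implementation

    validate_attn_implementation("sdpa")                           # passes
    validate_attn_implementation("kernels-community/flash-attn2")  # passes: not on the deny list
    # validate_attn_implementation("eager")                        # raises ValueError

The design point of the change: with the old allow list, anything outside ["flash_attention_2", "flash_attention_3", "sdpa"] was rejected, so a hub-hosted kernel could never pass validation. With a deny list, only the two known-incompatible implementations are blocked, and new backends work without a DeepSpeed code change; the subsequent check against ALL_ATTENTION_FUNCTIONS (unchanged by this commit) still rejects names transformers hasn't registered.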
