Skip to content

Commit 8d9f7c9

Browse files
committed
remove allreduce fusion arg
1 parent 33bb71a commit 8d9f7c9

2 files changed

Lines changed: 26 additions & 1 deletion

File tree

.github/configs/nvidia-master.yaml

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1804,6 +1804,31 @@ qwen3.5-bf16-b200-sglang:
18041804
search-space:
18051805
- { tp: 8, ep: 1, conc-start: 4, conc-end: 64 }
18061806

1807+
qwen3.5-fp8-b200-sglang:
1808+
image: lmsysorg/sglang:v0.5.9-cu129-amd64
1809+
model: Qwen/Qwen3.5-397B-A17B-FP8
1810+
model-prefix: qwen3.5
1811+
runner: b200
1812+
precision: fp8
1813+
framework: sglang
1814+
multinode: false
1815+
seq-len-configs:
1816+
- isl: 1024
1817+
osl: 1024
1818+
search-space:
1819+
- { tp: 4, ep: 4, conc-start: 4, conc-end: 16 }
1820+
- { tp: 4, ep: 4, conc-start: 64, conc-end: 64 }
1821+
- isl: 1024
1822+
osl: 8192
1823+
search-space:
1824+
- { tp: 8, ep: 1, conc-start: 4, conc-end: 8 }
1825+
- { tp: 4, ep: 4, conc-start: 8, conc-end: 64}
1826+
- isl: 8192
1827+
osl: 1024
1828+
search-space:
1829+
- { tp: 8, ep: 1, conc-start: 4, conc-end: 4}
1830+
- { tp: 4, ep: 4, conc-start: 8, conc-end: 64 }
1831+
18071832
qwen3.5-fp4-b200-sglang:
18081833
image: lmsysorg/sglang:v0.5.9-cu129-amd64
18091834
model: nvidia/Qwen3.5-397B-A17B-NVFP4

benchmarks/single_node/qwen3.5_fp4_b200.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ PYTHONNOUSERSITE=1 python3 -m sglang.launch_server --model-path=$MODEL --host=0.
4242
--mem-fraction-static $MEM_FRAC_STATIC --chunked-prefill-size 32768 --max-prefill-tokens 32768 \
4343
--context-length $CONTEXT_LENGTH --disable-radix-cache \
4444
--attention-backend trtllm_mha --moe-runner-backend flashinfer_trtllm \
45-
--enable-flashinfer-allreduce-fusion --scheduler-recv-interval 30 \
45+
--scheduler-recv-interval 30 \
4646
--stream-interval 30 --quantization modelopt_fp4 \
4747
--kv-cache-dtype fp8_e4m3 --fp4-gemm-backend flashinfer_cutlass > $SERVER_LOG 2>&1 &
4848

0 commit comments

Comments
 (0)