Skip to content

Commit 5b05c79

Browse files
committed
adds config
1 parent f33d4f3 commit 5b05c79

1 file changed

Lines changed: 52 additions & 0 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3131,6 +3131,58 @@ dsr1-fp4-gb200-dynamo-sglang:
31313131
additional-settings:
31323132
- "DECODE_NODES=8"
31333133

3134+
# 1k8k configurations: isl 1024 / osl 8192 (srtctl-based)
3135+
- isl: 1024
3136+
osl: 8192
3137+
search-space:
3138+
# Low latency (1 prefill node, 2 decode nodes)
3139+
- spec-decoding: "none"
3140+
conc-list: [ 4, 8, 16, 32 ]
3141+
prefill:
3142+
num-worker: 1
3143+
tp: 4
3144+
ep: 1
3145+
dp-attn: false
3146+
additional-settings:
3147+
- "CONFIG_FILE=recipes/gb200-fp4/1k8k/low-latency.yaml"
3148+
decode:
3149+
num-worker: 2
3150+
tp: 4
3151+
ep: 1
3152+
dp-attn: false
3153+
3154+
# Mid curve (4 prefill nodes, 8 decode nodes; decode is a single worker with tp 32 / ep 32)
3155+
- spec-decoding: "none"
3156+
conc-list: [ 2048, 4096, 8192 ]
3157+
prefill:
3158+
num-worker: 4
3159+
tp: 4
3160+
ep: 4
3161+
dp-attn: true
3162+
additional-settings:
3163+
- "CONFIG_FILE=recipes/gb200-fp4/1k8k/mid-curve.yaml"
3164+
decode:
3165+
num-worker: 1
3166+
tp: 32
3167+
ep: 32
3168+
dp-attn: true
3169+
3170+
# Max throughput (4 prefill nodes, 12 decode nodes; decode is a single worker with tp 48 / ep 48)
3171+
- spec-decoding: "none"
3172+
conc-list: [ 256, 512, 1024, 2048 ]
3173+
prefill:
3174+
num-worker: 4
3175+
tp: 4
3176+
ep: 4
3177+
dp-attn: true
3178+
additional-settings:
3179+
- "CONFIG_FILE=recipes/gb200-fp4/1k8k/max-tpt.yaml"
3180+
decode:
3181+
num-worker: 1
3182+
tp: 48
3183+
ep: 48
3184+
dp-attn: true
3185+
31343186
dsr1-fp4-gb300-dynamo-trt:
31353187
image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
31363188
model: nvidia/DeepSeek-R1-0528-NVFP4-v2

0 commit comments

Comments
 (0)