File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -3131,6 +3131,58 @@ dsr1-fp4-gb200-dynamo-sglang:
31313131 additional-settings :
31323132 - " DECODE_NODES=8"
31333133
3134+ # 1k8k configurations: ISL 1024 / OSL 8192 (srtctl-based)
3135+ - isl : 1024
3136+ osl : 8192
3137+ search-space :
3138+ # Low latency: 1 prefill worker (TP4) + 2 decode workers (TP4 each); 1 prefill node, 2 decode nodes
3139+ - spec-decoding : " none"
3140+ conc-list : [ 4, 8, 16, 32 ]
3141+ prefill :
3142+ num-worker : 1
3143+ tp : 4
3144+ ep : 1
3145+ dp-attn : false
3146+ additional-settings :
3147+ - " CONFIG_FILE=recipes/gb200-fp4/1k8k/low-latency.yaml"
3148+ decode :
3149+ num-worker : 2
3150+ tp : 4
3151+ ep : 1
3152+ dp-attn : false
3153+
3154+ # Mid curve: 4 prefill workers (TP4/EP4) + 1 decode worker (TP32/EP32); 4 prefill nodes, 8 decode nodes
3155+ - spec-decoding : " none"
3156+ conc-list : [ 2048, 4096, 8192 ]
3157+ prefill :
3158+ num-worker : 4
3159+ tp : 4
3160+ ep : 4
3161+ dp-attn : true
3162+ additional-settings :
3163+ - " CONFIG_FILE=recipes/gb200-fp4/1k8k/mid-curve.yaml"
3164+ decode :
3165+ num-worker : 1
3166+ tp : 32
3167+ ep : 32
3168+ dp-attn : true
3169+
3170+ # Max throughput: 4 prefill workers (TP4/EP4) + 1 decode worker (TP48/EP48); 4 prefill nodes, 12 decode nodes
3171+ - spec-decoding : " none"
3172+ conc-list : [ 256, 512, 1024, 2048 ]
3173+ prefill :
3174+ num-worker : 4
3175+ tp : 4
3176+ ep : 4
3177+ dp-attn : true
3178+ additional-settings :
3179+ - " CONFIG_FILE=recipes/gb200-fp4/1k8k/max-tpt.yaml"
3180+ decode :
3181+ num-worker : 1
3182+ tp : 48
3183+ ep : 48
3184+ dp-attn : true
3185+
31343186dsr1-fp4-gb300-dynamo-trt :
31353187 image : nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
31363188 model : nvidia/DeepSeek-R1-0528-NVFP4-v2
You can’t perform that action at this time.
0 commit comments