Add 1k8k (srtctl-based) configs to dsr1-fp4-gb200-dynamo-sglang

Elnifio · Elnifio · commit 5b05c79c4617 · 2026-02-04T20:07:51.000-08:00
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
@@ -3131,6 +3131,58 @@ dsr1-fp4-gb200-dynamo-sglang:
         additional-settings:
         - "DECODE_NODES=8"
 
+  # 1k8k configurations (srtctl-based): isl=1024, osl=8192; every entry sets CONFIG_FILE under recipes/gb200-fp4/1k8k/
+  - isl: 1024
+    osl: 8192
+    search-space:
+    # Low latency (1 prefill node, 2 decode nodes): 1 prefill + 2 decode workers, each tp=4/ep=1, dp-attn off
+    - spec-decoding: "none"
+      conc-list: [ 4, 8, 16, 32 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "CONFIG_FILE=recipes/gb200-fp4/1k8k/low-latency.yaml"
+      decode:
+        num-worker: 2
+        tp: 4
+        ep: 1
+        dp-attn: false
+
+    # Mid curve (4 prefill nodes, 8 decode nodes): 4 prefill workers at tp=4/ep=4, 1 decode worker at tp=32/ep=32
+    - spec-decoding: "none"
+      conc-list: [ 2048, 4096, 8192 ]  # NOTE(review): higher than the max-throughput entry's 256-2048 - confirm intended
+      prefill:
+        num-worker: 4
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "CONFIG_FILE=recipes/gb200-fp4/1k8k/mid-curve.yaml"
+      decode:
+        num-worker: 1
+        tp: 32
+        ep: 32
+        dp-attn: true
+
+    # Max throughput (4 prefill nodes, 12 decode nodes): 4 prefill workers at tp=4/ep=4, 1 decode worker at tp=48/ep=48
+    - spec-decoding: "none"
+      conc-list: [ 256, 512, 1024, 2048 ]  # NOTE(review): lower than the mid-curve entry's 2048-8192 - confirm intended
+      prefill:
+        num-worker: 4
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "CONFIG_FILE=recipes/gb200-fp4/1k8k/max-tpt.yaml"
+      decode:
+        num-worker: 1
+        tp: 48
+        ep: 48
+        dp-attn: true
+
 dsr1-fp4-gb300-dynamo-trt:
   image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2
   model: nvidia/DeepSeek-R1-0528-NVFP4-v2