From 5fe1a358c5d2efae950d28b893991c07b4cedf04 Mon Sep 17 00:00:00 2001
From: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
Date: Tue, 5 May 2026 21:45:53 -0500
Subject: [PATCH 1/4] Update Kimi K2.5 MTP support by using Model Runner V2 and
 vLLM v0.20.1

Signed-off-by: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
---
 .github/configs/amd-master.yaml               | 2421 ++++++++---------
 .../single_node/kimik2.5_int4_mi300x_mtp.sh   |   86 +
 .../single_node/kimik2.5_int4_mi325x_mtp.sh   |   86 +
 .../single_node/kimik2.5_int4_mi355x_mtp.sh   |   86 +
 4 files changed, 1371 insertions(+), 1308 deletions(-)
 create mode 100644 benchmarks/single_node/kimik2.5_int4_mi300x_mtp.sh
 create mode 100644 benchmarks/single_node/kimik2.5_int4_mi325x_mtp.sh
 create mode 100644 benchmarks/single_node/kimik2.5_int4_mi355x_mtp.sh

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 44045c274..b8b84d535 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -6,21 +6,16 @@ dsr1-fp4-mi355x-sglang:
   precision: fp4
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    agentic-coding:
-    - duration: 1800
-      search-space:
-      - { tp: 8, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 32, 64, 128, 256] }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 dsr1-fp4-mi355x-atom:
   image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
@@ -30,18 +25,17 @@ dsr1-fp4-mi355x-atom:
   precision: fp4
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
 
 dsr1-fp4-mi355x-atom-mtp:
   image: rocm/atom:rocm7.2.0-ubuntu24.04-pytorch2.9-atom0.1.1
@@ -52,18 +46,17 @@ dsr1-fp4-mi355x-atom-mtp:
   # WIP framework (no customers yet)
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp }
-      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp }
+    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.9-rocm700-mi30x
@@ -73,16 +66,15 @@ dsr1-fp8-mi300x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 dsr1-fp8-mi325x-sglang:
   image: lmsysorg/sglang:v0.5.9-rocm700-mi30x
@@ -92,16 +84,15 @@ dsr1-fp8-mi325x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 dsr1-fp8-mi355x-sglang:
   image: lmsysorg/sglang:v0.5.9-rocm700-mi35x
@@ -111,17 +102,16 @@ dsr1-fp8-mi355x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 32, conc-end: 64 }
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 4, conc-start: 32, conc-end: 64 }
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-bf16-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
@@ -131,16 +121,15 @@ qwen3.5-bf16-mi355x-sglang:
   precision: bf16
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
 
 qwen3.5-bf16-mi355x-sglang-mtp:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
@@ -150,16 +139,15 @@ qwen3.5-bf16-mi355x-sglang-mtp:
   precision: bf16
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 qwen3.5-bf16-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -169,16 +157,15 @@ qwen3.5-bf16-mi300x-sglang:
   precision: bf16
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-bf16-mi325x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -188,16 +175,15 @@ qwen3.5-bf16-mi325x-sglang:
   precision: bf16
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-fp8-mi325x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -207,16 +193,15 @@ qwen3.5-fp8-mi325x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-fp8-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414
@@ -226,19 +211,18 @@ qwen3.5-fp8-mi355x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
-      - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 }
-      - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 }
-      - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
+    - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 }
+    - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 }
+    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
 
 qwen3.5-fp8-mi355x-sglang-mtp:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414
@@ -248,63 +232,18 @@ qwen3.5-fp8-mi355x-sglang-mtp:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp }
-      - { tp: 8, ep: 8, conc-start: 64, conc-end: 256, spec-decoding: mtp }
-      - { tp: 2, ep: 2, conc-start: 128, conc-end: 256, spec-decoding: mtp }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, ep: 2, conc-start: 4, conc-end: 32, spec-decoding: mtp }
-      - { tp: 4, ep: 1, conc-start: 32, conc-end: 256, spec-decoding: mtp }
-
-qwen3.5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: Qwen/Qwen3.5-397B-A17B-FP8
-  model-prefix: qwen3.5
-  runner: mi355x
-  precision: fp8
-  framework: atom
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 }
-      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 }
-      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
-
-qwen3.5-fp8-mi355x-atom-mtp:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: Qwen/Qwen3.5-397B-A17B-FP8
-  model-prefix: qwen3.5
-  runner: mi355x
-  precision: fp8
-  framework: atom
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp }
+    - { tp: 8, ep: 8, conc-start: 64, conc-end: 256, spec-decoding: mtp }
+    - { tp: 2, ep: 2, conc-start: 128, conc-end: 256, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, ep: 2, conc-start: 4, conc-end: 32, spec-decoding: mtp }
+    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256, spec-decoding: mtp }
 
 qwen3.5-fp4-mi355x-sglang:
   image: rocm/sgl-dev:v0.5.10rc0-rocm720-mi35x-20260413
@@ -314,39 +253,17 @@ qwen3.5-fp4-mi355x-sglang:
   precision: fp4
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 16 }
-
-qwen3.5-fp4-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: amd/Qwen3.5-397B-A17B-MXFP4
-  model-prefix: qwen3.5
-  runner: mi355x
-  precision: fp4
-  framework: atom
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 16 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 256 }
+    - { tp: 4, conc-start: 4, conc-end: 16 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 256 }
+    - { tp: 4, conc-start: 4, conc-end: 16 }
 
 qwen3.5-fp8-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -356,16 +273,15 @@ qwen3.5-fp8-mi300x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 glm5-fp8-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413
@@ -375,58 +291,51 @@ glm5-fp8-mi355x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 glm5-fp8-mi355x-sglang-mtp:
-  image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
+  image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: mi355x
   precision: fp8
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 128, spec-decoding: mtp }
-      - { tp: 8, conc-start: 4, conc-end: 8, spec-decoding: mtp }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 128, spec-decoding: mtp }
-      - { tp: 8, conc-start: 4, conc-end: 8, spec-decoding: mtp }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
 
 glm5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2.post
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: mi355x
   precision: fp8
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 256 }
-      - { tp: 8, conc-start: 4, conc-end: 256 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 256 }
-      - { tp: 8, conc-start: 4, conc-end: 256 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 256 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 256 }
 
 glm5.1-fp4-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
@@ -436,37 +345,17 @@ glm5.1-fp4-mi355x-sglang:
   precision: fp4
   framework: sglang
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 16 }
-
-glm5.1-fp4-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: amd/GLM-5.1-MXFP4
-  model-prefix: glm5.1
-  runner: mi355x
-  precision: fp4
-  framework: atom
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 256 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 4, conc-start: 4, conc-end: 256 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 256 }
+    - { tp: 4, conc-start: 4, conc-end: 16 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 256 }
+    - { tp: 4, conc-start: 4, conc-end: 16 }
 
 kimik2.5-int4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -476,16 +365,15 @@ kimik2.5-int4-mi355x-vllm:
   precision: int4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 kimik2.5-int4-mi325x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -495,16 +383,15 @@ kimik2.5-int4-mi325x-vllm:
   precision: int4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
 
 kimik2.5-int4-mi300x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -514,17 +401,70 @@ kimik2.5-int4-mi300x-vllm:
   precision: int4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+
+kimik2.5-int4-mi355x-vllm-mtp:
+  image: vllm/vllm-openai-rocm:v0.20.1
+  model: moonshotai/Kimi-K2.5
+  model-prefix: kimik2.5
+  runner: mi355x
+  precision: int4
+  framework: vllm
+  multinode: false
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+
+kimik2.5-int4-mi300x-vllm-mtp:
+  image: vllm/vllm-openai-rocm:v0.20.1
+  model: moonshotai/Kimi-K2.5
+  model-prefix: kimik2.5
+  runner: mi300x
+  precision: int4
+  framework: vllm
+  multinode: false
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+
+kimik2.5-int4-mi325x-vllm-mtp:
+  image: vllm/vllm-openai-rocm:v0.20.1
+  model: moonshotai/Kimi-K2.5
+  model-prefix: kimik2.5
+  runner: mi325x
+  precision: int4
+  framework: vllm
+  multinode: false
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+
 kimik2.5-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
   model: amd/Kimi-K2.5-MXFP4
@@ -533,18 +473,17 @@ kimik2.5-fp4-mi355x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
 
 kimik2.5-fp4-mi355x-atom:
   image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
@@ -554,18 +493,17 @@ kimik2.5-fp4-mi355x-atom:
   precision: fp4
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 128 }
-      - { tp: 4, conc-start: 4, conc-end: 128 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 128 }
-      - { tp: 4, conc-start: 4, conc-end: 128 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 128 }
+    - { tp: 4, conc-start: 4, conc-end: 128 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 128 }
+    - { tp: 4, conc-start: 4, conc-end: 128 }
 
 minimaxm2.5-fp8-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.19.0
@@ -575,66 +513,41 @@ minimaxm2.5-fp8-mi355x-vllm:
   precision: fp8
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, ep: 2, conc-start: 2, conc-end: 512 }
-      - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 }
-      - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 }
-      - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 }
-      - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 2, ep: 2, conc-start: 2, conc-end: 512 }
+    - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 }
+    - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 }
+    - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 }
+    - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
 
 minimaxm2.5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: mi355x
   precision: fp8
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 256 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 256 }
-      - { tp: 4, conc-start: 4, conc-end: 256 }
-
-minimaxm2.5-fp4-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: amd/MiniMax-M2.5-MXFP4
-  model-prefix: minimaxm2.5
-  runner: mi355x
-  precision: fp4
-  framework: atom
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 1024 }
-      - { tp: 2, conc-start: 4, conc-end: 1024 }
-      - { tp: 4, conc-start: 4, conc-end: 128 }
-      - { tp: 8, conc-start: 4, conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 1024 }
-      - { tp: 2, conc-start: 4, conc-end: 1024 }
-      - { tp: 4, conc-start: 4, conc-end: 128 }
-      - { tp: 8, conc-start: 4, conc-end: 16 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 128 }
+    - { tp: 4, conc-start: 4, conc-end: 128 }
+    - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 128 }
+    - { tp: 4, conc-start: 4, conc-end: 128 }
+    - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
 
 minimaxm2.5-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.19.1
@@ -644,20 +557,19 @@ minimaxm2.5-fp4-mi355x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 32 }
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 32 }
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 32 }
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 32 }
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
 
 minimaxm2.5-fp8-mi300x-vllm:
   image: vllm/vllm-openai-rocm:v0.16.0
@@ -667,18 +579,17 @@ minimaxm2.5-fp8-mi300x-vllm:
   precision: fp8
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
 
 minimaxm2.5-fp8-mi325x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -688,18 +599,17 @@ minimaxm2.5-fp8-mi325x-vllm:
   precision: fp8
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 }
 
 gptoss-fp4-mi300x-vllm:
   image: vllm/vllm-openai-rocm:v0.17.0
@@ -709,22 +619,21 @@ gptoss-fp4-mi300x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 64, conc-end: 256 }
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-      - { tp: 8, conc-start: 1, conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 64 }
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-      - { tp: 8, conc-start: 1, conc-end: 16 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 64, conc-end: 256 }
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+    - { tp: 8, conc-start: 1, conc-end: 16 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 64 }
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+    - { tp: 8, conc-start: 1, conc-end: 16 }
 
 gptoss-fp4-mi325x-vllm:
   image: vllm/vllm-openai-rocm:v0.17.0
@@ -734,22 +643,21 @@ gptoss-fp4-mi325x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 64 }
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 64 }
-      - { tp: 2, conc-start: 4, conc-end: 8 }
-      - { tp: 4, conc-start: 4, conc-end: 8 }
-      - { tp: 8, conc-start: 4, conc-end: 16 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 64 }
+    - { tp: 2, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 64 }
+    - { tp: 8, conc-start: 4, conc-end: 64 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 64 }
+    - { tp: 2, conc-start: 4, conc-end: 8 }
+    - { tp: 4, conc-start: 4, conc-end: 8 }
+    - { tp: 8, conc-start: 4, conc-end: 16 }
 
 gptoss-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.17.0
@@ -759,41 +667,39 @@ gptoss-fp4-mi355x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 128 }
-      - { tp: 4, conc-start: 4, conc-end: 8 }
-      - { tp: 8, conc-start: 4, conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 128 }
-      - { tp: 4, conc-start: 4, conc-end: 4 }
-      - { tp: 8, conc-start: 4, conc-end: 8 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 128 }
+    - { tp: 4, conc-start: 4, conc-end: 8 }
+    - { tp: 8, conc-start: 4, conc-end: 16 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 128 }
+    - { tp: 4, conc-start: 4, conc-end: 4 }
+    - { tp: 8, conc-start: 4, conc-end: 8 }
 
 gptoss-fp4-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
   model: openai/gpt-oss-120b
   model-prefix: gptoss
   runner: mi355x
   precision: fp4
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 16, conc-end: 256 }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 1, conc-start: 4, conc-end: 256 }
-      - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 16, conc-end: 128 }
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 1, conc-start: 4, conc-end: 128 }
+    - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 }
 
 dsr1-fp8-mi355x-atom:
   image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
@@ -804,16 +710,15 @@ dsr1-fp8-mi355x-atom:
   # WIP framework (no customers yet)
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 128 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 128 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 128 }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 128 }
 
 dsr1-fp8-mi355x-atom-mtp:
   image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
@@ -823,16 +728,15 @@ dsr1-fp8-mi355x-atom-mtp:
   precision: fp8
   framework: atom
   multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp  }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  - isl: 8192
+    osl: 1024
+    search-space:
+    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp  }
 
 dsr1-fp8-mi355x-sglang-disagg:
   image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2
@@ -843,151 +747,150 @@ dsr1-fp8-mi355x-sglang-disagg:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      # non-MTP configurations
-      # "Top of curve" (1 prefill workers each at DEP8 and 1 decode workers at DEP16)
-      - spec-decoding: "none"
-        conc-list: [ 1024, 2048 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      # "Middle of curve" (1 prefill workers each at TP8 and 2 decode workers at DEP8)
-      - spec-decoding: "none"
-        conc-list: [ 1536, 1024, 512 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-
-      # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8)
-      - spec-decoding: "none"
-        conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      - spec-decoding: "none"
-        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
-
-    - isl: 8192
-      osl: 1024
-      search-space:
-      # non-MTP configurations
-      # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8)
-      - spec-decoding: "none"
-        conc-list: [ 1024, 2048 ]
-        prefill:
-          num-worker: 2
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=2"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
-
-      # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
-      - spec-decoding: "none"
-        conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      - spec-decoding: "none"
-        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # non-MTP configurations
+    # "Top of curve" (1 prefill worker at TP8 and 1 decode worker at DEP16)
+    - spec-decoding: "none"
+      conc-list: [ 1024, 2048 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8)
+    - spec-decoding: "none"
+      conc-list: [ 1536, 1024, 512 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+
+    # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
+    - spec-decoding: "none"
+      conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    - spec-decoding: "none"
+      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
+
+  - isl: 8192
+    osl: 1024
+    search-space:
+    # non-MTP configurations
+    # "Top of curve" (2 prefill workers at DEP8 and 1 decode worker at DEP8)
+    - spec-decoding: "none"
+      conc-list: [ 1024, 2048 ]
+      prefill:
+        num-worker: 2
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=2"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
+
+    # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
+    - spec-decoding: "none"
+      conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    - spec-decoding: "none"
+      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
 
 
 dsr1-fp8-mi355x-sglang-disagg-mtp:
@@ -999,151 +902,150 @@ dsr1-fp8-mi355x-sglang-disagg-mtp:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      # MTP configurations
-      # "Top of curve" (1 prefill worker at DEP8 and 1 decode worker at DEP16)
-      - spec-decoding: "mtp"
-        conc-list: [ 1024, 2048 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=1"
-
-      # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8)
-      - spec-decoding: "mtp"
-        conc-list: [ 1536, 1024, 512, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=1"
-
-
-      # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8)
-      - spec-decoding: "mtp"
-        conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=2"
-
-      - spec-decoding: "mtp"
-        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=2"
-
-    - isl: 8192
-      osl: 1024
-      search-space:
-      # MTP configurations
-      # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8)
-      - spec-decoding: "mtp"
-        conc-list: [ 1024, 2048 ]
-        prefill:
-          num-worker: 2
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=2"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=1"
-
-      # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
-      - spec-decoding: "mtp"
-        conc-list: [ 256, 128, 64, 32, 16, 8, 4, 2 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=2"
-
-      - spec-decoding: "mtp"
-        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=2"
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # MTP configurations
+    # "Top of curve" (1 prefill worker at TP8 and 1 decode worker at DEP16)
+    - spec-decoding: "mtp"
+      conc-list: [ 1024, 2048 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=1"
+
+    # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8)
+    - spec-decoding: "mtp"
+      conc-list: [ 1536, 1024, 512, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=1"
+
+
+    # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
+    - spec-decoding: "mtp"
+      conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=2"
+
+    - spec-decoding: "mtp"
+      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=2"
+
+  - isl: 8192
+    osl: 1024
+    search-space:
+    # MTP configurations
+    # "Top of curve" (2 prefill workers at DEP8 and 1 decode worker at DEP8)
+    - spec-decoding: "mtp"
+      conc-list: [ 1024, 2048 ]
+      prefill:
+        num-worker: 2
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=2"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=1"
+
+    # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
+    - spec-decoding: "mtp"
+      conc-list: [ 256, 128, 64, 32, 16, 8, 4, 2 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=2"
+
+    - spec-decoding: "mtp"
+      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=2"
 
 
 dsr1-fp4-mi355x-sglang-disagg:
@@ -1155,205 +1057,204 @@ dsr1-fp4-mi355x-sglang-disagg:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      # non-MTP configurations
-      # 1P1D TP8
-      - spec-decoding: "none"
-        conc-list: [ 1, 2, 4, 8 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
-
-      # 1P2D TP8
-      - spec-decoding: "none"
-        conc-list: [ 2, 4, 8, 16, 32 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      # 1P2D TP8
-      - spec-decoding: "none" 
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      # 1P2D TP4
-      - spec-decoding: "none" 
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # non-MTP configurations
+    # 1P1D TP8
+    - spec-decoding: "none"
+      conc-list: [ 1, 2, 4, 8 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
+
+    # 1P2D TP8
+    - spec-decoding: "none"
+      conc-list: [ 2, 4, 8, 16, 32 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    # 1P2D TP8
+    - spec-decoding: "none" 
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    # 1P2D TP4
+    - spec-decoding: "none" 
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
     
-      # 1*DEP4+ 1*DEP8
-      - spec-decoding: "none"
-        conc-list: [ 1024, 2048 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 4
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
-
-    - isl: 8192
-      osl: 1024
-      search-space:
-      # non-MTP configurations
-      # 1P1D pure TP8
-      - spec-decoding: "none"
-        conc-list: [ 1, 2, 4, 8 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
-
-      # 1P2D TP8
-      - spec-decoding: "none"
-        conc-list: [ 2, 4, 8, 16, 32 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      # 1P2D TP8
-      - spec-decoding: "none"
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      # 1P2D TP4
-      - spec-decoding: "none"
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=0"
-
-      # 4*DEP4 + 1*DEP8
-      - spec-decoding: "none"
-        conc-list: [ 1024, 2048, 4096 ]
-        prefill:
-          num-worker: 4
-          tp: 4
-          ep: 4
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=4"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=0"
+    # 1*DEP4 + 1*DEP8
+    - spec-decoding: "none"
+      conc-list: [ 1024, 2048 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
+
+  - isl: 8192
+    osl: 1024
+    search-space:
+    # non-MTP configurations
+    # 1P1D pure TP8
+    - spec-decoding: "none"
+      conc-list: [ 1, 2, 4, 8 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
+
+    # 1P2D TP8
+    - spec-decoding: "none"
+      conc-list: [ 2, 4, 8, 16, 32 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    # 1P2D TP8
+    - spec-decoding: "none"
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    # 1P2D TP4
+    - spec-decoding: "none"
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=0"
+
+    # 4*DEP4 + 1*DEP8
+    - spec-decoding: "none"
+      conc-list: [ 1024, 2048, 4096 ]
+      prefill:
+        num-worker: 4
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=4"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=0"
 
 dsr1-fp4-mi355x-sglang-disagg-mtp:
   image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3
@@ -1364,299 +1265,203 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      # MTP configurations
-      # 1P1D TP8
-      - spec-decoding: "mtp"
-        conc-list: [ 1, 2, 4, 8 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=3"
-
-      # 1P2D TP8
-      - spec-decoding: "mtp" 
-        conc-list: [ 2, 4, 8, 16, 32 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=3"
-
-      # 1P2D TP8
-      - spec-decoding: "mtp" 
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=1"
-
-      # 1P2D TP4
-      - spec-decoding: "mtp" 
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=1"
-
-      # 1*DEP4+ 1*DEP8
-      - spec-decoding: "mtp"
-        conc-list: [ 1024, 2048 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 4
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=1"
-
-
-    - isl: 8192
-      osl: 1024
-      search-space:
-      # MTP configurations
-      # 1P1D pure TP8
-      - spec-decoding: "mtp"
-        conc-list: [ 1, 2, 4, 8 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=3"
-
-
-      # 1P2D TP8
-      - spec-decoding: "mtp"
-        conc-list: [ 2, 4, 8, 16, 32 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=3"
-
-      # 1P2D TP8
-      - spec-decoding: "mtp"
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=1"
-
-      # 1P2D TP4
-      - spec-decoding: "mtp"
-        conc-list: [ 64, 128, 256 ]
-        prefill:
-          num-worker: 1
-          tp: 4
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "PREFILL_NODES=1"
-        decode:
-          num-worker: 2
-          tp: 8
-          ep: 1
-          dp-attn: false
-          additional-settings:
-          - "DECODE_NODES=2"
-          - "DECODE_MTP_SIZE=1"
-
-      # 4*DEP4 + 1*DEP8
-      - spec-decoding: "mtp"
-        conc-list: [ 1024, 2048, 4096 ]
-        prefill:
-          num-worker: 4
-          tp: 4
-          ep: 4
-          dp-attn: true
-          additional-settings:
-          - "PREFILL_NODES=4"
-        decode:
-          num-worker: 1
-          tp: 8
-          ep: 8
-          dp-attn: true
-          additional-settings:
-          - "DECODE_NODES=1"
-          - "DECODE_MTP_SIZE=1"
-
-dsv4-fp8-mi355x-sglang:
-  image: rocm/sgl-dev:deepseek-v4-mi35x
-  model: sgl-project/DeepSeek-V4-Pro-FP8
-  model-prefix: dsv4
-  runner: mi355x
-  precision: fp8
-  framework: sglang
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 4, conc-end: 64 }
-
-# DSv4-Pro FP4 on MI355X via SGLang. Uses a rocm720 mi35x image built off the
-# amd/deepseek_v4 branch in sgl-project/sglang; the SHA is encoded in the
-# image tag, so bumping sglang is just an image tag bump here. Sweeps
-# DP-attention on/off and EP=8.
-dsv4-fp4-mi355x-sglang:
-  image: rocm/sgl-dev:rocm720-mi35x-a8410de-20260502-DSv4
-  model: deepseek-ai/DeepSeek-V4-Pro
-  model-prefix: dsv4
-  runner: mi355x
-  precision: fp4
-  framework: sglang
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, dp-attn: true, conc-start: 16, conc-end: 256 }
-      - { tp: 8, dp-attn: false, conc-start: 1 , conc-end: 16 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, dp-attn: true, conc-start: 16, conc-end: 128 }
-      - { tp: 8, dp-attn: false, conc-start: 1, conc-end: 16 }
-
-# vLLM with AITER MLA decode for DSv4 on MI355X (vllm-project/vllm#40889,
-# stacked on #40871). Uses the ATOM MI355X image (ROCm 7.2.2, aiter with
-# MLA decode, MI355X GPU detection); vLLM is rebuilt from the PR branch
-# at runtime by benchmarks/single_node/dsv4_fp8_mi355x_vllm.sh at a
-# pinned SHA. Once both PRs merge into a release, switch to a vLLM ROCm
-# MI355X image and remove the build step.
-dsv4-fp8-mi355x-vllm:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: deepseek-ai/DeepSeek-V4-Pro
-  model-prefix: dsv4
-  runner: mi355x
-  precision: fp8
-  framework: vllm
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 1, conc-end: 1 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, conc-start: 1, conc-end: 1 }
-
-# Day-0 single-sequence marker for DeepSeek-V4 on ATOM (ROCm/ATOM#650).
-# PR1 of the ATOM DSv4 series still uses torch sparse-attention fallbacks
-# that OOM once warmup/prefill batches multiple requests; keep CONC=1 until
-# the AITER sparse-attention kernel / multi-request path lands upstream.
-# --enforce-eager and ATOM_USE_TRITON_MOE=1 are required on gfx950. Image is
-# the standard atom0.1.2.post MI355X base (matching qwen3.5-fp8-mi355x-atom);
-# the DSv4 PR is overlaid at runtime by dsv4_fp4_mi355x_atom.sh at a pinned SHA.
-dsv4-fp4-mi355x-atom:
-  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
-  model: deepseek-ai/DeepSeek-V4-Pro
-  model-prefix: dsv4
-  runner: mi355x
-  precision: fp4
-  framework: atom
-  multinode: false
-  scenarios:
-    fixed-seq-len:
-    - isl: 1024
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 }
-    - isl: 8192
-      osl: 1024
-      search-space:
-      - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 }
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # MTP configurations
+    # 1P1D TP8
+    - spec-decoding: "mtp"
+      conc-list: [ 1, 2, 4, 8 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=3"
+
+    # 1P2D TP8
+    - spec-decoding: "mtp" 
+      conc-list: [ 2, 4, 8, 16, 32 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=3"
+
+    # 1P2D TP8
+    - spec-decoding: "mtp" 
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=1"
+
+    # 1P2D TP4
+    - spec-decoding: "mtp" 
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=1"
+
+    # 1*DEP4+ 1*DEP8
+    - spec-decoding: "mtp"
+      conc-list: [ 1024, 2048 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=1"
+
+
+  - isl: 8192
+    osl: 1024
+    search-space:
+    # MTP configurations
+    # 1P1D pure TP8
+    - spec-decoding: "mtp"
+      conc-list: [ 1, 2, 4, 8 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=3"
+
+
+    # 1P2D TP8
+    - spec-decoding: "mtp"
+      conc-list: [ 2, 4, 8, 16, 32 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=3"
+
+    # 1P2D TP8
+    - spec-decoding: "mtp"
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=1"
+
+    # 1P2D TP4
+    - spec-decoding: "mtp"
+      conc-list: [ 64, 128, 256 ]
+      prefill:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+      decode:
+        num-worker: 2
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MTP_SIZE=1"
+
+    # 4*DEP4 + 1*DEP8
+    - spec-decoding: "mtp"
+      conc-list: [ 1024, 2048, 4096 ]
+      prefill:
+        num-worker: 4
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "PREFILL_NODES=4"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 8
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MTP_SIZE=1"
diff --git a/benchmarks/single_node/kimik2.5_int4_mi300x_mtp.sh b/benchmarks/single_node/kimik2.5_int4_mi300x_mtp.sh
new file mode 100644
index 000000000..c32ed1ca4
--- /dev/null
+++ b/benchmarks/single_node/kimik2.5_int4_mi300x_mtp.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+source "$(dirname "$0")/../benchmark_lib.sh"
+
+check_env_vars \
+    MODEL \
+    TP \
+    CONC \
+    ISL \
+    OSL \
+    MAX_MODEL_LEN \
+    RANDOM_RANGE_RATIO \
+    RESULT_FILENAME
+
+if [[ -n "$SLURM_JOB_ID" ]]; then
+  echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
+fi
+
+hf download "$MODEL"
+
+# Set HIP_VISIBLE_DEVICES to match ROCR_VISIBLE_DEVICES for Ray compatibility in vLLM 0.14+
+if [ -n "$ROCR_VISIBLE_DEVICES" ]; then
+    export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES"
+fi
+
+SERVER_LOG=/workspace/server.log
+PORT=${PORT:-8888}
+
+if [ "${EVAL_ONLY}" = "true" ]; then
+    setup_eval_context
+    MAX_MODEL_LEN="$EVAL_MAX_MODEL_LEN"
+fi
+# Start GPU monitoring (power, temperature, clocks every second)
+start_gpu_monitor
+
+echo "Ensuring host benchmark dependencies (aiohttp/transformers/numpy/tqdm/huggingface-hub/tiktoken)..."
+python3 -m pip install --no-cache-dir aiohttp transformers numpy tqdm huggingface-hub tiktoken
+
+set -x
+export VLLM_USE_V2_MODEL_RUNNER=1
+export VLLM_ROCM_USE_AITER=1
+export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
+export VLLM_ROCM_USE_AITER_RMSNORM=0
+# Default lives in its own variable so a closing brace in the JSON cannot end the expansion early.
+DEFAULT_SPEC_CONFIG='{"model": "nvidia/Kimi-K2.5-Thinking-Eagle3", "method": "eagle3", "num_speculative_tokens": 3}'
+export VLLM_SPEC_CONFIG="${VLLM_SPEC_CONFIG:-$DEFAULT_SPEC_CONFIG}"
+
+vllm serve "$MODEL" --port "$PORT" \
+--tensor-parallel-size="$TP" \
+--gpu-memory-utilization 0.9 \
+--max-model-len "$MAX_MODEL_LEN" \
+--block-size=64 \
+--trust-remote-code \
+--no-enable-prefix-caching \
+--max-num-seqs 256 \
+--speculative-config "$VLLM_SPEC_CONFIG" \
+--mm-encoder-tp-mode data > "$SERVER_LOG" 2>&1 &
+
+SERVER_PID=$!
+
+# Wait for server to be ready
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+run_benchmark_serving \
+    --model "$MODEL" \
+    --port "$PORT" \
+    --backend vllm \
+    --input-len "$ISL" \
+    --output-len "$OSL" \
+    --random-range-ratio "$RANDOM_RANGE_RATIO" \
+    --num-prompts "$((CONC * 10))" \
+    --max-concurrency "$CONC" \
+    --result-filename "$RESULT_FILENAME" \
+    --result-dir /workspace/ \
+    --trust-remote-code
+
+# After throughput, run evaluation only if RUN_EVAL is true
+if [ "${RUN_EVAL}" = "true" ]; then
+    run_eval --framework lm-eval --port "$PORT"
+    append_lm_eval_summary
+fi
+
+# Stop GPU monitoring
+stop_gpu_monitor
+set +x
+
diff --git a/benchmarks/single_node/kimik2.5_int4_mi325x_mtp.sh b/benchmarks/single_node/kimik2.5_int4_mi325x_mtp.sh
new file mode 100644
index 000000000..c32ed1ca4
--- /dev/null
+++ b/benchmarks/single_node/kimik2.5_int4_mi325x_mtp.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+source "$(dirname "$0")/../benchmark_lib.sh"
+
+check_env_vars \
+    MODEL \
+    TP \
+    CONC \
+    ISL \
+    OSL \
+    MAX_MODEL_LEN \
+    RANDOM_RANGE_RATIO \
+    RESULT_FILENAME
+
+if [[ -n "$SLURM_JOB_ID" ]]; then
+  echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
+fi
+
+hf download "$MODEL"
+
+# Set HIP_VISIBLE_DEVICES to match ROCR_VISIBLE_DEVICES for Ray compatibility in vLLM 0.14+
+if [ -n "$ROCR_VISIBLE_DEVICES" ]; then
+    export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES"
+fi
+
+SERVER_LOG=/workspace/server.log
+PORT=${PORT:-8888}
+
+if [ "${EVAL_ONLY}" = "true" ]; then
+    setup_eval_context
+    MAX_MODEL_LEN="$EVAL_MAX_MODEL_LEN"
+fi
+# Start GPU monitoring (power, temperature, clocks every second)
+start_gpu_monitor
+
+echo "Ensuring host benchmark dependencies (aiohttp/transformers/numpy/tqdm/huggingface-hub/tiktoken)..."
+python3 -m pip install --no-cache-dir aiohttp transformers numpy tqdm huggingface-hub tiktoken
+
+set -x
+export VLLM_USE_V2_MODEL_RUNNER=1
+export VLLM_ROCM_USE_AITER=1
+export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
+export VLLM_ROCM_USE_AITER_RMSNORM=0
+# Default lives in its own variable so a closing brace in the JSON cannot end the expansion early.
+DEFAULT_SPEC_CONFIG='{"model": "nvidia/Kimi-K2.5-Thinking-Eagle3", "method": "eagle3", "num_speculative_tokens": 3}'
+export VLLM_SPEC_CONFIG="${VLLM_SPEC_CONFIG:-$DEFAULT_SPEC_CONFIG}"
+
+vllm serve "$MODEL" --port "$PORT" \
+--tensor-parallel-size="$TP" \
+--gpu-memory-utilization 0.9 \
+--max-model-len "$MAX_MODEL_LEN" \
+--block-size=64 \
+--trust-remote-code \
+--no-enable-prefix-caching \
+--max-num-seqs 256 \
+--speculative-config "$VLLM_SPEC_CONFIG" \
+--mm-encoder-tp-mode data > "$SERVER_LOG" 2>&1 &
+
+SERVER_PID=$!
+
+# Wait for server to be ready
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+run_benchmark_serving \
+    --model "$MODEL" \
+    --port "$PORT" \
+    --backend vllm \
+    --input-len "$ISL" \
+    --output-len "$OSL" \
+    --random-range-ratio "$RANDOM_RANGE_RATIO" \
+    --num-prompts "$((CONC * 10))" \
+    --max-concurrency "$CONC" \
+    --result-filename "$RESULT_FILENAME" \
+    --result-dir /workspace/ \
+    --trust-remote-code
+
+# After throughput, run evaluation only if RUN_EVAL is true
+if [ "${RUN_EVAL}" = "true" ]; then
+    run_eval --framework lm-eval --port "$PORT"
+    append_lm_eval_summary
+fi
+
+# Stop GPU monitoring
+stop_gpu_monitor
+set +x
+
diff --git a/benchmarks/single_node/kimik2.5_int4_mi355x_mtp.sh b/benchmarks/single_node/kimik2.5_int4_mi355x_mtp.sh
new file mode 100644
index 000000000..c32ed1ca4
--- /dev/null
+++ b/benchmarks/single_node/kimik2.5_int4_mi355x_mtp.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+source "$(dirname "$0")/../benchmark_lib.sh"
+
+check_env_vars \
+    MODEL \
+    TP \
+    CONC \
+    ISL \
+    OSL \
+    MAX_MODEL_LEN \
+    RANDOM_RANGE_RATIO \
+    RESULT_FILENAME
+
+if [[ -n "$SLURM_JOB_ID" ]]; then
+  echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
+fi
+
+hf download "$MODEL"
+
+# Set HIP_VISIBLE_DEVICES to match ROCR_VISIBLE_DEVICES for Ray compatibility in vLLM 0.14+
+if [ -n "$ROCR_VISIBLE_DEVICES" ]; then
+    export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES"
+fi
+
+SERVER_LOG=/workspace/server.log
+PORT=${PORT:-8888}
+
+if [ "${EVAL_ONLY}" = "true" ]; then
+    setup_eval_context
+    MAX_MODEL_LEN="$EVAL_MAX_MODEL_LEN"
+fi
+# Start GPU monitoring (power, temperature, clocks every second)
+start_gpu_monitor
+
+echo "Ensuring host benchmark dependencies (aiohttp/transformers/numpy/tqdm/huggingface-hub/tiktoken)..."
+python3 -m pip install --no-cache-dir aiohttp transformers numpy tqdm huggingface-hub tiktoken
+
+set -x
+export VLLM_USE_V2_MODEL_RUNNER=1
+export VLLM_ROCM_USE_AITER=1
+export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4
+export VLLM_ROCM_USE_AITER_RMSNORM=0
+# Default lives in its own variable so a closing brace in the JSON cannot end the expansion early.
+DEFAULT_SPEC_CONFIG='{"model": "nvidia/Kimi-K2.5-Thinking-Eagle3", "method": "eagle3", "num_speculative_tokens": 3}'
+export VLLM_SPEC_CONFIG="${VLLM_SPEC_CONFIG:-$DEFAULT_SPEC_CONFIG}"
+
+vllm serve "$MODEL" --port "$PORT" \
+--tensor-parallel-size="$TP" \
+--gpu-memory-utilization 0.9 \
+--max-model-len "$MAX_MODEL_LEN" \
+--block-size=64 \
+--trust-remote-code \
+--no-enable-prefix-caching \
+--max-num-seqs 256 \
+--speculative-config "$VLLM_SPEC_CONFIG" \
+--mm-encoder-tp-mode data > "$SERVER_LOG" 2>&1 &
+
+SERVER_PID=$!
+
+# Wait for server to be ready
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+run_benchmark_serving \
+    --model "$MODEL" \
+    --port "$PORT" \
+    --backend vllm \
+    --input-len "$ISL" \
+    --output-len "$OSL" \
+    --random-range-ratio "$RANDOM_RANGE_RATIO" \
+    --num-prompts "$((CONC * 10))" \
+    --max-concurrency "$CONC" \
+    --result-filename "$RESULT_FILENAME" \
+    --result-dir /workspace/ \
+    --trust-remote-code
+
+# After throughput, run evaluation only if RUN_EVAL is true
+if [ "${RUN_EVAL}" = "true" ]; then
+    run_eval --framework lm-eval --port "$PORT"
+    append_lm_eval_summary
+fi
+
+# Stop GPU monitoring
+stop_gpu_monitor
+set +x
+

From f696ca8a44de252bc2480ceb1cf550c341c3ea22 Mon Sep 17 00:00:00 2001
From: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
Date: Tue, 5 May 2026 22:11:13 -0500
Subject: [PATCH 2/4] Update kimi k2.5 mtp support by using modelrunnerV2 and
 vllm v0.20.1

Signed-off-by: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
---
 .github/configs/amd-master.yaml | 2365 +++++++++++++++++--------------
 1 file changed, 1307 insertions(+), 1058 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index b8b84d535..d5ee5d664 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -6,16 +6,21 @@ dsr1-fp4-mi355x-sglang:
   precision: fp4
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    agentic-coding:
+    - duration: 1800
+      search-space:
+      - { tp: 8, offloading: none, conc-list: [1, 2, 4, 8, 12, 16, 32, 64, 128, 256] }
 
 dsr1-fp4-mi355x-atom:
   image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
@@ -25,17 +30,18 @@ dsr1-fp4-mi355x-atom:
   precision: fp4
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
 
 dsr1-fp4-mi355x-atom-mtp:
   image: rocm/atom:rocm7.2.0-ubuntu24.04-pytorch2.9-atom0.1.1
@@ -46,17 +52,18 @@ dsr1-fp4-mi355x-atom-mtp:
   # WIP framework (no customers yet)
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp }
-    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      #- { tp: 4, conc-start: 32, conc-end: 256, spec-decoding: mtp }
+      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.9-rocm700-mi30x
@@ -66,15 +73,16 @@ dsr1-fp8-mi300x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 dsr1-fp8-mi325x-sglang:
   image: lmsysorg/sglang:v0.5.9-rocm700-mi30x
@@ -84,15 +92,16 @@ dsr1-fp8-mi325x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 dsr1-fp8-mi355x-sglang:
   image: lmsysorg/sglang:v0.5.9-rocm700-mi35x
@@ -102,16 +111,17 @@ dsr1-fp8-mi355x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 4, conc-start: 32, conc-end: 64 }
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 32, conc-end: 64 }
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-bf16-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
@@ -121,15 +131,16 @@ qwen3.5-bf16-mi355x-sglang:
   precision: bf16
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
 
 qwen3.5-bf16-mi355x-sglang-mtp:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
@@ -139,15 +150,16 @@ qwen3.5-bf16-mi355x-sglang-mtp:
   precision: bf16
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 qwen3.5-bf16-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -157,15 +169,16 @@ qwen3.5-bf16-mi300x-sglang:
   precision: bf16
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-bf16-mi325x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -175,15 +188,16 @@ qwen3.5-bf16-mi325x-sglang:
   precision: bf16
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-fp8-mi325x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -193,15 +207,16 @@ qwen3.5-fp8-mi325x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 qwen3.5-fp8-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414
@@ -211,18 +226,19 @@ qwen3.5-fp8-mi355x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
-    - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 }
-    - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 }
-    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
+      - { tp: 8, ep: 8, conc-start: 64, conc-end: 256 }
+      - { tp: 2, ep: 2, conc-start: 128, conc-end: 256 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, ep: 2, conc-start: 4, conc-end: 32 }
+      - { tp: 4, ep: 1, conc-start: 32, conc-end: 256 }
 
 qwen3.5-fp8-mi355x-sglang-mtp:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260414
@@ -232,18 +248,63 @@ qwen3.5-fp8-mi355x-sglang-mtp:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp }
-    - { tp: 8, ep: 8, conc-start: 64, conc-end: 256, spec-decoding: mtp }
-    - { tp: 2, ep: 2, conc-start: 128, conc-end: 256, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, ep: 2, conc-start: 4, conc-end: 32, spec-decoding: mtp }
-    - { tp: 4, ep: 1, conc-start: 32, conc-end: 256, spec-decoding: mtp }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32, spec-decoding: mtp }
+      - { tp: 8, ep: 8, conc-start: 64, conc-end: 256, spec-decoding: mtp }
+      - { tp: 2, ep: 2, conc-start: 128, conc-end: 256, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, ep: 2, conc-start: 4, conc-end: 32, spec-decoding: mtp }
+      - { tp: 4, ep: 1, conc-start: 32, conc-end: 256, spec-decoding: mtp }
+
+qwen3.5-fp8-mi355x-atom:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: Qwen/Qwen3.5-397B-A17B-FP8
+  model-prefix: qwen3.5
+  runner: mi355x
+  precision: fp8
+  framework: atom
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 }
+      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, ep: 1, conc-start: 4, conc-end: 256 }
+      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 }
+
+qwen3.5-fp8-mi355x-atom-mtp:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: Qwen/Qwen3.5-397B-A17B-FP8
+  model-prefix: qwen3.5
+  runner: mi355x
+  precision: fp8
+  framework: atom
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 qwen3.5-fp4-mi355x-sglang:
   image: rocm/sgl-dev:v0.5.10rc0-rocm720-mi35x-20260413
@@ -253,17 +314,39 @@ qwen3.5-fp4-mi355x-sglang:
   precision: fp4
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 256 }
-    - { tp: 4, conc-start: 4, conc-end: 16 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 256 }
-    - { tp: 4, conc-start: 4, conc-end: 16 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 16 }
+
+qwen3.5-fp4-mi355x-atom:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: amd/Qwen3.5-397B-A17B-MXFP4
+  model-prefix: qwen3.5
+  runner: mi355x
+  precision: fp4
+  framework: atom
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 16 }
 
 qwen3.5-fp8-mi300x-sglang:
   image: lmsysorg/sglang:v0.5.10-rocm720-mi30x
@@ -273,15 +356,16 @@ qwen3.5-fp8-mi300x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 glm5-fp8-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413
@@ -291,51 +375,58 @@ glm5-fp8-mi355x-sglang:
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 glm5-fp8-mi355x-sglang-mtp:
-  image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260413
+  image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: mi355x
   precision: fp8
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 128, spec-decoding: mtp }
+      - { tp: 8, conc-start: 4, conc-end: 8, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 128, spec-decoding: mtp }
+      - { tp: 8, conc-start: 4, conc-end: 8, spec-decoding: mtp }
 
 glm5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2.post
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
   runner: mi355x
   precision: fp8
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 256 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 256 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+      - { tp: 8, conc-start: 4, conc-end: 256 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+      - { tp: 8, conc-start: 4, conc-end: 256 }
 
 glm5.1-fp4-mi355x-sglang:
   image: lmsysorg/sglang-rocm:v0.5.10rc0-rocm720-mi35x-20260415
@@ -345,17 +436,37 @@ glm5.1-fp4-mi355x-sglang:
   precision: fp4
   framework: sglang
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 256 }
-    - { tp: 4, conc-start: 4, conc-end: 16 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 256 }
-    - { tp: 4, conc-start: 4, conc-end: 16 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 16 }
+
+glm5.1-fp4-mi355x-atom:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: amd/GLM-5.1-MXFP4
+  model-prefix: glm5.1
+  runner: mi355x
+  precision: fp4
+  framework: atom
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 4, conc-start: 4, conc-end: 256 }
 
 kimik2.5-int4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -365,15 +476,16 @@ kimik2.5-int4-mi355x-vllm:
   precision: int4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 kimik2.5-int4-mi325x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -383,15 +495,16 @@ kimik2.5-int4-mi325x-vllm:
   precision: int4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 kimik2.5-int4-mi300x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -401,15 +514,16 @@ kimik2.5-int4-mi300x-vllm:
   precision: int4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
 
 kimik2.5-int4-mi355x-vllm-mtp:
   image: vllm/vllm-openai-rocm:v0.20.1
@@ -473,17 +587,18 @@ kimik2.5-fp4-mi355x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
 
 kimik2.5-fp4-mi355x-atom:
   image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
@@ -493,17 +608,18 @@ kimik2.5-fp4-mi355x-atom:
   precision: fp4
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 128 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 128 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 128 }
+      - { tp: 4, conc-start: 4, conc-end: 128 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 128 }
+      - { tp: 4, conc-start: 4, conc-end: 128 }
 
 minimaxm2.5-fp8-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.19.0
@@ -513,41 +629,66 @@ minimaxm2.5-fp8-mi355x-vllm:
   precision: fp8
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 2, ep: 2, conc-start: 2, conc-end: 512 }
-    - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 }
-    - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 }
-    - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 }
-    - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, ep: 2, conc-start: 2, conc-end: 512 }
+      - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 }
+      - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, ep: 2, conc-start: 2, conc-end: 256 }
+      - { tp: 4, ep: 4, conc-start: 4, conc-end: 512 }
+      - { tp: 8, ep: 8, conc-start: 2, conc-end: 2 }
 
 minimaxm2.5-fp8-mi355x-atom:
-  image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: mi355x
   precision: fp8
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 128 }
-    - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 128 }
-    - { tp: 8, ep: 8, conc-start: 32, conc-end: 256 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+
+minimaxm2.5-fp4-mi355x-atom:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: amd/MiniMax-M2.5-MXFP4
+  model-prefix: minimaxm2.5
+  runner: mi355x
+  precision: fp4
+  framework: atom
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 1024 }
+      - { tp: 2, conc-start: 4, conc-end: 1024 }
+      - { tp: 4, conc-start: 4, conc-end: 128 }
+      - { tp: 8, conc-start: 4, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 1024 }
+      - { tp: 2, conc-start: 4, conc-end: 1024 }
+      - { tp: 4, conc-start: 4, conc-end: 128 }
+      - { tp: 8, conc-start: 4, conc-end: 16 }
 
 minimaxm2.5-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.19.1
@@ -557,19 +698,20 @@ minimaxm2.5-fp4-mi355x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 32 }
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 32 }
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 32 }
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 32 }
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
 
 minimaxm2.5-fp8-mi300x-vllm:
   image: vllm/vllm-openai-rocm:v0.16.0
@@ -579,17 +721,18 @@ minimaxm2.5-fp8-mi300x-vllm:
   precision: fp8
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
 
 minimaxm2.5-fp8-mi325x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
@@ -599,17 +742,18 @@ minimaxm2.5-fp8-mi325x-vllm:
   precision: fp8
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 8, ep: 8, conc-start: 4, conc-end: 512 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 }
 
 gptoss-fp4-mi300x-vllm:
   image: vllm/vllm-openai-rocm:v0.17.0
@@ -619,21 +763,22 @@ gptoss-fp4-mi300x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 64, conc-end: 256 }
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-    - { tp: 8, conc-start: 1, conc-end: 16 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 64 }
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-    - { tp: 8, conc-start: 1, conc-end: 16 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 64, conc-end: 256 }
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 8, conc-start: 1, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 8, conc-start: 1, conc-end: 16 }
 
 gptoss-fp4-mi325x-vllm:
   image: vllm/vllm-openai-rocm:v0.17.0
@@ -643,21 +788,22 @@ gptoss-fp4-mi325x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 64 }
-    - { tp: 2, conc-start: 4, conc-end: 64 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-    - { tp: 8, conc-start: 4, conc-end: 64 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 64 }
-    - { tp: 2, conc-start: 4, conc-end: 8 }
-    - { tp: 4, conc-start: 4, conc-end: 8 }
-    - { tp: 8, conc-start: 4, conc-end: 16 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 2, conc-start: 4, conc-end: 64 }
+      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 64 }
+      - { tp: 2, conc-start: 4, conc-end: 8 }
+      - { tp: 4, conc-start: 4, conc-end: 8 }
+      - { tp: 8, conc-start: 4, conc-end: 16 }
 
 gptoss-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.17.0
@@ -667,39 +813,41 @@ gptoss-fp4-mi355x-vllm:
   precision: fp4
   framework: vllm
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 8 }
-    - { tp: 8, conc-start: 4, conc-end: 16 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 128 }
-    - { tp: 4, conc-start: 4, conc-end: 4 }
-    - { tp: 8, conc-start: 4, conc-end: 8 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 128 }
+      - { tp: 4, conc-start: 4, conc-end: 8 }
+      - { tp: 8, conc-start: 4, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 128 }
+      - { tp: 4, conc-start: 4, conc-end: 4 }
+      - { tp: 8, conc-start: 4, conc-end: 8 }
 
 gptoss-fp4-mi355x-atom:
-  image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
   model: openai/gpt-oss-120b
   model-prefix: gptoss
   runner: mi355x
   precision: fp4
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 16, conc-end: 128 }
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 1, conc-start: 4, conc-end: 128 }
-    - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 16, conc-end: 256 }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 32 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 1, conc-start: 4, conc-end: 256 }
+      - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 }
 
 dsr1-fp8-mi355x-atom:
   image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
@@ -710,15 +858,16 @@ dsr1-fp8-mi355x-atom:
   # WIP framework (no customers yet)
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 128 }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 128 }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 128 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 128 }
 
 dsr1-fp8-mi355x-atom-mtp:
   image: rocm/atom:rocm7.2.1-ubuntu24.04-pytorch2.9.1-atom0.1.2
@@ -728,15 +877,16 @@ dsr1-fp8-mi355x-atom-mtp:
   precision: fp8
   framework: atom
   multinode: false
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp  }
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp  }
 
 dsr1-fp8-mi355x-sglang-disagg:
   image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2
@@ -747,150 +897,151 @@ dsr1-fp8-mi355x-sglang-disagg:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    # non-MTP configurations
-    # "Top of curve" (1 prefill workers each at DEP8 and 1 decode workers at DEP16)
-    - spec-decoding: "none"
-      conc-list: [ 1024, 2048 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    # "Middle of curve" (1 prefill workers each at TP8 and 2 decode workers at DEP8)
-    - spec-decoding: "none"
-      conc-list: [ 1536, 1024, 512 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-
-    # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8)
-    - spec-decoding: "none"
-      conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    - spec-decoding: "none"
-      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
-
-  - isl: 8192
-    osl: 1024
-    search-space:
-    # non-MTP configurations
-    # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8)
-    - spec-decoding: "none"
-      conc-list: [ 1024, 2048 ]
-      prefill:
-        num-worker: 2
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "PREFILL_NODES=2"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
-
-    # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
-    - spec-decoding: "none"
-      conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    - spec-decoding: "none"
-      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      # non-MTP configurations
+      # "Top of curve" (1 prefill worker at DEP8 and 1 decode worker at DEP16)
+      - spec-decoding: "none"
+        conc-list: [ 1024, 2048 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8)
+      - spec-decoding: "none"
+        conc-list: [ 1536, 1024, 512 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+
+      # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8)
+      - spec-decoding: "none"
+        conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      - spec-decoding: "none"
+        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
+
+    - isl: 8192
+      osl: 1024
+      search-space:
+      # non-MTP configurations
+      # "Top of curve" (2 prefill workers at DEP8 and 1 decode worker at DEP8)
+      - spec-decoding: "none"
+        conc-list: [ 1024, 2048 ]
+        prefill:
+          num-worker: 2
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=2"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
+
+      # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
+      - spec-decoding: "none"
+        conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      - spec-decoding: "none"
+        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
 
 
 dsr1-fp8-mi355x-sglang-disagg-mtp:
@@ -902,150 +1053,151 @@ dsr1-fp8-mi355x-sglang-disagg-mtp:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    # MTP configurations
-    # "Top of curve" (1 prefill worker at DEP8 and 1 decode worker at DEP16)
-    - spec-decoding: "mtp"
-      conc-list: [ 1024, 2048 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=1"
-
-    # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8)
-    - spec-decoding: "mtp"
-      conc-list: [ 1536, 1024, 512, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=1"
-
-
-    # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8)
-    - spec-decoding: "mtp"
-      conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=2"
-
-    - spec-decoding: "mtp"
-      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=2"
-
-  - isl: 8192
-    osl: 1024
-    search-space:
-    # MTP configurations
-    # "Top of curve" (2 prefill worker at DEP8 and 1 decode worker at DEP8)
-    - spec-decoding: "mtp"
-      conc-list: [ 1024, 2048 ]
-      prefill:
-        num-worker: 2
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "PREFILL_NODES=2"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=1"
-
-    # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
-    - spec-decoding: "mtp"
-      conc-list: [ 256, 128, 64, 32, 16, 8, 4, 2 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=2"
-
-    - spec-decoding: "mtp"
-      conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=2"
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      # MTP configurations
+      # "Top of curve" (1 prefill worker at DEP8 and 1 decode worker at DEP16)
+      - spec-decoding: "mtp"
+        conc-list: [ 1024, 2048 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=1"
+
+      # "Middle of curve" (1 prefill worker at TP8 and 2 decode workers each at DEP8)
+      - spec-decoding: "mtp"
+        conc-list: [ 1536, 1024, 512, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=1"
+
+
+      # "Bottom of curve" (1 prefill worker at TEP8 and 2 decode workers at TEP8)
+      - spec-decoding: "mtp"
+        conc-list: [ 256, 128, 64, 32, 16, 8, 4 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=2"
+
+      - spec-decoding: "mtp"
+        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=2"
+
+    - isl: 8192
+      osl: 1024
+      search-space:
+      # MTP configurations
+      # "Top of curve" (2 prefill workers at DEP8 and 1 decode worker at DEP8)
+      - spec-decoding: "mtp"
+        conc-list: [ 1024, 2048 ]
+        prefill:
+          num-worker: 2
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=2"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=1"
+
+      # "Bottom of curve" (1 prefill worker at TP8 and 2 decode workers at TP8)
+      - spec-decoding: "mtp"
+        conc-list: [ 256, 128, 64, 32, 16, 8, 4, 2 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=2"
+
+      - spec-decoding: "mtp"
+        conc-list: [ 64, 32, 16, 8, 4, 2, 1 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=2"
 
 
 dsr1-fp4-mi355x-sglang-disagg:
@@ -1057,204 +1209,205 @@ dsr1-fp4-mi355x-sglang-disagg:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    # non-MTP configurations
-    # 1P1D TP8
-    - spec-decoding: "none"
-      conc-list: [ 1, 2, 4, 8 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
-
-    # 1P2D TP8
-    - spec-decoding: "none"
-      conc-list: [ 2, 4, 8, 16, 32 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    # 1P2D TP8
-    - spec-decoding: "none" 
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    # 1P2D TP4
-    - spec-decoding: "none" 
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      # non-MTP configurations
+      # 1P1D TP8
+      - spec-decoding: "none"
+        conc-list: [ 1, 2, 4, 8 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
+
+      # 1P2D TP8
+      - spec-decoding: "none"
+        conc-list: [ 2, 4, 8, 16, 32 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      # 1P2D TP8
+      - spec-decoding: "none" 
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      # 1P2D TP4
+      - spec-decoding: "none" 
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
     
-    # 1*DEP4+ 1*DEP8
-    - spec-decoding: "none"
-      conc-list: [ 1024, 2048 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 4
-        dp-attn: true
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
-
-  - isl: 8192
-    osl: 1024
-    search-space:
-    # non-MTP configurations
-    # 1P1D pure TP8
-    - spec-decoding: "none"
-      conc-list: [ 1, 2, 4, 8 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
-
-    # 1P2D TP8
-    - spec-decoding: "none"
-      conc-list: [ 2, 4, 8, 16, 32 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    # 1P2D TP8
-    - spec-decoding: "none"
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    # 1P2D TP4
-    - spec-decoding: "none"
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=0"
-
-    # 4*DEP4 + 1*DEP8
-    - spec-decoding: "none"
-      conc-list: [ 1024, 2048, 4096 ]
-      prefill:
-        num-worker: 4
-        tp: 4
-        ep: 4
-        dp-attn: true
-        additional-settings:
-        - "PREFILL_NODES=4"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=0"
+      # 1*DEP4 + 1*DEP8
+      - spec-decoding: "none"
+        conc-list: [ 1024, 2048 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
+
+    - isl: 8192
+      osl: 1024
+      search-space:
+      # non-MTP configurations
+      # 1P1D pure TP8
+      - spec-decoding: "none"
+        conc-list: [ 1, 2, 4, 8 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
+
+      # 1P2D TP8
+      - spec-decoding: "none"
+        conc-list: [ 2, 4, 8, 16, 32 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      # 1P2D TP8
+      - spec-decoding: "none"
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      # 1P2D TP4
+      - spec-decoding: "none"
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=0"
+
+      # 4*DEP4 + 1*DEP8
+      - spec-decoding: "none"
+        conc-list: [ 1024, 2048, 4096 ]
+        prefill:
+          num-worker: 4
+          tp: 4
+          ep: 4
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=4"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=0"
 
 dsr1-fp4-mi355x-sglang-disagg-mtp:
   image: rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-3
@@ -1265,203 +1418,299 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
   framework: sglang-disagg
   multinode: true
   disagg: true
-  seq-len-configs:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    # MTP configurations
-    # 1P1D TP8
-    - spec-decoding: "mtp"
-      conc-list: [ 1, 2, 4, 8 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=3"
-
-    # 1P2D TP8
-    - spec-decoding: "mtp" 
-      conc-list: [ 2, 4, 8, 16, 32 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=3"
-
-    # 1P2D TP8
-    - spec-decoding: "mtp" 
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=1"
-
-    # 1P2D TP4
-    - spec-decoding: "mtp" 
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=1"
-
-    # 1*DEP4+ 1*DEP8
-    - spec-decoding: "mtp"
-      conc-list: [ 1024, 2048 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 4
-        dp-attn: true
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=1"
-
-
-  - isl: 8192
-    osl: 1024
-    search-space:
-    # MTP configurations
-    # 1P1D pure TP8
-    - spec-decoding: "mtp"
-      conc-list: [ 1, 2, 4, 8 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=3"
-
-
-    # 1P2D TP8
-    - spec-decoding: "mtp"
-      conc-list: [ 2, 4, 8, 16, 32 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=3"
-
-    # 1P2D TP8
-    - spec-decoding: "mtp"
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=1"
-
-    # 1P2D TP4
-    - spec-decoding: "mtp"
-      conc-list: [ 64, 128, 256 ]
-      prefill:
-        num-worker: 1
-        tp: 4
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "PREFILL_NODES=1"
-      decode:
-        num-worker: 2
-        tp: 8
-        ep: 1
-        dp-attn: false
-        additional-settings:
-        - "DECODE_NODES=2"
-        - "DECODE_MTP_SIZE=1"
-
-    # 4*DEP4 + 1*DEP8
-    - spec-decoding: "mtp"
-      conc-list: [ 1024, 2048, 4096 ]
-      prefill:
-        num-worker: 4
-        tp: 4
-        ep: 4
-        dp-attn: true
-        additional-settings:
-        - "PREFILL_NODES=4"
-      decode:
-        num-worker: 1
-        tp: 8
-        ep: 8
-        dp-attn: true
-        additional-settings:
-        - "DECODE_NODES=1"
-        - "DECODE_MTP_SIZE=1"
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      # MTP configurations
+      # 1P1D TP8
+      - spec-decoding: "mtp"
+        conc-list: [ 1, 2, 4, 8 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=3"
+
+      # 1P2D TP8
+      - spec-decoding: "mtp" 
+        conc-list: [ 2, 4, 8, 16, 32 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=3"
+
+      # 1P2D TP8
+      - spec-decoding: "mtp" 
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=1"
+
+      # 1P2D TP4
+      - spec-decoding: "mtp" 
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=1"
+
+      # 1*DEP4 + 1*DEP8
+      - spec-decoding: "mtp"
+        conc-list: [ 1024, 2048 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=1"
+
+
+    - isl: 8192
+      osl: 1024
+      search-space:
+      # MTP configurations
+      # 1P1D pure TP8
+      - spec-decoding: "mtp"
+        conc-list: [ 1, 2, 4, 8 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=3"
+
+
+      # 1P2D TP8
+      - spec-decoding: "mtp"
+        conc-list: [ 2, 4, 8, 16, 32 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=3"
+
+      # 1P2D TP8
+      - spec-decoding: "mtp"
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=1"
+
+      # 1P2D TP4
+      - spec-decoding: "mtp"
+        conc-list: [ 64, 128, 256 ]
+        prefill:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "PREFILL_NODES=1"
+        decode:
+          num-worker: 2
+          tp: 8
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "DECODE_NODES=2"
+          - "DECODE_MTP_SIZE=1"
+
+      # 4*DEP4 + 1*DEP8
+      - spec-decoding: "mtp"
+        conc-list: [ 1024, 2048, 4096 ]
+        prefill:
+          num-worker: 4
+          tp: 4
+          ep: 4
+          dp-attn: true
+          additional-settings:
+          - "PREFILL_NODES=4"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+          additional-settings:
+          - "DECODE_NODES=1"
+          - "DECODE_MTP_SIZE=1"
+
+dsv4-fp8-mi355x-sglang:
+  image: rocm/sgl-dev:deepseek-v4-mi35x
+  model: sgl-project/DeepSeek-V4-Pro-FP8
+  model-prefix: dsv4
+  runner: mi355x
+  precision: fp8
+  framework: sglang
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64 }
+
+# DSv4-Pro FP4 on MI355X via SGLang. Uses a rocm720 mi35x image built off the
+# amd/deepseek_v4 branch in sgl-project/sglang; the SHA is encoded in the
+# image tag, so bumping sglang is just an image tag bump here. Sweeps
+# DP-attention on/off and EP=8.
+dsv4-fp4-mi355x-sglang:
+  image: rocm/sgl-dev:rocm720-mi35x-a8410de-20260502-DSv4
+  model: deepseek-ai/DeepSeek-V4-Pro
+  model-prefix: dsv4
+  runner: mi355x
+  precision: fp4
+  framework: sglang
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, dp-attn: true, conc-start: 16, conc-end: 256 }
+      - { tp: 8, dp-attn: false, conc-start: 1, conc-end: 16 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, dp-attn: true, conc-start: 16, conc-end: 128 }
+      - { tp: 8, dp-attn: false, conc-start: 1, conc-end: 16 }
+
+# vLLM with AITER MLA decode for DSv4 on MI355X (vllm-project/vllm#40889,
+# stacked on #40871). Uses the ATOM MI355X image (ROCm 7.2.2, aiter with
+# MLA decode, MI355X GPU detection); vLLM is rebuilt from the PR branch
+# at runtime by benchmarks/single_node/dsv4_fp8_mi355x_vllm.sh at a
+# pinned SHA. Once both PRs merge into a release, switch to a vLLM ROCm
+# MI355X image and remove the build step.
+dsv4-fp8-mi355x-vllm:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: deepseek-ai/DeepSeek-V4-Pro
+  model-prefix: dsv4
+  runner: mi355x
+  precision: fp8
+  framework: vllm
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 1, conc-end: 1 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 1, conc-end: 1 }
+
+# Day-0 single-sequence marker for DeepSeek-V4 on ATOM (ROCm/ATOM#650).
+# PR1 of the ATOM DSv4 series still uses torch sparse-attention fallbacks
+# that OOM once warmup/prefill batches multiple requests; keep CONC=1 until
+# the AITER sparse-attention kernel / multi-request path lands upstream.
+# --enforce-eager and ATOM_USE_TRITON_MOE=1 are required on gfx950. Image is
+# the standard atom0.1.2.post MI355X base (matching qwen3.5-fp8-mi355x-atom);
+# the DSv4 PR is overlaid at runtime by dsv4_fp4_mi355x_atom.sh at a pinned SHA.
+dsv4-fp4-mi355x-atom:
+  image: rocm/atom:rocm7.2.2_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.2.post
+  model: deepseek-ai/DeepSeek-V4-Pro
+  model-prefix: dsv4
+  runner: mi355x
+  precision: fp4
+  framework: atom
+  multinode: false
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, ep: 1, conc-start: 1, conc-end: 1 }

From 512552dd0775361f6a75eb082dc1d790ea7108bf Mon Sep 17 00:00:00 2001
From: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
Date: Tue, 5 May 2026 22:21:07 -0500
Subject: [PATCH 3/4] Update kimi k2.5 mtp support by using modelrunnerV2 and
 vllm v0.20.1

Signed-off-by: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
---
 .github/configs/amd-master.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index d5ee5d664..7c54c5009 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -533,7 +533,8 @@ kimik2.5-int4-mi355x-vllm-mtp:
   precision: int4
   framework: vllm
   multinode: false
-  seq-len-configs:
+  scenarios:
+    fixed-seq-len:
   - isl: 1024
     osl: 1024
     search-space:
@@ -551,7 +552,8 @@ kimik2.5-int4-mi300x-vllm-mtp:
   precision: int4
   framework: vllm
   multinode: false
-  seq-len-configs:
+  scenarios:
+    fixed-seq-len:
   - isl: 1024
     osl: 1024
     search-space:
@@ -569,7 +571,8 @@ kimik2.5-int4-mi325x-vllm-mtp:
   precision: int4
   framework: vllm
   multinode: false
-  seq-len-configs:
+  scenarios:
+    fixed-seq-len:
   - isl: 1024
     osl: 1024
     search-space:

From 48b30857db1f07f926e73ef636e7966d43aedc13 Mon Sep 17 00:00:00 2001
From: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
Date: Tue, 5 May 2026 22:39:05 -0500
Subject: [PATCH 4/4] Update kimi k2.5 mtp support by using modelrunnerV2 and
 vllm v0.20.1

Signed-off-by: haic0 <haichzha@gbt350-odcdh2-b05-1.png-odc.dcgpu>
---
 .github/configs/amd-master.yaml | 50 ++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 7c54c5009..7c666fe6d 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -535,14 +535,14 @@ kimik2.5-int4-mi355x-vllm-mtp:
   multinode: false
   scenarios:
     fixed-seq-len:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
 
 kimik2.5-int4-mi300x-vllm-mtp:
   image: vllm/vllm-openai-rocm:v0.20.1
@@ -554,14 +554,14 @@ kimik2.5-int4-mi300x-vllm-mtp:
   multinode: false
   scenarios:
     fixed-seq-len:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
 
 kimik2.5-int4-mi325x-vllm-mtp:
   image: vllm/vllm-openai-rocm:v0.20.1
@@ -573,15 +573,15 @@ kimik2.5-int4-mi325x-vllm-mtp:
   multinode: false
   scenarios:
     fixed-seq-len:
-  - isl: 1024
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
-  - isl: 8192
-    osl: 1024
-    search-space:
-    - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
-    
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp }
+
 kimik2.5-fp4-mi355x-vllm:
   image: vllm/vllm-openai-rocm:v0.18.0
   model: amd/Kimi-K2.5-MXFP4