SemiAnalysisAI · chunfangamd · Feb 25, 2026 · Feb 25, 2026 · Feb 26, 2026 · Mar 11, 2026
@@ -73,7 +73,7 @@ dsr1-fp4-mi355x-atom-mtp:
     - { tp: 8, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-mi300x-sglang:
-  image: lmsysorg/sglang:v0.5.9-rocm700-mi30x
+  image: lmsysorg/sglang-daily:v0.5.9-rocm720-mi30x-20260310
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: mi300x

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -1041,4 +1041,14 @@
     - "Add --exclusive flag to MI355X single-node salloc and multi-node sbatch to prevent node sharing during benchmarks"
     - "Only non-TP8 configs listed; TP8 already uses all GPUs on the node"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/934
+
+- config-keys:
+    - dsr1-fp8-mi300x-sglang
+  description:
+    - "Update MI300X FP8 DSR1 SGLang image from lmsysorg/sglang:v0.5.9-rocm700-mi30x to lmsysorg/sglang-daily:v0.5.9-rocm720-mi30x-20260310"
+    - "Include configuration files for three GEMM operations: https://github.com/ROCm/aiter/pull/2024"
+    - "Improve TPOT by using fp8 bmm in MLA and MI300X for DSR1/V3: https://github.com/sgl-project/sglang/pull/18624"
+    - "Broaden the optimized paths to all HIP platforms and add tuned FP8 GEMM configs: https://github.com/sgl-project/sglang/pull/18242"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/811
+