From 934cae450b0c7294e07697321e7eb75ad6e4c833 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Tue, 24 Mar 2026 17:14:32 +0800 Subject: [PATCH 1/9] [plugin][oot benchmark] refine the OOT benchmark workflow Signed-off-by: zejunchen-zejun --- .github/benchmark/oot_benchmark_models.json | 10 +- .../workflows/atom-vllm-oot-benchmark.yaml | 132 +++++++----------- 2 files changed, 58 insertions(+), 84 deletions(-) diff --git a/.github/benchmark/oot_benchmark_models.json b/.github/benchmark/oot_benchmark_models.json index e2a54a609..3808e5896 100644 --- a/.github/benchmark/oot_benchmark_models.json +++ b/.github/benchmark/oot_benchmark_models.json @@ -7,7 +7,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { "display": "DeepSeek-R1 MXFP4", @@ -17,7 +17,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { "display": "gpt-oss-120b", @@ -27,7 +27,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "linux-atom-mi355-1", - "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { "display": "Kimi-K2-Thinking-MXFP4 TP4", @@ -38,7 +38,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { "display": "Kimi-K2-Thinking-MXFP4 TP8", @@ -49,6 +49,6 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" } ] diff --git a/.github/workflows/atom-vllm-oot-benchmark.yaml b/.github/workflows/atom-vllm-oot-benchmark.yaml index 321b5d1c4..5c844c07a 100644 --- a/.github/workflows/atom-vllm-oot-benchmark.yaml +++ b/.github/workflows/atom-vllm-oot-benchmark.yaml @@ -43,7 +43,7 @@ on: Example (single set): 1024,1024,128,0.8 Example (multiple sets): 1024,1024,128,0.8;2048,1024,256,0.7" type: string - default: "1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,1024,128,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8;8192,1024,128,0.8" + default: "1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,1024,128,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;1024,8192,128,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8;8192,1024,128,0.8" env: BASE_IMAGE: ${{ inputs.base_image || 'rocm/atom-dev:latest' }} @@ -62,7 +62,7 @@ jobs: - name: Parse parameter lists id: parse-param-lists run: | - PARAM_LISTS="${{ inputs.param_lists || '1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,1024,128,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8;8192,1024,128,0.8' }}" + PARAM_LISTS="${{ inputs.param_lists || '1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,1024,128,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;1024,8192,128,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8;8192,1024,128,0.8' }}" echo "Using param_lists: ${PARAM_LISTS}" printf 'param_lists=%s\n' "${PARAM_LISTS}" >> "$GITHUB_OUTPUT" IFS=';' read -ra SETS <<< "${PARAM_LISTS}" @@ -165,7 +165,7 @@ jobs: docker push "${{ steps.meta.outputs.oot_image_tag }}" benchmark: - name: OOT ${{ matrix.model.display }} + name: OOT ${{ matrix.model.display }} ${{ matrix.params.input_length }}/${{ matrix.params.output_length }} c=${{ matrix.params.concurrency }} needs: [parse-param-lists, load-models, build-oot-image] if: >- always() @@ -176,10 +176,10 @@ jobs: fail-fast: false matrix: model: ${{ fromJson(needs.load-models.outputs.models_json) }} + params: ${{ fromJson(needs.parse-param-lists.outputs.matrix_json) }} runs-on: ${{ matrix.model.runner }} timeout-minutes: 240 env: - PARAM_LISTS: ${{ needs.parse-param-lists.outputs.param_lists }} MODEL_NAME: ${{ matrix.model.display }} DASHBOARD_MODEL_NAME: ${{ matrix.model.dashboard_model || '' }} MODEL_SOURCE_PATH: ${{ matrix.model.source_path || matrix.model.path }} @@ -187,6 +187,11 @@ jobs: OOT_EXTRA_ARGS: ${{ matrix.model.extra_args }} BENCH_EXTRA_ARGS: ${{ matrix.model.bench_args }} RESULT_PREFIX: ${{ matrix.model.prefix }} + ISL: ${{ matrix.params.input_length }} + OSL: ${{ matrix.params.output_length }} + CONC: ${{ matrix.params.concurrency }} + RANDOM_RANGE_RATIO: ${{ matrix.params.random_range_ratio }} + RESULT_FILENAME: ${{ matrix.model.prefix }}-${{ matrix.params.input_length }}-${{ matrix.params.output_length }}-${{ matrix.params.concurrency }}-${{ matrix.params.random_range_ratio }} CONTAINER_NAME: atom_vllm_oot_benchmark_${{ strategy.job-index }} CONTAINER_RESULT_DIR: /tmp/oot-benchmark-results CONTAINER_BENCH_SERVING_DIR: /tmp/oot-benchmark/bench_serving @@ -369,86 +374,55 @@ jobs: TRUST_REMOTE_CODE_ARG="--trust-remote-code" fi - FAIL_COUNT=0 - shopt -s nullglob - rm -f "${RESULT_PREFIX}"-*.json - IFS=';' read -ra SETS <<< "${PARAM_LISTS}" - for SET in "${SETS[@]}"; do - IFS=',' read -ra PARAMS <<< "$SET" - ISL="${PARAMS[0]}" - OSL="${PARAMS[1]}" - CONC="${PARAMS[2]}" - RANDOM_RANGE_RATIO="${PARAMS[3]}" - RESULT_FILENAME="${RESULT_PREFIX}-${ISL}-${OSL}-${CONC}-${RANDOM_RANGE_RATIO}" - - echo "=== Benchmark config: ${MODEL_NAME} ISL=${ISL} OSL=${OSL} CONC=${CONC} RANDOM_RANGE_RATIO=${RANDOM_RANGE_RATIO} ===" - - if ! docker exec \ - -e ISL="${ISL}" \ - -e OSL="${OSL}" \ - -e CONC="${CONC}" \ - -e RANDOM_RANGE_RATIO="${RANDOM_RANGE_RATIO}" \ - -e RESULT_FILENAME="${RESULT_FILENAME}" \ - -e BENCH_EXTRA_ARGS="${BENCH_EXTRA_ARGS}" \ - "$CONTAINER_NAME" bash -lc " - set -euo pipefail - rm -rf \"${CONTAINER_RESULT_DIR}\" - mkdir -p \"${CONTAINER_RESULT_DIR}\" - PYTHONDONTWRITEBYTECODE=1 python \"${CONTAINER_BENCH_SERVING_DIR}/benchmark_serving.py\" \ - --model=\"${OOT_RESOLVED_MODEL_PATH:-$MODEL_PATH}\" \ - --backend=vllm \ - --base-url=http://127.0.0.1:8000 \ - --dataset-name=random \ - --random-input-len=\"${ISL}\" \ - --random-output-len=\"${OSL}\" \ - --random-range-ratio \"${RANDOM_RANGE_RATIO}\" \ - --num-prompts=\"$(( CONC * 10 ))\" \ - --max-concurrency=\"${CONC}\" \ - ${TRUST_REMOTE_CODE_ARG} \ - --num-warmups=\"$(( 2 * CONC ))\" \ - --request-rate=inf \ - --ignore-eos \ - --save-result \ - --percentile-metrics=\"ttft,tpot,itl,e2el\" \ - --result-dir=\"${CONTAINER_RESULT_DIR}\" \ - --result-filename=\"${RESULT_FILENAME}.json\" \ - ${BENCH_EXTRA_ARGS:-} - "; then - echo "::warning::Benchmark failed for ${MODEL_NAME} ISL=${ISL} OSL=${OSL} CONC=${CONC}" - FAIL_COUNT=$((FAIL_COUNT + 1)) - continue - fi + echo "=== Benchmark config: ${MODEL_NAME} ISL=${ISL} OSL=${OSL} CONC=${CONC} RANDOM_RANGE_RATIO=${RANDOM_RANGE_RATIO} ===" - if ! docker exec \ - -e RESULT_PATH="${CONTAINER_RESULT_DIR}/${RESULT_FILENAME}.json" \ - -e ISL="${ISL}" \ - -e OSL="${OSL}" \ - -e EXTRA_ARGS_TEXT="${OOT_EXTRA_ARGS}" \ - -e DASHBOARD_MODEL_NAME="${DASHBOARD_MODEL_NAME}" \ - "$CONTAINER_NAME" python3 -c "import json, os, re; result_path = os.environ['RESULT_PATH']; data = json.load(open(result_path, encoding='utf-8')); data['random_input_len'] = int(os.environ['ISL']); data['random_output_len'] = int(os.environ['OSL']); data['benchmark_backend'] = 'ATOM-vLLM'; display_name = os.environ.get('DASHBOARD_MODEL_NAME', ''); display_name and data.__setitem__('benchmark_model_name', display_name); tp_match = re.search(r'--tensor-parallel-size\\s+(\\d+)', os.environ.get('EXTRA_ARGS_TEXT', '')); tp_match and data.__setitem__('tensor_parallel_size', int(tp_match.group(1))); json.dump(data, open(result_path, 'w', encoding='utf-8'), indent=2)"; then - echo "::warning::Failed to post-process ${RESULT_FILENAME}.json" - FAIL_COUNT=$((FAIL_COUNT + 1)) - continue - fi + docker exec \ + -e ISL="${ISL}" \ + -e OSL="${OSL}" \ + -e CONC="${CONC}" \ + -e RANDOM_RANGE_RATIO="${RANDOM_RANGE_RATIO}" \ + -e RESULT_FILENAME="${RESULT_FILENAME}" \ + -e BENCH_EXTRA_ARGS="${BENCH_EXTRA_ARGS}" \ + "$CONTAINER_NAME" bash -lc " + set -euo pipefail + rm -rf \"${CONTAINER_RESULT_DIR}\" + mkdir -p \"${CONTAINER_RESULT_DIR}\" + PYTHONDONTWRITEBYTECODE=1 python \"${CONTAINER_BENCH_SERVING_DIR}/benchmark_serving.py\" \ + --model=\"${OOT_RESOLVED_MODEL_PATH:-$MODEL_PATH}\" \ + --backend=vllm \ + --base-url=http://127.0.0.1:8000 \ + --dataset-name=random \ + --random-input-len=\"${ISL}\" \ + --random-output-len=\"${OSL}\" \ + --random-range-ratio \"${RANDOM_RANGE_RATIO}\" \ + --num-prompts=\"$(( CONC * 10 ))\" \ + --max-concurrency=\"${CONC}\" \ + ${TRUST_REMOTE_CODE_ARG} \ + --num-warmups=\"$(( 2 * CONC ))\" \ + --request-rate=inf \ + --ignore-eos \ + --save-result \ + --percentile-metrics=\"ttft,tpot,itl,e2el\" \ + --result-dir=\"${CONTAINER_RESULT_DIR}\" \ + --result-filename=\"${RESULT_FILENAME}.json\" \ + ${BENCH_EXTRA_ARGS:-} + " - if ! docker cp "${CONTAINER_NAME}:${CONTAINER_RESULT_DIR}/${RESULT_FILENAME}.json" "./${RESULT_FILENAME}.json"; then - echo "::warning::Failed to copy ${RESULT_FILENAME}.json from container" - FAIL_COUNT=$((FAIL_COUNT + 1)) - continue - fi - done + docker exec \ + -e RESULT_PATH="${CONTAINER_RESULT_DIR}/${RESULT_FILENAME}.json" \ + -e ISL="${ISL}" \ + -e OSL="${OSL}" \ + -e EXTRA_ARGS_TEXT="${OOT_EXTRA_ARGS}" \ + -e DASHBOARD_MODEL_NAME="${DASHBOARD_MODEL_NAME}" \ + "$CONTAINER_NAME" python3 -c "import json, os, re; result_path = os.environ['RESULT_PATH']; data = json.load(open(result_path, encoding='utf-8')); data['random_input_len'] = int(os.environ['ISL']); data['random_output_len'] = int(os.environ['OSL']); data['benchmark_backend'] = 'ATOM-vLLM'; display_name = os.environ.get('DASHBOARD_MODEL_NAME', ''); display_name and data.__setitem__('benchmark_model_name', display_name); tp_match = re.search(r'--tensor-parallel-size\\s+(\\d+)', os.environ.get('EXTRA_ARGS_TEXT', '')); tp_match and data.__setitem__('tensor_parallel_size', int(tp_match.group(1))); json.dump(data, open(result_path, 'w', encoding='utf-8'), indent=2)" - if [ "${FAIL_COUNT}" -gt 0 ]; then - echo "::warning::${FAIL_COUNT} benchmark config(s) failed for ${MODEL_NAME}" - fi + docker cp "${CONTAINER_NAME}:${CONTAINER_RESULT_DIR}/${RESULT_FILENAME}.json" "./${RESULT_FILENAME}.json" - name: Collect benchmark result if: steps.check.outputs.enabled == 'true' run: | - shopt -s nullglob - results=( "${RESULT_PREFIX}"-*.json ) - if [ "${#results[@]}" -eq 0 ]; then - echo "ERROR: No benchmark result files were generated for ${MODEL_NAME}." + if [ ! -f "${RESULT_FILENAME}.json" ]; then + echo "ERROR: Benchmark result file ${RESULT_FILENAME}.json was not generated for ${MODEL_NAME}." exit 1 fi @@ -456,8 +430,8 @@ jobs: if: steps.check.outputs.enabled == 'true' uses: actions/upload-artifact@v7 with: - name: oot-benchmark-${{ env.RESULT_PREFIX }} - path: ${{ env.RESULT_PREFIX }}-*.json + name: oot-benchmark-${{ env.RESULT_FILENAME }} + path: ${{ env.RESULT_FILENAME }}.json - name: Clean up OOT benchmark container if: always() && steps.check.outputs.enabled == 'true' From 27fd782621e8aa22ca202237e5ea5d71d935d044 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 25 Mar 2026 17:19:12 +0800 Subject: [PATCH 2/9] add model qwen3.5 change to manual trigger align env and arguments choice box default false Signed-off-by: zejunchen-zejun --- .github/benchmark/oot_benchmark_models.json | 20 ++- .../workflows/atom-vllm-oot-benchmark.yaml | 137 +++++------------- 2 files changed, 50 insertions(+), 107 deletions(-) diff --git a/.github/benchmark/oot_benchmark_models.json b/.github/benchmark/oot_benchmark_models.json index 3808e5896..9a0e3fe9b 100644 --- a/.github/benchmark/oot_benchmark_models.json +++ b/.github/benchmark/oot_benchmark_models.json @@ -7,7 +7,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { "display": "DeepSeek-R1 MXFP4", @@ -17,7 +17,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { "display": "gpt-oss-120b", @@ -27,7 +27,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "linux-atom-mi355-1", - "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { "display": "Kimi-K2-Thinking-MXFP4 TP4", @@ -38,7 +38,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { "display": "Kimi-K2-Thinking-MXFP4 TP8", @@ -49,6 +49,16 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "VLLM_ROCM_USE_AITER=1\nAITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" + }, + { + "display": "Qwen3.5-397B-A17B-FP8", + "source_path": "Qwen/Qwen3.5-397B-A17B-FP8", + "path": "/models/Qwen3.5-397B-A17B-FP8", + "prefix": "qwen3-5-397b-a17b-fp8", + "extra_args": "--trust-remote-code --tensor-parallel-size 8 --max-num-batched-tokens 16384 --max-model-len 16384", + "bench_args": "", + "runner": "atom-mi355-8gpu.predownload", + "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_DISABLE_VLLM_PLUGIN_ATTENTION=1\nATOM_USE_CUSTOM_ALL_GATHER=0" } ] diff --git a/.github/workflows/atom-vllm-oot-benchmark.yaml b/.github/workflows/atom-vllm-oot-benchmark.yaml index 5c844c07a..cb137c209 100644 --- a/.github/workflows/atom-vllm-oot-benchmark.yaml +++ b/.github/workflows/atom-vllm-oot-benchmark.yaml @@ -10,46 +10,40 @@ on: deepseek-r1-fp8: description: "Benchmark DeepSeek-R1 FP8" type: boolean - default: true + default: false deepseek-r1-mxfp4: description: "Benchmark DeepSeek-R1 MXFP4" type: boolean - default: true + default: false gpt-oss-120b: description: "Benchmark gpt-oss-120b" type: boolean - default: true - kimi-k2-thinking-mxfp4: - description: "Benchmark Kimi-K2-Thinking-MXFP4 (TP4 and TP8)" + default: false + kimi-k2-thinking-mxfp4-tp4: + description: "Benchmark Kimi-K2-Thinking-MXFP4 TP4" type: boolean - default: true - base_image: - description: "ATOM base image used to build the OOT benchmark image" - type: string - default: "rocm/atom-dev:latest" - vllm_commit: - description: "vLLM commit used by the OOT benchmark image" - type: string - default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" - vllm_version: - description: "vLLM version label used in the benchmark image tag" + default: false + kimi-k2-thinking-mxfp4-tp8: + description: "Benchmark Kimi-K2-Thinking-MXFP4 TP8" + type: boolean + default: false + qwen3-5-397b-a17b-fp8: + description: "Benchmark Qwen3.5-397B-A17B-FP8" + type: boolean + default: false + oot_image: + description: "OOT benchmark image to pull directly" type: string - default: "0.17" + default: "rocm/atom-dev:vllm-latest" param_lists: description: | "Benchmark parameter lists. Input as a single or multiple sets (comma-separated, semicolon between sets), format: input_length,output_length,concurrency,random_range_ratio. - Example (single set): 1024,1024,128,0.8 - Example (multiple sets): 1024,1024,128,0.8;2048,1024,256,0.7" + Example (single set): 1024,1024,64,0.8 + Example (multiple sets): 1024,1024,64,0.8;2048,1024,32,0.7" type: string - default: "1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,1024,128,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;1024,8192,128,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8;8192,1024,128,0.8" - -env: - BASE_IMAGE: ${{ inputs.base_image || 'rocm/atom-dev:latest' }} - GITHUB_REPO_URL: https://github.com/ROCm/ATOM.git - GITHUB_COMMIT_SHA: ${{ github.sha }} - VALIDATION_IMAGE_REPO: rocm/atom-dev + default: "1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8" jobs: parse-param-lists: @@ -62,7 +56,7 @@ jobs: - name: Parse parameter lists id: parse-param-lists run: | - PARAM_LISTS="${{ inputs.param_lists || '1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,1024,128,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;1024,8192,128,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8;8192,1024,128,0.8' }}" + PARAM_LISTS="${{ inputs.param_lists || '1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8' }}" echo "Using param_lists: ${PARAM_LISTS}" printf 'param_lists=%s\n' "${PARAM_LISTS}" >> "$GITHUB_OUTPUT" IFS=';' read -ra SETS <<< "${PARAM_LISTS}" @@ -74,6 +68,13 @@ jobs: OUTPUT_LEN="${PARAMS[1]}" CONCURRENCY="${PARAMS[2]}" RANDOM_RANGE_RATIO="${PARAMS[3]}" + case "${CONCURRENCY}" in + 4|8|16|32|64) ;; + *) + echo "Unsupported concurrency: ${CONCURRENCY}. Allowed values: 4,8,16,32,64" + exit 1 + ;; + esac MATRIX_JSON="${MATRIX_JSON}${SEP}{\"input_length\":${INPUT_LEN},\"output_length\":${OUTPUT_LEN},\"concurrency\":${CONCURRENCY},\"random_range_ratio\":${RANDOM_RANGE_RATIO}}" SEP="," done @@ -94,84 +95,13 @@ jobs: - id: load run: echo "models_json=$(jq -c . .github/benchmark/oot_benchmark_models.json)" >> "$GITHUB_OUTPUT" - build-oot-image: - name: Build OOT benchmark image - runs-on: build-only-atom - outputs: - oot_image_tag: ${{ steps.meta.outputs.oot_image_tag }} - steps: - - name: Checkout ATOM repo - uses: actions/checkout@v6 - - - name: Docker Login - run: | - echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - - - name: Generate OOT base Dockerfile - run: | - cat < Dockerfile.mod - FROM ${{ env.BASE_IMAGE }} - RUN pip install -U lm-eval[api] - RUN pip show lm-eval || true - RUN pip install hf_transfer - RUN pip show hf_transfer || true - RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true - RUN pip uninstall -y amd-aiter - RUN pip install --upgrade "pybind11>=3.0.1" - RUN pip show pybind11 - RUN rm -rf /app/aiter-test - RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ - cd /app/aiter-test && \\ - git checkout HEAD && \\ - git submodule sync && git submodule update --init --recursive && \\ - MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop - RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true - RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - RUN pip uninstall -y atom - RUN rm -rf /app/ATOM - RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ - cd /app/ATOM && \\ - git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ - pip install -e . - RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true - EOF - - - name: Build OOT base image - run: | - docker build --pull --network=host \ - --no-cache \ - -t atom_oot_base:benchmark \ - -f Dockerfile.mod . - - - name: Build OOT vLLM image from current commit - id: meta - run: | - SHORT_SHA="${GITHUB_COMMIT_SHA::12}" - OOT_IMAGE_TAG="${VALIDATION_IMAGE_REPO}:oot-vllm-benchmark-v${{ inputs.vllm_version || '0.17' }}-${SHORT_SHA}-${{ github.run_id }}" - docker build --network=host \ - --no-cache \ - -t "${OOT_IMAGE_TAG}" \ - --target atom_oot \ - --build-arg OOT_BASE_IMAGE="atom_oot_base:benchmark" \ - --build-arg MAX_JOBS=64 \ - --build-arg VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ - --build-arg INSTALL_LM_EVAL=1 \ - --build-arg INSTALL_FASTSAFETENSORS=1 \ - -f docker/Dockerfile . - echo "oot_image_tag=${OOT_IMAGE_TAG}" >> "$GITHUB_OUTPUT" - - - name: Push OOT benchmark image - run: | - docker push "${{ steps.meta.outputs.oot_image_tag }}" - benchmark: name: OOT ${{ matrix.model.display }} ${{ matrix.params.input_length }}/${{ matrix.params.output_length }} c=${{ matrix.params.concurrency }} - needs: [parse-param-lists, load-models, build-oot-image] + needs: [parse-param-lists, load-models] if: >- always() && needs.parse-param-lists.result == 'success' && needs.load-models.result == 'success' - && needs.build-oot-image.result == 'success' strategy: fail-fast: false matrix: @@ -195,7 +125,7 @@ jobs: CONTAINER_NAME: atom_vllm_oot_benchmark_${{ strategy.job-index }} CONTAINER_RESULT_DIR: /tmp/oot-benchmark-results CONTAINER_BENCH_SERVING_DIR: /tmp/oot-benchmark/bench_serving - OOT_IMAGE_TAG: ${{ needs.build-oot-image.outputs.oot_image_tag }} + OOT_IMAGE_TAG: ${{ inputs.oot_image || 'rocm/atom-dev:vllm-latest' }} BENCH_SERVING_REPO_URL: https://github.com/kimbochen/bench_serving.git steps: - name: Check if model is enabled @@ -205,7 +135,9 @@ jobs: deepseek-r1-fp8) echo "enabled=${{ inputs.deepseek-r1-fp8 }}" >> "$GITHUB_OUTPUT" ;; deepseek-r1-mxfp4) echo "enabled=${{ inputs.deepseek-r1-mxfp4 }}" >> "$GITHUB_OUTPUT" ;; gpt-oss-120b) echo "enabled=${{ inputs.gpt-oss-120b }}" >> "$GITHUB_OUTPUT" ;; - kimi-k2-thinking-mxfp4-tp4|kimi-k2-thinking-mxfp4-tp8) echo "enabled=${{ inputs.kimi-k2-thinking-mxfp4 }}" >> "$GITHUB_OUTPUT" ;; + kimi-k2-thinking-mxfp4-tp4) echo "enabled=${{ inputs.kimi-k2-thinking-mxfp4-tp4 }}" >> "$GITHUB_OUTPUT" ;; + kimi-k2-thinking-mxfp4-tp8) echo "enabled=${{ inputs.kimi-k2-thinking-mxfp4-tp8 }}" >> "$GITHUB_OUTPUT" ;; + qwen3-5-397b-a17b-fp8) echo "enabled=${{ inputs.qwen3-5-397b-a17b-fp8 }}" >> "$GITHUB_OUTPUT" ;; *) echo "enabled=true" >> "$GITHUB_OUTPUT" ;; esac @@ -233,9 +165,10 @@ jobs: if: steps.check.outputs.enabled == 'true' run: echo "HF_TOKEN=${HF_TOKEN:-${{ secrets.AMD_HF_TOKEN }}}" >> "$GITHUB_ENV" - - name: Pull built OOT image + - name: Pull OOT benchmark image if: steps.check.outputs.enabled == 'true' run: | + echo "Pulling OOT benchmark image: ${OOT_IMAGE_TAG}" docker pull "${OOT_IMAGE_TAG}" - name: Prepare model cache mount From 5f9cf75fed9acea65004e4d44b1bd7814fa3d655 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 25 Mar 2026 17:29:28 +0800 Subject: [PATCH 3/9] set 4 GPU machine for Kimi-K2 TP4 Signed-off-by: zejunchen-zejun --- .github/benchmark/oot_benchmark_models.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/benchmark/oot_benchmark_models.json b/.github/benchmark/oot_benchmark_models.json index 9a0e3fe9b..da388c2ed 100644 --- a/.github/benchmark/oot_benchmark_models.json +++ b/.github/benchmark/oot_benchmark_models.json @@ -37,7 +37,7 @@ "prefix": "kimi-k2-thinking-mxfp4-tp4", "extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", - "runner": "atom-mi355-8gpu.predownload", + "runner": "linux-atom-mi355-4", "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { From f236d2c35984f1221c3f0e2e0ef5794614c723ea Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 25 Mar 2026 17:40:47 +0800 Subject: [PATCH 4/9] if the model has not been chosen, the gpu runner will not be dispatched Signed-off-by: zejunchen-zejun --- .../workflows/atom-vllm-oot-benchmark.yaml | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/.github/workflows/atom-vllm-oot-benchmark.yaml b/.github/workflows/atom-vllm-oot-benchmark.yaml index cb137c209..a14645b8d 100644 --- a/.github/workflows/atom-vllm-oot-benchmark.yaml +++ b/.github/workflows/atom-vllm-oot-benchmark.yaml @@ -90,10 +90,36 @@ jobs: runs-on: ubuntu-latest outputs: models_json: ${{ steps.load.outputs.models_json }} + has_enabled_models: ${{ steps.load.outputs.has_enabled_models }} steps: - uses: actions/checkout@v6 - id: load - run: echo "models_json=$(jq -c . .github/benchmark/oot_benchmark_models.json)" >> "$GITHUB_OUTPUT" + env: + ENABLE_DEEPSEEK_R1_FP8: ${{ inputs.deepseek-r1-fp8 }} + ENABLE_DEEPSEEK_R1_MXFP4: ${{ inputs.deepseek-r1-mxfp4 }} + ENABLE_GPT_OSS_120B: ${{ inputs.gpt-oss-120b }} + ENABLE_KIMI_K2_TP4: ${{ inputs.kimi-k2-thinking-mxfp4-tp4 }} + ENABLE_KIMI_K2_TP8: ${{ inputs.kimi-k2-thinking-mxfp4-tp8 }} + ENABLE_QWEN3_5_397B_A17B_FP8: ${{ inputs.qwen3-5-397b-a17b-fp8 }} + run: | + MODELS_JSON="$(jq -c ' + map(select( + (.prefix == "deepseek-r1-fp8" and env.ENABLE_DEEPSEEK_R1_FP8 == "true") + or (.prefix == "deepseek-r1-mxfp4" and env.ENABLE_DEEPSEEK_R1_MXFP4 == "true") + or (.prefix == "gpt-oss-120b" and env.ENABLE_GPT_OSS_120B == "true") + or (.prefix == "kimi-k2-thinking-mxfp4-tp4" and env.ENABLE_KIMI_K2_TP4 == "true") + or (.prefix == "kimi-k2-thinking-mxfp4-tp8" and env.ENABLE_KIMI_K2_TP8 == "true") + or (.prefix == "qwen3-5-397b-a17b-fp8" and env.ENABLE_QWEN3_5_397B_A17B_FP8 == "true") + )) + ' .github/benchmark/oot_benchmark_models.json)" + echo "models_json=${MODELS_JSON}" >> "$GITHUB_OUTPUT" + if [ "${MODELS_JSON}" = "[]" ]; then + echo "has_enabled_models=false" >> "$GITHUB_OUTPUT" + echo "No models selected for OOT benchmark." + else + echo "has_enabled_models=true" >> "$GITHUB_OUTPUT" + echo "Selected models: ${MODELS_JSON}" + fi benchmark: name: OOT ${{ matrix.model.display }} ${{ matrix.params.input_length }}/${{ matrix.params.output_length }} c=${{ matrix.params.concurrency }} @@ -102,6 +128,7 @@ jobs: always() && needs.parse-param-lists.result == 'success' && needs.load-models.result == 'success' + && needs.load-models.outputs.has_enabled_models == 'true' strategy: fail-fast: false matrix: From 62437ef37bbd70b6b196fa334efdf50945c940e0 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 25 Mar 2026 17:56:40 +0800 Subject: [PATCH 5/9] change the config Signed-off-by: zejunchen-zejun --- .github/benchmark/oot_benchmark_models.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/benchmark/oot_benchmark_models.json b/.github/benchmark/oot_benchmark_models.json index da388c2ed..c7ee28daf 100644 --- a/.github/benchmark/oot_benchmark_models.json +++ b/.github/benchmark/oot_benchmark_models.json @@ -26,7 +26,7 @@ "prefix": "gpt-oss-120b", "extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", - "runner": "linux-atom-mi355-1", + "runner": "atom-mi355-8gpu.predownload", "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { @@ -37,7 +37,7 @@ "prefix": "kimi-k2-thinking-mxfp4-tp4", "extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", - "runner": "linux-atom-mi355-4", + "runner": "atom-mi355-8gpu.predownload", "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { From f8c77fb270b23867bc7452afc45aa48313eca583 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 25 Mar 2026 17:58:40 +0800 Subject: [PATCH 6/9] remove redundant env flag for gptoss Signed-off-by: zejunchen-zejun --- .github/benchmark/oot_benchmark_models.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/benchmark/oot_benchmark_models.json b/.github/benchmark/oot_benchmark_models.json index c7ee28daf..a2781102c 100644 --- a/.github/benchmark/oot_benchmark_models.json +++ b/.github/benchmark/oot_benchmark_models.json @@ -27,7 +27,7 @@ "extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 16384 --max-model-len 16384", "bench_args": "", "runner": "atom-mi355-8gpu.predownload", - "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" + "env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1" }, { "display": "Kimi-K2-Thinking-MXFP4 TP4", From 7a11c93c806380b587636a9585075afc82459e5a Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 26 Mar 2026 11:50:00 +0800 Subject: [PATCH 7/9] add specific branch trigger OOT benchmark for acceptance test when upgrading vLLM Signed-off-by: zejunchen-zejun --- .github/scripts/oot_benchmark_to_dashboard.py | 12 + .../workflows/atom-vllm-oot-benchmark.yaml | 361 +++++++++++++++++- 2 files changed, 353 insertions(+), 20 deletions(-) diff --git a/.github/scripts/oot_benchmark_to_dashboard.py b/.github/scripts/oot_benchmark_to_dashboard.py index ba5c009ba..5e5f92fab 100644 --- a/.github/scripts/oot_benchmark_to_dashboard.py +++ b/.github/scripts/oot_benchmark_to_dashboard.py @@ -50,6 +50,15 @@ def append_metric( entries.append(entry) +def is_dashboard_publish_allowed(payload: dict) -> bool: + publish_flag = payload.get("dashboard_publish_allowed") + if publish_flag is None: + return True + if isinstance(publish_flag, bool): + return publish_flag + return str(publish_flag).strip().lower() not in {"0", "false", "no"} + + def build_entries(result_dir: Path, run_url: str | None) -> list[dict]: entries: list[dict] = [] @@ -62,6 +71,9 @@ def build_entries(result_dir: Path, run_url: str | None) -> list[dict]: except (OSError, UnicodeDecodeError, json.JSONDecodeError): continue + if not is_dashboard_publish_allowed(payload): + continue + if "output_throughput" not in payload: continue diff --git a/.github/workflows/atom-vllm-oot-benchmark.yaml b/.github/workflows/atom-vllm-oot-benchmark.yaml index a14645b8d..0ba8beb9d 100644 --- a/.github/workflows/atom-vllm-oot-benchmark.yaml +++ b/.github/workflows/atom-vllm-oot-benchmark.yaml @@ -1,7 +1,7 @@ name: ATOM vLLM OOT Benchmark concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.repository }}-${{ github.ref_name }} cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} on: @@ -32,7 +32,7 @@ on: type: boolean default: false oot_image: - description: "OOT benchmark image to pull directly" + description: "Prebuilt OOT benchmark image to pull when no custom rebuild is needed" type: string default: "rocm/atom-dev:vllm-latest" param_lists: @@ -46,6 +46,111 @@ on: default: "1024,1024,4,0.8;1024,1024,8,0.8;1024,1024,16,0.8;1024,1024,32,0.8;1024,1024,64,0.8;1024,8192,4,0.8;1024,8192,8,0.8;1024,8192,16,0.8;1024,8192,32,0.8;1024,8192,64,0.8;8192,1024,4,0.8;8192,1024,8,0.8;8192,1024,16,0.8;8192,1024,32,0.8;8192,1024,64,0.8" jobs: + resolve-atom-source: + name: Resolve ATOM benchmark source + runs-on: ubuntu-latest + outputs: + atom_repository: ${{ steps.resolve.outputs.atom_repository }} + atom_ref: ${{ steps.resolve.outputs.atom_ref }} + normalized_ref: ${{ steps.resolve.outputs.normalized_ref }} + rebuild_oot_image: ${{ steps.resolve.outputs.rebuild_oot_image }} + prebuilt_oot_image: ${{ steps.resolve.outputs.prebuilt_oot_image }} + selected_vllm_commit: ${{ steps.resolve.outputs.selected_vllm_commit }} + selected_vllm_version: ${{ steps.resolve.outputs.selected_vllm_version }} + oot_base_image: ${{ steps.resolve.outputs.oot_base_image }} + oot_image_source: ${{ steps.resolve.outputs.oot_image_source }} + publish_to_dashboard: ${{ steps.resolve.outputs.publish_to_dashboard }} + steps: + - name: Checkout selected branch + uses: actions/checkout@v6 + + - name: Resolve benchmark source + id: resolve + env: + INPUT_OOT_IMAGE: ${{ inputs.oot_image || '' }} + run: | + set -euo pipefail + + ATOM_REPOSITORY="${GITHUB_REPOSITORY}" + ATOM_REF="${GITHUB_REF_NAME}" + PREBUILT_OOT_IMAGE="${INPUT_OOT_IMAGE:-rocm/atom-dev:vllm-latest}" + OOT_BASE_IMAGE="rocm/atom-dev:latest" + + NORMALIZED_REF="${ATOM_REF#refs/heads/}" + NORMALIZED_REF="${NORMALIZED_REF#refs/tags/}" + + REBUILD_OOT_IMAGE=false + if [[ "${NORMALIZED_REF}" != "main" ]]; then + REBUILD_OOT_IMAGE=true + fi + + OOT_IMAGE_SOURCE="prebuilt" + if [[ "${REBUILD_OOT_IMAGE}" == "true" ]]; then + OOT_IMAGE_SOURCE="rebuild" + fi + + PUBLISH_TO_DASHBOARD=false + if [[ "${REBUILD_OOT_IMAGE}" != "true" && "${NORMALIZED_REF}" == "main" ]]; then + PUBLISH_TO_DASHBOARD=true + fi + + mapfile -t VLLM_META < <(python3 - <<'PY' + import re + from pathlib import Path + + text = Path(".github/workflows/docker-release.yaml").read_text(encoding="utf-8") + commit_match = re.search(r'^\s*VLLM_COMMIT:\s*"([^"]+)"', text, re.MULTILINE) + version_match = re.search(r'^\s*VLLM_VERSION:\s*"([^"]+)"', text, re.MULTILINE) + + if not commit_match or not version_match: + raise SystemExit("Failed to read VLLM_COMMIT/VLLM_VERSION from .github/workflows/docker-release.yaml") + + print(commit_match.group(1)) + print(version_match.group(1)) + PY + ) + + SELECTED_VLLM_COMMIT="${VLLM_META[0]}" + SELECTED_VLLM_VERSION="${VLLM_META[1]}" + + { + echo "atom_repository=${ATOM_REPOSITORY}" + echo "atom_ref=${ATOM_REF}" + echo "normalized_ref=${NORMALIZED_REF}" + echo "rebuild_oot_image=${REBUILD_OOT_IMAGE}" + echo "prebuilt_oot_image=${PREBUILT_OOT_IMAGE}" + echo "selected_vllm_commit=${SELECTED_VLLM_COMMIT}" + echo "selected_vllm_version=${SELECTED_VLLM_VERSION}" + echo "oot_base_image=${OOT_BASE_IMAGE}" + echo "oot_image_source=${OOT_IMAGE_SOURCE}" + echo "publish_to_dashboard=${PUBLISH_TO_DASHBOARD}" + } >> "$GITHUB_OUTPUT" + + printf '### OOT benchmark source\n- Repository: `%s`\n- Ref: `%s`\n- Image mode: `%s`\n' \ + "${ATOM_REPOSITORY}" \ + "${ATOM_REF}" \ + "${OOT_IMAGE_SOURCE}" >> "$GITHUB_STEP_SUMMARY" + + if [[ "${REBUILD_OOT_IMAGE}" == "true" ]]; then + printf -- '- Base image: `%s`\n- vLLM version: `%s`\n- vLLM commit: `%s`\n- vLLM source: `.github/workflows/docker-release.yaml`\n' \ + "${OOT_BASE_IMAGE}" \ + "${SELECTED_VLLM_VERSION}" \ + "${SELECTED_VLLM_COMMIT}" >> "$GITHUB_STEP_SUMMARY" + else + printf -- '- Prebuilt image: `%s`\n- Expected vLLM version: `%s`\n- Expected vLLM commit: `%s`\n' \ + "${PREBUILT_OOT_IMAGE}" \ + "${SELECTED_VLLM_VERSION}" \ + "${SELECTED_VLLM_COMMIT}" >> "$GITHUB_STEP_SUMMARY" + fi + + printf -- '- Upload to dashboard: `%s`\n' \ + "${PUBLISH_TO_DASHBOARD}" >> "$GITHUB_STEP_SUMMARY" + + if [[ "${PUBLISH_TO_DASHBOARD}" != "true" ]]; then + printf '\nNon-main or overridden ATOM sources keep benchmark results in artifacts and run summaries only.\n' \ + >> "$GITHUB_STEP_SUMMARY" + fi + parse-param-lists: name: Parse parameter lists runs-on: ubuntu-latest @@ -121,13 +226,118 @@ jobs: echo "Selected models: ${MODELS_JSON}" fi + build-oot-image: + name: Build custom OOT benchmark image + needs: [resolve-atom-source] + if: needs.resolve-atom-source.outputs.rebuild_oot_image == 'true' + runs-on: build-only-atom + outputs: + oot_image_tag: ${{ steps.image-meta.outputs.oot_image_tag }} + atom_source_sha: ${{ steps.source-meta.outputs.atom_source_sha }} + env: + ATOM_SOURCE_REPOSITORY: ${{ needs.resolve-atom-source.outputs.atom_repository }} + ATOM_SOURCE_REF: ${{ needs.resolve-atom-source.outputs.atom_ref }} + OOT_BASE_IMAGE: ${{ needs.resolve-atom-source.outputs.oot_base_image }} + VLLM_COMMIT: ${{ needs.resolve-atom-source.outputs.selected_vllm_commit }} + VLLM_VERSION: ${{ needs.resolve-atom-source.outputs.selected_vllm_version }} + OOT_IMAGE_REPO: rocm/atom-dev + steps: + - name: Checkout benchmark ATOM source + uses: actions/checkout@v6 + with: + repository: ${{ env.ATOM_SOURCE_REPOSITORY }} + ref: ${{ env.ATOM_SOURCE_REF }} + fetch-depth: 1 + + - name: Record source metadata + id: source-meta + run: | + ATOM_SOURCE_SHA="$(git rev-parse HEAD)" + echo "atom_source_sha=${ATOM_SOURCE_SHA}" >> "$GITHUB_OUTPUT" + printf '### Building custom OOT image\n- Repository: `%s`\n- Ref: `%s`\n- Commit: `%s`\n- Base image: `%s`\n- vLLM version: `%s`\n- vLLM commit: `%s`\n' \ + "${ATOM_SOURCE_REPOSITORY}" \ + "${ATOM_SOURCE_REF}" \ + "${ATOM_SOURCE_SHA}" \ + "${OOT_BASE_IMAGE}" \ + "${VLLM_VERSION}" \ + "${VLLM_COMMIT}" >> "$GITHUB_STEP_SUMMARY" + + - name: Docker Login + run: | + echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + + - name: Generate custom OOT base Dockerfile + run: | + cat > Dockerfile.oot-base <=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom || true + RUN rm -rf /app/ATOM + COPY . /app/ATOM + RUN cd /app/ATOM && pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + - name: Build custom OOT base image + run: | + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.oot-base . + + - name: Build custom OOT vLLM image + id: image-meta + run: | + SHORT_SHA="$(git rev-parse --short HEAD)" + SHORT_VLLM="$(printf '%s' "${VLLM_COMMIT}" | cut -c1-12)" + SAFE_VLLM_VERSION="$(printf '%s' "${VLLM_VERSION}" | tr '/:' '--')" + OOT_IMAGE_TAG="${OOT_IMAGE_REPO}:oot-benchmark-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${SHORT_SHA}-${SAFE_VLLM_VERSION}-${SHORT_VLLM}" + docker build --network=host \ + --no-cache \ + -t "${OOT_IMAGE_TAG}" \ + --target atom_oot \ + --build-arg OOT_BASE_IMAGE="atom_oot_base:ci" \ + --build-arg MAX_JOBS=64 \ + --build-arg VLLM_COMMIT="${VLLM_COMMIT}" \ + --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . + echo "oot_image_tag=${OOT_IMAGE_TAG}" >> "$GITHUB_OUTPUT" + + - name: Push custom OOT image + run: | + docker push "${{ steps.image-meta.outputs.oot_image_tag }}" + + - name: Clean up build images + if: always() + run: | + docker rmi "${{ steps.image-meta.outputs.oot_image_tag }}" || true + docker rmi atom_oot_base:ci || true + benchmark: name: OOT ${{ matrix.model.display }} ${{ matrix.params.input_length }}/${{ matrix.params.output_length }} c=${{ matrix.params.concurrency }} - needs: [parse-param-lists, load-models] + needs: [resolve-atom-source, parse-param-lists, load-models, build-oot-image] if: >- always() + && needs.resolve-atom-source.result == 'success' && needs.parse-param-lists.result == 'success' && needs.load-models.result == 'success' + && (needs.build-oot-image.result == 'success' || needs.build-oot-image.result == 'skipped') && needs.load-models.outputs.has_enabled_models == 'true' strategy: fail-fast: false @@ -152,8 +362,14 @@ jobs: CONTAINER_NAME: atom_vllm_oot_benchmark_${{ strategy.job-index }} CONTAINER_RESULT_DIR: /tmp/oot-benchmark-results CONTAINER_BENCH_SERVING_DIR: /tmp/oot-benchmark/bench_serving - OOT_IMAGE_TAG: ${{ inputs.oot_image || 'rocm/atom-dev:vllm-latest' }} + OOT_IMAGE_TAG: ${{ needs.build-oot-image.outputs.oot_image_tag || needs.resolve-atom-source.outputs.prebuilt_oot_image }} + OOT_IMAGE_SOURCE: ${{ needs.resolve-atom-source.outputs.oot_image_source }} BENCH_SERVING_REPO_URL: https://github.com/kimbochen/bench_serving.git + ATOM_SOURCE_REPOSITORY: ${{ needs.resolve-atom-source.outputs.atom_repository }} + ATOM_SOURCE_REF: ${{ needs.resolve-atom-source.outputs.atom_ref }} + VLLM_COMMIT_USED: ${{ needs.resolve-atom-source.outputs.selected_vllm_commit }} + VLLM_VERSION_USED: ${{ needs.resolve-atom-source.outputs.selected_vllm_version }} + PUBLISH_TO_DASHBOARD: ${{ needs.resolve-atom-source.outputs.publish_to_dashboard }} steps: - name: Check if model is enabled id: check @@ -179,9 +395,20 @@ jobs: docker rm -f "$CONTAINER_NAME" 2>/dev/null || true docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "shopt -s dotglob && ls -la /workspace/ && rm -rf /workspace/*" || true - - name: Checkout ATOM repo + - name: Checkout benchmark ATOM source if: steps.check.outputs.enabled == 'true' uses: actions/checkout@v6 + with: + repository: ${{ env.ATOM_SOURCE_REPOSITORY }} + ref: ${{ needs.build-oot-image.outputs.atom_source_sha || github.sha }} + fetch-depth: 1 + + - name: Record benchmark source revision + if: steps.check.outputs.enabled == 'true' + run: | + SOURCE_SHA="$(git rev-parse HEAD)" + echo "ATOM_SOURCE_SHA=${SOURCE_SHA}" >> "$GITHUB_ENV" + echo "Benchmarking ${ATOM_SOURCE_REPOSITORY}@${ATOM_SOURCE_REF} (${SOURCE_SHA}) with ${OOT_IMAGE_SOURCE} image ${OOT_IMAGE_TAG}" - name: Docker Login if: steps.check.outputs.enabled == 'true' @@ -368,13 +595,56 @@ jobs: ${BENCH_EXTRA_ARGS:-} " - docker exec \ + docker exec -i \ -e RESULT_PATH="${CONTAINER_RESULT_DIR}/${RESULT_FILENAME}.json" \ -e ISL="${ISL}" \ -e OSL="${OSL}" \ -e EXTRA_ARGS_TEXT="${OOT_EXTRA_ARGS}" \ -e DASHBOARD_MODEL_NAME="${DASHBOARD_MODEL_NAME}" \ - "$CONTAINER_NAME" python3 -c "import json, os, re; result_path = os.environ['RESULT_PATH']; data = json.load(open(result_path, encoding='utf-8')); data['random_input_len'] = int(os.environ['ISL']); data['random_output_len'] = int(os.environ['OSL']); data['benchmark_backend'] = 'ATOM-vLLM'; display_name = os.environ.get('DASHBOARD_MODEL_NAME', ''); display_name and data.__setitem__('benchmark_model_name', display_name); tp_match = re.search(r'--tensor-parallel-size\\s+(\\d+)', os.environ.get('EXTRA_ARGS_TEXT', '')); tp_match and data.__setitem__('tensor_parallel_size', int(tp_match.group(1))); json.dump(data, open(result_path, 'w', encoding='utf-8'), indent=2)" + -e ATOM_SOURCE_REPOSITORY="${ATOM_SOURCE_REPOSITORY}" \ + -e ATOM_SOURCE_REF="${ATOM_SOURCE_REF}" \ + -e ATOM_SOURCE_SHA="${ATOM_SOURCE_SHA}" \ + -e VLLM_COMMIT_USED="${VLLM_COMMIT_USED}" \ + -e VLLM_VERSION_USED="${VLLM_VERSION_USED}" \ + -e OOT_IMAGE_SOURCE="${OOT_IMAGE_SOURCE}" \ + -e PUBLISH_TO_DASHBOARD="${PUBLISH_TO_DASHBOARD}" \ + "$CONTAINER_NAME" python3 - <<'PY' + import json + import os + import re + + result_path = os.environ["RESULT_PATH"] + with open(result_path, encoding="utf-8") as f: + data = json.load(f) + + data["random_input_len"] = int(os.environ["ISL"]) + data["random_output_len"] = int(os.environ["OSL"]) + data["benchmark_backend"] = "ATOM-vLLM" + + display_name = os.environ.get("DASHBOARD_MODEL_NAME", "") + if display_name: + data["benchmark_model_name"] = display_name + + tp_match = re.search( + r"--tensor-parallel-size\s+(\d+)", + os.environ.get("EXTRA_ARGS_TEXT", ""), + ) + if tp_match: + data["tensor_parallel_size"] = int(tp_match.group(1)) + + data["atom_source_repository"] = os.environ.get("ATOM_SOURCE_REPOSITORY", "") + data["atom_source_ref"] = os.environ.get("ATOM_SOURCE_REF", "") + data["atom_source_sha"] = os.environ.get("ATOM_SOURCE_SHA", "") + data["vllm_commit"] = os.environ.get("VLLM_COMMIT_USED", "") + data["vllm_version"] = os.environ.get("VLLM_VERSION_USED", "") + data["oot_image_source"] = os.environ.get("OOT_IMAGE_SOURCE", "") + data["dashboard_publish_allowed"] = ( + os.environ.get("PUBLISH_TO_DASHBOARD", "false").lower() == "true" + ) + + with open(result_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + PY docker cp "${CONTAINER_NAME}:${CONTAINER_RESULT_DIR}/${RESULT_FILENAME}.json" "./${RESULT_FILENAME}.json" @@ -399,11 +669,12 @@ jobs: docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true docker stop "$CONTAINER_NAME" || true docker rm "$CONTAINER_NAME" || true + docker rmi "$OOT_IMAGE_TAG" || true summarize-benchmark-result: if: always() name: Summarize OOT benchmark result - needs: [benchmark] + needs: [resolve-atom-source, benchmark] runs-on: ubuntu-latest outputs: has_regression: ${{ steps.check-regression.outputs.has_regression }} @@ -437,22 +708,64 @@ jobs: if: steps.check-results.outputs.has_results == 'true' id: baseline run: | - PREV_RUN_ID=$(gh run list \ + mapfile -t CANDIDATE_RUN_IDS < <(gh run list \ --workflow="ATOM vLLM OOT Benchmark" \ --branch=main \ --event=workflow_dispatch \ --status=success \ - --limit=1 \ + --limit=10 \ --json databaseId \ - --jq '.[0].databaseId // empty') + --jq '.[].databaseId // empty') + + BASELINE_DIR="" + for PREV_RUN_ID in "${CANDIDATE_RUN_IDS[@]}"; do + if [ -z "$PREV_RUN_ID" ] || [ "$PREV_RUN_ID" = "${{ github.run_id }}" ]; then + continue + fi + + echo "Checking baseline candidate run #$PREV_RUN_ID" + rm -rf /tmp/baseline_candidate /tmp/baseline + mkdir -p /tmp/baseline_candidate + + if ! gh run download "$PREV_RUN_ID" --dir /tmp/baseline_candidate; then + echo "::warning::Failed to download baseline artifacts from run #$PREV_RUN_ID" + continue + fi + + if python3 - <<'PY' + import json + from pathlib import Path + + valid_payload_found = False + for path in Path("/tmp/baseline_candidate").rglob("*.json"): + if path.name == "regression_report.json": + continue + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (OSError, UnicodeDecodeError, json.JSONDecodeError): + continue + if "output_throughput" not in payload: + continue + valid_payload_found = True + if payload.get("dashboard_publish_allowed", True) is False: + raise SystemExit(1) + + raise SystemExit(0 if valid_payload_found else 1) + PY + then + mv /tmp/baseline_candidate /tmp/baseline + BASELINE_DIR="/tmp/baseline" + echo "Using baseline from run #$PREV_RUN_ID" + break + fi + + echo "Skipping run #$PREV_RUN_ID because it contains custom/non-dashboard benchmark results." + done - if [ -n "$PREV_RUN_ID" ] && [ "$PREV_RUN_ID" != "${{ github.run_id }}" ]; then - echo "Downloading baseline from run #$PREV_RUN_ID" - mkdir -p /tmp/baseline - gh run download "$PREV_RUN_ID" --dir /tmp/baseline || echo "::warning::Failed to download baseline artifacts" - echo "baseline_dir=/tmp/baseline" >> "$GITHUB_OUTPUT" + if [ -n "$BASELINE_DIR" ]; then + echo "baseline_dir=${BASELINE_DIR}" >> "$GITHUB_OUTPUT" else - echo "No previous successful manual run found" + echo "No previous successful dashboard-eligible manual run found" echo "baseline_dir=" >> "$GITHUB_OUTPUT" fi env: @@ -488,8 +801,16 @@ jobs: name: oot-benchmark-regression-report path: regression_report.json + - name: Note dashboard upload skipped + if: steps.check-results.outputs.has_results == 'true' && needs.resolve-atom-source.outputs.publish_to_dashboard != 'true' + run: | + printf '### Dashboard upload skipped\nResults from `%s@%s` using `%s` OOT image mode are intentionally kept out of the benchmark dashboard.\n' \ + "${{ needs.resolve-atom-source.outputs.atom_repository }}" \ + "${{ needs.resolve-atom-source.outputs.atom_ref }}" \ + "${{ needs.resolve-atom-source.outputs.oot_image_source }}" >> "$GITHUB_STEP_SUMMARY" + - name: Transform results for benchmark dashboard - if: steps.check-results.outputs.has_results == 'true' + if: steps.check-results.outputs.has_results == 'true' && needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' run: | python3 .github/scripts/oot_benchmark_to_dashboard.py \ . \ @@ -497,7 +818,7 @@ jobs: --run-url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" - name: Store benchmark result to dashboard - if: steps.check-results.outputs.has_results == 'true' + if: steps.check-results.outputs.has_results == 'true' && needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' uses: benchmark-action/github-action-benchmark@v1 with: tool: customBiggerIsBetter @@ -512,7 +833,7 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} - name: Deploy custom dashboard to gh-pages - if: steps.check-results.outputs.has_results == 'true' + if: steps.check-results.outputs.has_results == 'true' && needs.resolve-atom-source.outputs.publish_to_dashboard == 'true' run: | git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" From 8c8833ca9facdc2a8eafe8ea63984df618532082 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 26 Mar 2026 12:03:06 +0800 Subject: [PATCH 8/9] change the oot benchmark behavior Signed-off-by: zejunchen-zejun --- .github/benchmark/oot_benchmark_models.json | 8 +++---- .../workflows/atom-vllm-oot-benchmark.yaml | 21 +++++++++++-------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/benchmark/oot_benchmark_models.json b/.github/benchmark/oot_benchmark_models.json index a2781102c..6469e9b15 100644 --- a/.github/benchmark/oot_benchmark_models.json +++ b/.github/benchmark/oot_benchmark_models.json @@ -1,6 +1,6 @@ [ { - "display": "DeepSeek-R1 FP8", + "display": "DeepSeek-R1 FP8 TP8", "source_path": "deepseek-ai/DeepSeek-R1-0528", "path": "/models/DeepSeek-R1-0528", "prefix": "deepseek-r1-fp8", @@ -10,7 +10,7 @@ "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { - "display": "DeepSeek-R1 MXFP4", + "display": "DeepSeek-R1 MXFP4 TP8", "source_path": "amd/DeepSeek-R1-0528-MXFP4", "path": "/models/DeepSeek-R1-0528-MXFP4", "prefix": "deepseek-r1-mxfp4", @@ -20,7 +20,7 @@ "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { - "display": "gpt-oss-120b", + "display": "gpt-oss-120b TP1", "source_path": "amd/gpt-oss-120b-w-mxfp4-a-fp8", "path": "/models/gpt-oss-120b", "prefix": "gpt-oss-120b", @@ -52,7 +52,7 @@ "env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4" }, { - "display": "Qwen3.5-397B-A17B-FP8", + "display": "Qwen3.5-397B-A17B-FP8 TP8", "source_path": "Qwen/Qwen3.5-397B-A17B-FP8", "path": "/models/Qwen3.5-397B-A17B-FP8", "prefix": "qwen3-5-397b-a17b-fp8", diff --git a/.github/workflows/atom-vllm-oot-benchmark.yaml b/.github/workflows/atom-vllm-oot-benchmark.yaml index 0ba8beb9d..8a2245214 100644 --- a/.github/workflows/atom-vllm-oot-benchmark.yaml +++ b/.github/workflows/atom-vllm-oot-benchmark.yaml @@ -8,31 +8,31 @@ on: workflow_dispatch: inputs: deepseek-r1-fp8: - description: "Benchmark DeepSeek-R1 FP8" + description: "DeepSeek-R1 FP8 TP8" type: boolean default: false deepseek-r1-mxfp4: - description: "Benchmark DeepSeek-R1 MXFP4" + description: "DeepSeek-R1 MXFP4 TP8" type: boolean default: false gpt-oss-120b: - description: "Benchmark gpt-oss-120b" + description: "gpt-oss-120b TP1" type: boolean default: false kimi-k2-thinking-mxfp4-tp4: - description: "Benchmark Kimi-K2-Thinking-MXFP4 TP4" + description: "Kimi-K2-Thinking-MXFP4 TP4" type: boolean default: false kimi-k2-thinking-mxfp4-tp8: - description: "Benchmark Kimi-K2-Thinking-MXFP4 TP8" + description: "Kimi-K2-Thinking-MXFP4 TP8" type: boolean default: false qwen3-5-397b-a17b-fp8: - description: "Benchmark Qwen3.5-397B-A17B-FP8" + description: "Qwen3.5-397B-A17B-FP8 TP8" type: boolean default: false oot_image: - description: "Prebuilt OOT benchmark image to pull when no custom rebuild is needed" + description: "Prebuilt OOT benchmark image used only for main-branch runs; ignored for non-main branches because a custom image is rebuilt from the selected branch" type: string default: "rocm/atom-dev:vllm-latest" param_lists: @@ -132,10 +132,13 @@ jobs: "${OOT_IMAGE_SOURCE}" >> "$GITHUB_STEP_SUMMARY" if [[ "${REBUILD_OOT_IMAGE}" == "true" ]]; then - printf -- '- Base image: `%s`\n- vLLM version: `%s`\n- vLLM commit: `%s`\n- vLLM source: `.github/workflows/docker-release.yaml`\n' \ + printf -- '- Base image: `%s`\n- vLLM version: `%s`\n- vLLM commit: `%s`\n- vLLM source: `.github/workflows/docker-release.yaml`\n- Ignored prebuilt image input: `%s`\n' \ "${OOT_BASE_IMAGE}" \ "${SELECTED_VLLM_VERSION}" \ - "${SELECTED_VLLM_COMMIT}" >> "$GITHUB_STEP_SUMMARY" + "${SELECTED_VLLM_COMMIT}" \ + "${PREBUILT_OOT_IMAGE}" >> "$GITHUB_STEP_SUMMARY" + printf -- '\nBecause branch `%s` is not `main`, a custom OOT docker is rebuilt from the selected branch and the prebuilt image input is ignored.\n' \ + "${ATOM_REF}" >> "$GITHUB_STEP_SUMMARY" else printf -- '- Prebuilt image: `%s`\n- Expected vLLM version: `%s`\n- Expected vLLM commit: `%s`\n' \ "${PREBUILT_OOT_IMAGE}" \ From 80bff24ac293a715909e2fe30995d4bef63d8278 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 26 Mar 2026 12:13:56 +0800 Subject: [PATCH 9/9] refine the docker remove logic and rebuild logic Signed-off-by: zejunchen-zejun --- .github/workflows/atom-vllm-oot-benchmark.yaml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/atom-vllm-oot-benchmark.yaml b/.github/workflows/atom-vllm-oot-benchmark.yaml index 8a2245214..ea601ac9c 100644 --- a/.github/workflows/atom-vllm-oot-benchmark.yaml +++ b/.github/workflows/atom-vllm-oot-benchmark.yaml @@ -231,8 +231,11 @@ jobs: build-oot-image: name: Build custom OOT benchmark image - needs: [resolve-atom-source] - if: needs.resolve-atom-source.outputs.rebuild_oot_image == 'true' + needs: [resolve-atom-source, load-models] + if: >- + needs.resolve-atom-source.outputs.rebuild_oot_image == 'true' + && needs.load-models.result == 'success' + && needs.load-models.outputs.has_enabled_models == 'true' runs-on: build-only-atom outputs: oot_image_tag: ${{ steps.image-meta.outputs.oot_image_tag }} @@ -672,7 +675,11 @@ jobs: docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true docker stop "$CONTAINER_NAME" || true docker rm "$CONTAINER_NAME" || true - docker rmi "$OOT_IMAGE_TAG" || true + if [[ "${OOT_IMAGE_SOURCE}" == "rebuild" ]]; then + docker rmi "$OOT_IMAGE_TAG" || true + else + echo "Keeping prebuilt OOT image cached on runner: ${OOT_IMAGE_TAG}" + fi summarize-benchmark-result: if: always()