Revert "[BE] Update upload bm results GHA (#7612)"

atalman · web-flow · commit ebcfe297d295 · 2025-12-22T21:01:53.000-05:00
This reverts commit 702cfd6.
diff --git a/.github/actions/upload-benchmark-results/action.yml b/.github/actions/upload-benchmark-results/action.yml
@@ -3,10 +3,7 @@ name: Upload benchmark results
 inputs:
   benchmark-results-dir:
     description: 'The path to the directory with all the results in JSON format'
-    required: true
-  benchmark-name:
-    description: 'Manually set the name of the benchmark'
-    default: ''
+    required: True
   dry-run:
     default: 'true'
   schema-version:
@@ -15,6 +12,7 @@ inputs:
     default: ''
   venv:
     description: 'Path to virtual environment to activate'
+    required: false
     default: ''
 
 runs:
@@ -28,53 +26,26 @@ runs:
         if [[ -n "${{ inputs.venv }}" ]]; then
           source "${{ inputs.venv }}"
         fi
-        python3 -mpip install boto3==1.35.33 psutil==7.0.0 nvidia-ml-py==13.580.82
+        python3 -mpip install boto3==1.35.33 psutil==7.0.0 pynvml==12.0.0
 
-    - name: Get device name
-      shell: bash
-      run: |
-        set -eux
+        DEVICE_NAME=""
+        DEVICE_TYPE=""
 
         if command -v nvidia-smi; then
-          DEVICE_NAME=cuda
-          nvidia-smi
-        elif command -v rocm-smi; then
+          # NB: I'm using PyTorch here to get the device name; however, the
+          # correct version of PyTorch has to be installed manually for now. Any
+          # PyTorch version is fine, I just use 2.7.1 to satisfy the PYPIDEP linter
+          python3 -mpip install torch==2.7.1
+        elif command -v rocminfo; then
+          # NB: Installing torch on the ROCm runner with pip here causes CI to fail
+          # with a "memoryview is too large" error, only on MI300 runners. Is the
+          # pip version on the ROCm runner too old? As a workaround, let's use the
+          # GPU device name coming from rocminfo instead
           DEVICE_NAME=rocm
-          rocm-smi
-        elif command -v hl-smi; then
-          DEVICE_NAME=hpu
-          hl-smi
-        else
-          arch=$(uname -m)
-
-          case "$arch" in
-            aarch64|arm64)
-              DEVICE_NAME=arm64-cpu
-              ;;
-            *)
-              DEVICE_NAME=cpu
-              ;;
-          esac
-          lscpu
-        fi
-        echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
-
-    - name: Get device type
-      shell: bash
-      run: |
-        set -eux
-
-        if [[ "${DEVICE_NAME}" == "cuda" ]]; then
-          DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
-        elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
           DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
-        elif [[ "${DEVICE_NAME}" == "hpu" ]]; then
-          DEVICE_TYPE="Intel Gaudi3 "$(hl-smi -q | grep "Product Name" | head -n 1 | awk -F ':' '{print $2}' | sed 's/^ *//')
-        elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
-          DEVICE_TYPE="$(lscpu | grep "Model name" | sed -E 's/.*Model name:[[:space:]]*//; s/Intel\(R\)//g; s/\(R\)//g; s/\(TM\)//g; s/CPU//g; s/Processor//g; s/[[:space:]]+/ /g; s/^ //; s/ $//; s/ /_/g')_$(awk -F: '/Core\(s\) per socket/ {c=$2} /Socket\(s\)/ {s=$2} END {gsub(/ /,"",c); gsub(/ /,"",s); printf "%sc", c*s}' < <(lscpu))"
-        elif [[ "${DEVICE_NAME}" == "arm64-cpu" ]]; then
-          DEVICE_TYPE=$(lscpu | grep 'Vendor ID' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
         fi
+
+        echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
         echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
 
     - name: Check that GITHUB_TOKEN is defined
@@ -108,7 +79,6 @@ runs:
         RUN_ATTEMPT: ${{ github.run_attempt }}
         JOB_ID: ${{ inputs.github-token != '' && steps.get-job-id.outputs.job-id || '0' }}
         JOB_NAME: ${{ inputs.github-token != '' && steps.get-job-id.outputs.job-name || '' }}
-        BENCHMARK_NAME: ${{ inputs.benchmark-name || '' }}
       run: |
         set -eux
 
@@ -151,7 +121,6 @@ runs:
       shell: bash
       env:
         BENCHMARK_RESULTS_DIR: ${{ inputs.benchmark-results-dir }}
-        BENCHMARK_NAME: ${{ inputs.benchmark-name || '' }}
         DRY_RUN: ${{ inputs.dry-run }}
         # Additional information about the benchmarks
         BENCHMARK_METADATA: ${{ steps.gather-metadata.outputs.metadata }}
diff --git a/.github/scripts/benchmarks/gather_metadata.py b/.github/scripts/benchmarks/gather_metadata.py
@@ -83,7 +83,7 @@ def main() -> None:
     metadata = {
         "timestamp": int(time.time()),
         "schema_version": args.schema_version,
-        "name": os.getenv("BENCHMARK_NAME", args.job_name),
+        "name": args.job_name,
         "repo": args.repo,
         "head_branch": args.head_branch,
         "head_sha": args.head_sha,
diff --git a/.github/scripts/upload_benchmark_results.py b/.github/scripts/upload_benchmark_results.py
@@ -10,7 +10,6 @@
 import json
 import logging
 import os
-import sys
 import time
 from argparse import Action, ArgumentParser, Namespace
 from decimal import Decimal
@@ -186,7 +185,7 @@ def upload_to_dynamodb(
 
 
 def read_benchmark_results(filepath: str) -> List[Dict[str, Any]]:
-    benchmark_results: List[Dict[str, Any]] = []
+    benchmark_results = []
     with open(filepath) as f:
         try:
             r = json.load(f)
@@ -217,15 +216,6 @@ def read_benchmark_results(filepath: str) -> List[Dict[str, Any]]:
                 except JSONDecodeError:
                     warn(f"Invalid JSON {line}, skipping")
 
-    # Overwrite the benchmark name if needed
-    if os.getenv("BENCHMARK_NAME"):
-        benchmark_name = os.getenv("BENCHMARK_NAME")
-        for bresult in benchmark_results:
-            if bresult.get("benchmark", {}) and bresult.get("benchmark", {}).get(
-                "name"
-            ):
-                bresult["benchmark"]["name"] = benchmark_name
-
     return benchmark_results
 
 
@@ -329,7 +319,6 @@ def upload_to_s3(
 def main() -> None:
     args = parse_args()
 
-    has_results_uploaded = False
     for file in os.listdir(args.benchmark_results_dir):
         if not file.endswith(".json"):
             continue
@@ -360,7 +349,6 @@ def main() -> None:
         if not benchmark_results:
             continue
 
-        has_results_uploaded = True
         upload_to_s3(
             s3_bucket=OSSCI_BENCHMARKS_BUCKET,
             filepath=filepath,
@@ -369,12 +357,6 @@ def main() -> None:
             dry_run=args.dry_run,
         )
 
-    # When there is no benchmark results, treat it as a failure. This is better
-    # than failing silently.
-    if not has_results_uploaded:
-        warn(f"Find no benchmark results in {args.benchmark_results}")
-        sys.exit(1)
-
 
 if __name__ == "__main__":
     main()