From 376dc82bd4b24a394e14c4603a3691dc500a0ea7 Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Mon, 27 Apr 2026 15:01:05 +0000 Subject: [PATCH 1/5] [CI] Refactor Docker workflow to single source of truth for config Move all configurable defaults into the workflow-level env block using inputs || 'fallback' expressions. Every job and step now references env.XXX instead of the previous DEFAULT_* variables or scattered inputs.xxx || env.DEFAULT_XXX patterns. - Consolidate defaults: PYTORCH_REPO, PYTORCH_BRANCH, PYTHON_VERSION, AMDGPU_FAMILY, ROCM_VERSION, INDEX_URL, BASE_IMAGE all live in env - Remove the "Resolve config" / "Resolve inputs with defaults" steps; only truly computed values (commit SHA, discovered ROCm version, docker tag) remain as step outputs - Change default AMDGPU family from gfx950-dcgpu to gfx94X-dcgpu - Add GFX arch and Python version to nightly matrix job names - Add comments explaining that run-name and job name fields cannot access the env context (GitHub Actions limitation) --- .../build_portable_linux_pytorch_dockers.yml | 179 ++++++++---------- 1 file changed, 76 insertions(+), 103 deletions(-) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index d5c9a94c3b1ad..43d28432f4159 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -25,8 +25,8 @@ on: amdgpu_family: type: choice options: - - gfx950-dcgpu - gfx94X-dcgpu + - gfx950-dcgpu - gfx90X-dcgpu - gfx120X-all - gfx110X-all @@ -45,21 +45,31 @@ on: permissions: contents: read +# run-name and job name cannot access the env context, so the +# inputs || 'fallback' defaults are necessarily repeated here. 
run-name: >- ${{ github.event_name == 'schedule' && 'Nightly Docker builds' || format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', - inputs.amdgpu_family || 'gfx950-dcgpu', + inputs.amdgpu_family || 'gfx94X-dcgpu', inputs.pytorch_repo || 'pytorch/pytorch', inputs.pytorch_branch || 'nightly', inputs.rocm_version || 'auto') }} +# ── Single source of truth for every configurable value ────────────── +# For workflow_dispatch the user's input wins; for schedule (or when an +# input is left blank) the fallback after || applies. All jobs / steps +# reference env.XXX — never raw inputs (except in run-name and job name +# where the env context is unavailable). env: REGISTRY: docker.io IMAGE_NAME: rocm/pytorch-private - DEFAULT_AMDGPU_FAMILY: gfx950-dcgpu - DEFAULT_PYTHON_VERSION: "3.12" - DEFAULT_INDEX_URL: "https://rocm.nightlies.amd.com/v2-staging" - DEFAULT_BASE_IMAGE: "ubuntu:24.04" + PYTORCH_REPO: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }} + PYTORCH_BRANCH: ${{ inputs.pytorch_branch || 'nightly' }} + PYTHON_VERSION: ${{ inputs.python_version || '3.12' }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} + ROCM_VERSION: ${{ inputs.rocm_version }} + INDEX_URL: ${{ inputs.index_url || 'https://rocm.nightlies.amd.com/v2-staging' }} + BASE_IMAGE: "ubuntu:24.04" jobs: # ── Nightly matrix build (schedule only) ───────────────────────────────── @@ -81,7 +91,8 @@ jobs: - pytorch_repo: ROCm/pytorch pytorch_branch: release/2.9 label: "2.9" - name: "Nightly | torch ${{ matrix.label }} | MI355" + # job name cannot access env context — inputs || defaults repeated here + name: "Nightly | torch ${{ matrix.label }} | py${{ inputs.python_version || '3.12' }} | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }}" runs-on: ubuntu-latest steps: - name: Checkout workflow files @@ -110,8 +121,8 @@ jobs: - name: Discover ROCm version from index id: discover run: | - python3 - "${{ env.DEFAULT_INDEX_URL }}" "${{ env.DEFAULT_AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" 
<<'PYEOF' - import re, sys, urllib.request, urllib.parse + python3 - "${{ env.INDEX_URL }}" "${{ env.AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' + import re, sys, urllib.request, urllib.parse, os index_url, gpu_family = sys.argv[1], sys.argv[2] prefix = sys.argv[3] if len(sys.argv) > 3 else "" @@ -146,24 +157,14 @@ jobs: print(f"Latest torch wheel: {latest}") print(f"Discovered ROCm version: {rocm_ver}") - import os with open(os.environ["GITHUB_OUTPUT"], "a") as f: f.write(f"rocm_version={rocm_ver}\n") f.write(f"torch_wheel_version={latest}\n") PYEOF - - name: Resolve config - id: cfg + - name: Resolve commit SHA + id: meta run: | - echo "amdgpu_family=${{ env.DEFAULT_AMDGPU_FAMILY }}" >> $GITHUB_OUTPUT - echo "python_version=${{ env.DEFAULT_PYTHON_VERSION }}" >> $GITHUB_OUTPUT - echo "rocm_version=${{ steps.discover.outputs.rocm_version }}" >> $GITHUB_OUTPUT - echo "index_url=${{ env.DEFAULT_INDEX_URL }}" >> $GITHUB_OUTPUT - echo "base_image=${{ env.DEFAULT_BASE_IMAGE }}" >> $GITHUB_OUTPUT - echo "torch_prefix=${{ steps.prefix.outputs.value }}" >> $GITHUB_OUTPUT - echo "pytorch_repo=${{ matrix.pytorch_repo }}" >> $GITHUB_OUTPUT - echo "pytorch_branch=${{ matrix.pytorch_branch }}" >> $GITHUB_OUTPUT - COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT @@ -172,14 +173,10 @@ jobs: run: | BRANCH="${{ matrix.pytorch_branch }}" BRANCH_SAFE="${BRANCH//\//-}" - COMMIT="${{ steps.cfg.outputs.pytorch_commit }}" - ROCM_VERSION="${{ steps.cfg.outputs.rocm_version }}" - PYTHON_VERSION="${{ steps.cfg.outputs.python_version }}" - GFX="${{ steps.cfg.outputs.amdgpu_family }}" - BASE_IMAGE="${{ steps.cfg.outputs.base_image }}" - OS=$(echo "${BASE_IMAGE}" | tr -d ':' | tr '/' '-') - - IMAGE_TAG="pytorch-${BRANCH_SAFE}-${COMMIT}-rocm${ROCM_VERSION}-${OS}-py${PYTHON_VERSION}-${GFX}" + ROCM_VERSION="${{ steps.discover.outputs.rocm_version }}" + OS=$(echo "${{ env.BASE_IMAGE }}" | tr -d ':' | tr '/' '-') + + 
IMAGE_TAG="pytorch-${BRANCH_SAFE}-${{ steps.meta.outputs.pytorch_commit }}-rocm${ROCM_VERSION}-${OS}-py${{ env.PYTHON_VERSION }}-${{ env.AMDGPU_FAMILY }}" IMAGE_TAG="${IMAGE_TAG//+/-}" echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT echo "Generated image tag: ${IMAGE_TAG}" @@ -206,27 +203,21 @@ jobs: --tag "${IMAGE}" \ --label "pytorch.repo=${{ matrix.pytorch_repo }}" \ --label "pytorch.branch=${{ matrix.pytorch_branch }}" \ - --label "pytorch.commit=${{ steps.cfg.outputs.pytorch_commit }}" \ - --build-arg "BASE_IMAGE=${{ steps.cfg.outputs.base_image }}" \ - --build-arg "ROCM_VERSION=${{ steps.cfg.outputs.rocm_version }}" \ - --build-arg "AMDGPU_FAMILY=${{ steps.cfg.outputs.amdgpu_family }}" \ - --build-arg "PYTHON_VERSION=${{ steps.cfg.outputs.python_version }}" \ - --build-arg "INDEX_URL=${{ steps.cfg.outputs.index_url }}" \ + --label "pytorch.commit=${{ steps.meta.outputs.pytorch_commit }}" \ + --build-arg "BASE_IMAGE=${{ env.BASE_IMAGE }}" \ + --build-arg "ROCM_VERSION=${{ steps.discover.outputs.rocm_version }}" \ + --build-arg "AMDGPU_FAMILY=${{ env.AMDGPU_FAMILY }}" \ + --build-arg "PYTHON_VERSION=${{ env.PYTHON_VERSION }}" \ + --build-arg "INDEX_URL=${{ env.INDEX_URL }}" \ --build-arg "TORCH_VERSION_PREFIX=${{ steps.prefix.outputs.value }}" \ pytorch-src echo "Docker image built successfully: ${IMAGE}" - name: Get ROCm packages info - id: rocm-packages run: | IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - ROCM_PACKAGES=$(docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found") - echo "rocm_packages<<EOF" >> $GITHUB_OUTPUT - echo "${ROCM_PACKAGES}" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - echo "ROCm packages:" - echo "${ROCM_PACKAGES}" + docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" - name: Push Docker image run: | @@ -236,7 +227,7 @@ jobs: - name: Post-build summary run: | IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{
steps.docker-tag.outputs.tag }}" - echo "## PyTorch Docker Build Summary — ${{ matrix.label }}" >> $GITHUB_STEP_SUMMARY + echo "## PyTorch Docker Build — ${{ matrix.label }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY @@ -244,15 +235,16 @@ jobs: echo "| Torch Wheel | ${{ steps.discover.outputs.torch_wheel_version }} |" >> $GITHUB_STEP_SUMMARY echo "| PyTorch Repo | ${{ matrix.pytorch_repo }} |" >> $GITHUB_STEP_SUMMARY echo "| PyTorch Branch | ${{ matrix.pytorch_branch }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Commit | ${{ steps.cfg.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY - echo "| AMDGPU Family | ${{ steps.cfg.outputs.amdgpu_family }} |" >> $GITHUB_STEP_SUMMARY - echo "| Python | ${{ steps.cfg.outputs.python_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| ROCm (discovered) | ${{ steps.cfg.outputs.rocm_version }} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Commit | ${{ steps.meta.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY + echo "| AMDGPU Family | ${{ env.AMDGPU_FAMILY }} |" >> $GITHUB_STEP_SUMMARY + echo "| Python | ${{ env.PYTHON_VERSION }} |" >> $GITHUB_STEP_SUMMARY + echo "| ROCm (discovered) | ${{ steps.discover.outputs.rocm_version }} |" >> $GITHUB_STEP_SUMMARY # ── Single image build (manual dispatch) ────────────────────────────────── build-docker: if: github.event_name == 'workflow_dispatch' - name: "Build | ${{ inputs.amdgpu_family }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}" + # job name cannot access env context — inputs || defaults repeated here + name: "Build | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}" runs-on: ubuntu-latest steps: - name: Checkout workflow files @@ -261,29 +253,29 @@ jobs: - name: Checkout PyTorch source uses: actions/checkout@v4 with: - 
repository: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }} - ref: ${{ inputs.pytorch_branch || 'nightly' }} + repository: ${{ env.PYTORCH_REPO }} + ref: ${{ env.PYTORCH_BRANCH }} path: pytorch-src fetch-depth: 1 - name: Derive torch version prefix from branch id: prefix run: | - BRANCH="${{ inputs.pytorch_branch || 'nightly' }}" + BRANCH="${{ env.PYTORCH_BRANCH }}" if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT echo "Derived torch prefix: ${BASH_REMATCH[1]}" else echo "value=" >> $GITHUB_OUTPUT - echo "No prefix (nightly/main branch)" + echo "No prefix (nightly branch)" fi - name: Discover ROCm version from index id: discover - if: ${{ !inputs.rocm_version }} + if: ${{ !env.ROCM_VERSION }} run: | - python3 - "${{ inputs.index_url || env.DEFAULT_INDEX_URL }}" "${{ inputs.amdgpu_family || env.DEFAULT_AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' - import re, sys, urllib.request, urllib.parse + python3 - "${{ env.INDEX_URL }}" "${{ env.AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' + import re, sys, urllib.request, urllib.parse, os index_url, gpu_family = sys.argv[1], sys.argv[2] prefix = sys.argv[3] if len(sys.argv) > 3 else "" @@ -318,44 +310,32 @@ jobs: print(f"Latest torch wheel: {latest}") print(f"Discovered ROCm version: {rocm_ver}") - import os with open(os.environ["GITHUB_OUTPUT"], "a") as f: f.write(f"rocm_version={rocm_ver}\n") f.write(f"torch_wheel_version={latest}\n") PYEOF - - name: Resolve inputs with defaults - id: cfg + - name: Resolve ROCm version + id: rocm run: | - echo "amdgpu_family=${{ inputs.amdgpu_family || env.DEFAULT_AMDGPU_FAMILY }}" >> $GITHUB_OUTPUT - echo "python_version=${{ inputs.python_version || env.DEFAULT_PYTHON_VERSION }}" >> $GITHUB_OUTPUT - - # Use explicit rocm_version if provided, otherwise use discovered version - ROCM="${{ inputs.rocm_version || steps.discover.outputs.rocm_version }}" - echo "rocm_version=${ROCM}" >> $GITHUB_OUTPUT - 
- echo "index_url=${{ inputs.index_url || env.DEFAULT_INDEX_URL }}" >> $GITHUB_OUTPUT - echo "base_image=${{ env.DEFAULT_BASE_IMAGE }}" >> $GITHUB_OUTPUT - echo "torch_prefix=${{ steps.prefix.outputs.value }}" >> $GITHUB_OUTPUT - echo "pytorch_repo=${{ inputs.pytorch_repo || 'pytorch/pytorch' }}" >> $GITHUB_OUTPUT - echo "pytorch_branch=${{ inputs.pytorch_branch || 'nightly' }}" >> $GITHUB_OUTPUT + ROCM="${{ env.ROCM_VERSION || steps.discover.outputs.rocm_version }}" + echo "version=${ROCM}" >> $GITHUB_OUTPUT + - name: Resolve commit SHA + id: meta + run: | COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT - name: Generate Docker image tag id: docker-tag run: | - BRANCH="${{ steps.cfg.outputs.pytorch_branch }}" + BRANCH="${{ env.PYTORCH_BRANCH }}" BRANCH_SAFE="${BRANCH//\//-}" - COMMIT="${{ steps.cfg.outputs.pytorch_commit }}" - ROCM_VERSION="${{ steps.cfg.outputs.rocm_version }}" - PYTHON_VERSION="${{ steps.cfg.outputs.python_version }}" - GFX="${{ steps.cfg.outputs.amdgpu_family }}" - BASE_IMAGE="${{ steps.cfg.outputs.base_image }}" - OS=$(echo "${BASE_IMAGE}" | tr -d ':' | tr '/' '-') - - IMAGE_TAG="pytorch-${BRANCH_SAFE}-${COMMIT}-rocm${ROCM_VERSION}-${OS}-py${PYTHON_VERSION}-${GFX}" + ROCM_VERSION="${{ steps.rocm.outputs.version }}" + OS=$(echo "${{ env.BASE_IMAGE }}" | tr -d ':' | tr '/' '-') + + IMAGE_TAG="pytorch-${BRANCH_SAFE}-${{ steps.meta.outputs.pytorch_commit }}-rocm${ROCM_VERSION}-${OS}-py${{ env.PYTHON_VERSION }}-${{ env.AMDGPU_FAMILY }}" IMAGE_TAG="${IMAGE_TAG//+/-}" echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT echo "Generated image tag: ${IMAGE_TAG}" @@ -380,29 +360,23 @@ jobs: docker build \ --file pytorch-src/Dockerfile \ --tag "${IMAGE}" \ - --label "pytorch.repo=${{ steps.cfg.outputs.pytorch_repo }}" \ - --label "pytorch.branch=${{ steps.cfg.outputs.pytorch_branch }}" \ - --label "pytorch.commit=${{ steps.cfg.outputs.pytorch_commit }}" \ - --build-arg "BASE_IMAGE=${{ 
steps.cfg.outputs.base_image }}" \ - --build-arg "ROCM_VERSION=${{ steps.cfg.outputs.rocm_version }}" \ - --build-arg "AMDGPU_FAMILY=${{ steps.cfg.outputs.amdgpu_family }}" \ - --build-arg "PYTHON_VERSION=${{ steps.cfg.outputs.python_version }}" \ - --build-arg "INDEX_URL=${{ steps.cfg.outputs.index_url }}" \ - --build-arg "TORCH_VERSION_PREFIX=${{ steps.cfg.outputs.torch_prefix }}" \ + --label "pytorch.repo=${{ env.PYTORCH_REPO }}" \ + --label "pytorch.branch=${{ env.PYTORCH_BRANCH }}" \ + --label "pytorch.commit=${{ steps.meta.outputs.pytorch_commit }}" \ + --build-arg "BASE_IMAGE=${{ env.BASE_IMAGE }}" \ + --build-arg "ROCM_VERSION=${{ steps.rocm.outputs.version }}" \ + --build-arg "AMDGPU_FAMILY=${{ env.AMDGPU_FAMILY }}" \ + --build-arg "PYTHON_VERSION=${{ env.PYTHON_VERSION }}" \ + --build-arg "INDEX_URL=${{ env.INDEX_URL }}" \ + --build-arg "TORCH_VERSION_PREFIX=${{ steps.prefix.outputs.value }}" \ pytorch-src echo "Docker image built successfully: ${IMAGE}" - name: Get ROCm packages info - id: rocm-packages run: | IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - ROCM_PACKAGES=$(docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found") - echo "rocm_packages<<EOF" >> $GITHUB_OUTPUT - echo "${ROCM_PACKAGES}" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - echo "ROCm packages:" - echo "${ROCM_PACKAGES}" + docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" - name: Push Docker image run: | @@ -417,11 +391,10 @@ jobs: echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Repo | ${{ steps.cfg.outputs.pytorch_repo }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Branch | ${{ steps.cfg.outputs.pytorch_branch }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Commit | ${{ steps.cfg.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY - echo "| AMDGPU
Family | ${{ steps.cfg.outputs.amdgpu_family }} |" >> $GITHUB_STEP_SUMMARY - echo "| Python | ${{ steps.cfg.outputs.python_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| ROCm | ${{ steps.cfg.outputs.rocm_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| Torch Version Prefix | ${{ steps.cfg.outputs.torch_prefix || 'latest' }} |" >> $GITHUB_STEP_SUMMARY - echo "| Index URL | ${{ steps.cfg.outputs.index_url }} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Repo | ${{ env.PYTORCH_REPO }} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Branch | ${{ env.PYTORCH_BRANCH }} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Commit | ${{ steps.meta.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY + echo "| AMDGPU Family | ${{ env.AMDGPU_FAMILY }} |" >> $GITHUB_STEP_SUMMARY + echo "| Python | ${{ env.PYTHON_VERSION }} |" >> $GITHUB_STEP_SUMMARY + echo "| ROCm | ${{ steps.rocm.outputs.version }} |" >> $GITHUB_STEP_SUMMARY + echo "| Index URL | ${{ env.INDEX_URL }} |" >> $GITHUB_STEP_SUMMARY From e4c51ef64606511a3b43181f4a8d6c8fe7a6c35c Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 6 May 2026 04:10:38 +0000 Subject: [PATCH 2/5] Refactor Docker workflow structure for clarity and reusability - Split the Docker workflow into a parent file for scheduling and manual triggers, and a reusable callee for build logic. - Added comments to clarify the purpose of each workflow and the handling of inputs. - Ensured that all required inputs are supplied by the parent workflow for better maintainability. 
--- .github/workflows/build_portable_linux_pytorch_dockers.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index 43d28432f4159..194ea4a44032d 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -1,5 +1,12 @@ name: Build Portable Linux PyTorch Dockers +# Workflow split (parent vs reusable callee): +# - Parent (this file): build_portable_linux_pytorch_dockers.yml — schedule + workflow_dispatch entrypoints. +# - Callee: .github/workflows/_build_portable_linux_pytorch_docker.yml — workflow_call only; build logic moves here. +# - Defaults: Strategy B — callee workflow_call inputs are required with no default: on the callee; the parent +# supplies every value (workflow_dispatch.defaults for manual UI + explicit literals / matrix with: on schedule). +# Nested run-name / job names in the callee use inputs.* only (no duplicated || 'fallback' strings there). + on: schedule: - cron: "0 6 * * *" # daily at 06:00 UTC From 1ae4908913408777a61a52d3591930c25e4bd8d3 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 6 May 2026 04:46:28 +0000 Subject: [PATCH 3/5] Refactor ROCm version discovery in Docker workflow - Replace inline Python script with a dedicated script (`discover_rocm_from_torch_index.py`) for better maintainability and clarity. - Update environment variable handling to streamline the discovery process for ROCm versions in the Docker workflow. - Ensure consistent usage of environment variables across different jobs for improved readability. 
--- .../scripts/discover_rocm_from_torch_index.py | 114 ++++++++++++++++++ .../build_portable_linux_pytorch_dockers.yml | 96 +++------------ 2 files changed, 130 insertions(+), 80 deletions(-) create mode 100644 .github/scripts/discover_rocm_from_torch_index.py diff --git a/.github/scripts/discover_rocm_from_torch_index.py b/.github/scripts/discover_rocm_from_torch_index.py new file mode 100644 index 0000000000000..d94870c0c550b --- /dev/null +++ b/.github/scripts/discover_rocm_from_torch_index.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +discover_rocm_from_torch_index.py + +Parse a ROCm PyTorch wheel index HTML, pick the latest torch wheel matching an +optional PEP 440-style prefix (from release/x.y branches), and emit step outputs +via GITHUB_OUTPUT (or legacy ::set-output when that variable is unset). + +Usage (from repo root, as in GitHub Actions): + + python3 .github/scripts/discover_rocm_from_torch_index.py \\ + --index-url <url> \\ + --amdgpu-family <family> \\ + [--torch-version-prefix <prefix>] +""" + +from __future__ import annotations + +import argparse +import os +import re +import urllib.parse +import urllib.request +from typing import Any + + +def _version_sort_key(v: str) -> tuple[int, ...]: + try: + return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit()) + except (ValueError, AttributeError): + return (0,) + + +def discover_rocm_version( + index_url: str, + gpu_family: str, + torch_version_prefix: str, + *, + timeout_s: int = 60, +) -> tuple[str, str]: + """Return (rocm_version, latest_torch_wheel_version_string).""" + url = f"{index_url.rstrip('/')}/{gpu_family}/torch/" + print(f"Fetching torch index: {url}") + # Context manager closes the HTTP response (and its socket) even if read()/decode() raises. + with urllib.request.urlopen(url, timeout=timeout_s) as resp: + html = resp.read().decode() + + pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE) + versions: list[str] = [] + for m in pattern.finditer(html): + ver = urllib.parse.unquote(m.group(1).split("-")[0]) + if "+rocm" in ver: + versions.append(ver) + + if torch_version_prefix: + versions = [v for v in
versions if v.split("+")[0].startswith(torch_version_prefix)] + + if not versions: + print(f"::error::No torch wheels found (prefix={torch_version_prefix!r})") + raise SystemExit(1) + + latest = max(versions, key=_version_sort_key) + match = re.search(r"\+rocm(.+)", latest) + if not match: + print(f"::error::Could not parse ROCm suffix from wheel version {latest!r}") + raise SystemExit(1) + rocm_ver = match.group(1) + + print(f"Latest torch wheel: {latest}") + print(f"Discovered ROCm version: {rocm_ver}") + return rocm_ver, latest + + +def set_output(name: str, val: Any) -> None: + print(f"Setting output {name}={val}") + if os.getenv("GITHUB_OUTPUT"): + with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env: + print(f"{name}={val}", file=env) + else: + print(f"::set-output name={name}::{val}") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Discover ROCm version from a PyTorch ROCm wheel index page.", + ) + parser.add_argument( + "--index-url", + required=True, + help="Base index URL (e.g. https://rocm.nightlies.amd.com/v2-staging)", + ) + parser.add_argument( + "--amdgpu-family", + required=True, + help="GPU family subdirectory under the index (e.g. gfx94X-dcgpu)", + ) + parser.add_argument( + "--torch-version-prefix", + default="", + help="If set, only wheels whose version starts with this prefix (e.g. 
2.11)", + ) + args = parser.parse_args() + + rocm_ver, latest = discover_rocm_version( + args.index_url, + args.amdgpu_family, + args.torch_version_prefix, + ) + set_output("rocm_version", rocm_ver) + set_output("torch_wheel_version", latest) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index 194ea4a44032d..ee197e50df53b 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -127,47 +127,15 @@ jobs: - name: Discover ROCm version from index id: discover + env: + INDEX_URL: ${{ env.INDEX_URL }} + AMDGPU_FAMILY: ${{ env.AMDGPU_FAMILY }} + PREFIX_VALUE: ${{ steps.prefix.outputs.value }} run: | - python3 - "${{ env.INDEX_URL }}" "${{ env.AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' - import re, sys, urllib.request, urllib.parse, os - - index_url, gpu_family = sys.argv[1], sys.argv[2] - prefix = sys.argv[3] if len(sys.argv) > 3 else "" - - url = f"{index_url.rstrip('/')}/{gpu_family}/torch/" - print(f"Fetching torch index: {url}") - html = urllib.request.urlopen(url, timeout=60).read().decode() - - pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE) - versions = [] - for m in pattern.finditer(html): - ver = urllib.parse.unquote(m.group(1).split("-")[0]) - if "+rocm" in ver: - versions.append(ver) - - if prefix: - versions = [v for v in versions if v.split("+")[0].startswith(prefix)] - - if not versions: - print(f"::error::No torch wheels found (prefix={prefix!r})") - sys.exit(1) - - def key(v): - try: - return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit()) - except (ValueError, AttributeError): - return (0,) - - latest = max(versions, key=key) - rocm_ver = re.search(r"\+rocm(.+)", latest).group(1) - - print(f"Latest torch wheel: {latest}") - print(f"Discovered ROCm version: {rocm_ver}") - - with 
open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write(f"rocm_version={rocm_ver}\n") - f.write(f"torch_wheel_version={latest}\n") - PYEOF + python3 .github/scripts/discover_rocm_from_torch_index.py \ + --index-url "$INDEX_URL" \ + --amdgpu-family "$AMDGPU_FAMILY" \ + --torch-version-prefix "$PREFIX_VALUE" - name: Resolve commit SHA id: meta @@ -280,47 +248,15 @@ jobs: - name: Discover ROCm version from index id: discover if: ${{ !env.ROCM_VERSION }} + env: + INDEX_URL: ${{ env.INDEX_URL }} + AMDGPU_FAMILY: ${{ env.AMDGPU_FAMILY }} + PREFIX_VALUE: ${{ steps.prefix.outputs.value }} run: | - python3 - "${{ env.INDEX_URL }}" "${{ env.AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' - import re, sys, urllib.request, urllib.parse, os - - index_url, gpu_family = sys.argv[1], sys.argv[2] - prefix = sys.argv[3] if len(sys.argv) > 3 else "" - - url = f"{index_url.rstrip('/')}/{gpu_family}/torch/" - print(f"Fetching torch index: {url}") - html = urllib.request.urlopen(url, timeout=60).read().decode() - - pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE) - versions = [] - for m in pattern.finditer(html): - ver = urllib.parse.unquote(m.group(1).split("-")[0]) - if "+rocm" in ver: - versions.append(ver) - - if prefix: - versions = [v for v in versions if v.split("+")[0].startswith(prefix)] - - if not versions: - print(f"::error::No torch wheels found (prefix={prefix!r})") - sys.exit(1) - - def key(v): - try: - return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit()) - except (ValueError, AttributeError): - return (0,) - - latest = max(versions, key=key) - rocm_ver = re.search(r"\+rocm(.+)", latest).group(1) - - print(f"Latest torch wheel: {latest}") - print(f"Discovered ROCm version: {rocm_ver}") - - with open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write(f"rocm_version={rocm_ver}\n") - f.write(f"torch_wheel_version={latest}\n") - PYEOF + python3 .github/scripts/discover_rocm_from_torch_index.py \ + --index-url "$INDEX_URL" \ + 
--amdgpu-family "$AMDGPU_FAMILY" \ + --torch-version-prefix "$PREFIX_VALUE" - name: Resolve ROCm version id: rocm From a9eb101b045e0274582971043ed67a748e49a3ad Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Fri, 8 May 2026 04:54:54 +0000 Subject: [PATCH 4/5] Add .github/workflows/_build_portable_linux_pytorch_docker.yml as callable workflow --- .../_build_portable_linux_pytorch_docker.yml | 231 ++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 .github/workflows/_build_portable_linux_pytorch_docker.yml diff --git a/.github/workflows/_build_portable_linux_pytorch_docker.yml b/.github/workflows/_build_portable_linux_pytorch_docker.yml new file mode 100644 index 0000000000000..e0c3c36072d13 --- /dev/null +++ b/.github/workflows/_build_portable_linux_pytorch_docker.yml @@ -0,0 +1,231 @@ +# Reusable workflow: build and push one Portable Linux PyTorch Docker image. +# Triggered only via workflow_call (see build_portable_linux_pytorch_dockers.yml parent). +# Strategy B — inputs are required on the callee (no default: here); callers supply literals. +# Use rocm_version: auto to run index discovery (parent normalizes empty dispatch input to auto). + +name: Build Portable Linux PyTorch Docker (reusable) + +on: + workflow_call: + inputs: + pytorch_repo: + description: GitHub repo to clone into the image (e.g. pytorch/pytorch) + required: true + type: string + pytorch_branch: + description: Branch or ref to clone + required: true + type: string + python_version: + required: true + type: string + amdgpu_family: + required: true + type: string + rocm_version: + description: Concrete ROCm version string, or 'auto' to discover from the wheel index + required: true + type: string + index_url: + description: Base URL for PyTorch wheels index + required: true + type: string + base_image: + description: Base image for the Dockerfile (e.g. ubuntu:24.04) + required: true + type: string + registry: + description: Docker registry host (e.g. 
docker.io) + required: true + type: string + image_name: + description: Image repository path without registry (e.g. org/name) + required: true + type: string + job_display_name: + description: Value for jobs.build-docker.name (parent-defined for UI) + required: true + type: string + summary_title: + description: Markdown heading title for the post-build summary (without leading ##) + required: true + type: string + +permissions: + contents: read + +run-name: >- + ${{ format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', + inputs.amdgpu_family, inputs.pytorch_repo, inputs.pytorch_branch, inputs.rocm_version) }} + +jobs: + build-docker: + name: ${{ inputs.job_display_name }} + runs-on: ubuntu-latest + steps: + - name: Checkout workflow files + uses: actions/checkout@v4 + + - name: Checkout PyTorch source + uses: actions/checkout@v4 + with: + repository: ${{ inputs.pytorch_repo }} + ref: ${{ inputs.pytorch_branch }} + path: pytorch-src + fetch-depth: 1 + + - name: Derive torch version prefix from branch + id: prefix + env: + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + run: | + BRANCH="$PYTORCH_BRANCH" + if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then + echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT + echo "Derived torch prefix: ${BASH_REMATCH[1]}" + else + echo "value=" >> $GITHUB_OUTPUT + echo "No prefix (nightly/main branch)" + fi + + - name: Discover ROCm version from index + id: discover + if: ${{ inputs.rocm_version == 'auto' || inputs.rocm_version == '' }} + env: + INDEX_URL: ${{ inputs.index_url }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + PREFIX_VALUE: ${{ steps.prefix.outputs.value }} + run: | + python3 .github/scripts/discover_rocm_from_torch_index.py \ + --index-url "$INDEX_URL" \ + --amdgpu-family "$AMDGPU_FAMILY" \ + --torch-version-prefix "$PREFIX_VALUE" + + - name: Resolve ROCm version + id: rocm + env: + ROCM_INPUT: ${{ inputs.rocm_version }} + DISCOVER_ROCM: ${{ steps.discover.outputs.rocm_version }} + DISCOVER_TORCH: ${{ 
steps.discover.outputs.torch_wheel_version }} + run: | + if [ "$ROCM_INPUT" = "auto" ] || [ -z "$ROCM_INPUT" ]; then + echo "version=${DISCOVER_ROCM:?ROCm version discovery produced no output}" >> $GITHUB_OUTPUT # fail fast instead of tagging an image with an empty ROCm version + echo "torch_wheel=${DISCOVER_TORCH}" >> $GITHUB_OUTPUT + else + echo "version=${ROCM_INPUT}" >> $GITHUB_OUTPUT + echo "torch_wheel=" >> $GITHUB_OUTPUT + fi + + - name: Resolve commit SHA + id: meta + run: | + COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" + echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT + + - name: Generate Docker image tag + id: docker-tag + env: + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + BASE_IMAGE: ${{ inputs.base_image }} + PYTHON_VERSION: ${{ inputs.python_version }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + REGISTRY: ${{ inputs.registry }} + IMAGE_NAME: ${{ inputs.image_name }} + ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} + PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} + run: | + BRANCH="$PYTORCH_BRANCH" + BRANCH_SAFE="${BRANCH//\//-}" + OS=$(echo "$BASE_IMAGE" | tr -d ':' | tr '/' '-') + + IMAGE_TAG="pytorch-${BRANCH_SAFE}-${PYTORCH_COMMIT}-rocm${ROCM_RESOLVED_VERSION}-${OS}-py${PYTHON_VERSION}-${AMDGPU_FAMILY}" + IMAGE_TAG="${IMAGE_TAG//+/-}" + IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" + echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT + echo "image=${IMAGE}" >> $GITHUB_OUTPUT + echo "Generated image tag: ${IMAGE_TAG}" + echo "Full image ref: ${IMAGE}" + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERUSERNAME }} + password: ${{ secrets.DOCKERTOKEN }} + + - name: Prepare build context + run: | + cp dockerfiles/Dockerfile pytorch-src/ + mkdir -p pytorch-src/.github/scripts + cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ + cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ + + - name: Build Docker image + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + PYTORCH_REPO: ${{ inputs.pytorch_repo }} + PYTORCH_BRANCH: ${{
inputs.pytorch_branch }} + PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} + BASE_IMAGE: ${{ inputs.base_image }} + ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + PYTHON_VERSION: ${{ inputs.python_version }} + INDEX_URL: ${{ inputs.index_url }} + TORCH_VERSION_PREFIX: ${{ steps.prefix.outputs.value }} + run: | + docker build \ + --file pytorch-src/Dockerfile \ + --tag "${IMAGE}" \ + --label "pytorch.repo=${PYTORCH_REPO}" \ + --label "pytorch.branch=${PYTORCH_BRANCH}" \ + --label "pytorch.commit=${PYTORCH_COMMIT}" \ + --build-arg "BASE_IMAGE=${BASE_IMAGE}" \ + --build-arg "ROCM_VERSION=${ROCM_RESOLVED_VERSION}" \ + --build-arg "AMDGPU_FAMILY=${AMDGPU_FAMILY}" \ + --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \ + --build-arg "INDEX_URL=${INDEX_URL}" \ + --build-arg "TORCH_VERSION_PREFIX=${TORCH_VERSION_PREFIX}" \ + pytorch-src + + echo "Docker image built successfully: ${IMAGE}" + + - name: Get ROCm packages info + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + run: | + docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" + + - name: Push Docker image + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + run: | + docker push "${IMAGE}" + echo "Docker image pushed successfully" + + - name: Post-build summary + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + TORCH_WHEEL: ${{ steps.rocm.outputs.torch_wheel }} + SUMMARY_TITLE: ${{ inputs.summary_title }} + PYTORCH_REPO: ${{ inputs.pytorch_repo }} + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + PYTHON_VERSION: ${{ inputs.python_version }} + INDEX_URL: ${{ inputs.index_url }} + ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} + PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} + run: | + echo "## ${SUMMARY_TITLE}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY + echo 
"|-----------|-------|" >> $GITHUB_STEP_SUMMARY + echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY + if [ -n "$TORCH_WHEEL" ]; then + echo "| Torch Wheel | ${TORCH_WHEEL} |" >> $GITHUB_STEP_SUMMARY + fi + echo "| PyTorch Repo | ${PYTORCH_REPO} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Branch | ${PYTORCH_BRANCH} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Commit | ${PYTORCH_COMMIT} |" >> $GITHUB_STEP_SUMMARY + echo "| AMDGPU Family | ${AMDGPU_FAMILY} |" >> $GITHUB_STEP_SUMMARY + echo "| Python | ${PYTHON_VERSION} |" >> $GITHUB_STEP_SUMMARY + echo "| ROCm | ${ROCM_RESOLVED_VERSION} |" >> $GITHUB_STEP_SUMMARY + echo "| Index URL | ${INDEX_URL} |" >> $GITHUB_STEP_SUMMARY From 8818b6db1cbb70255f361ebf57f898fbe932ad2e Mon Sep 17 00:00:00 2001 From: ethanwee1 Date: Mon, 11 May 2026 17:25:15 +0000 Subject: [PATCH 5/5] [CI] Call reusable Docker workflow from parent Reduce the parent Docker workflow to schedule and workflow_dispatch callers that pass all required inputs into the reusable workflow. This completes the Strategy B split so default literals live in the parent while build logic uses inputs.* in the callee. --- .../build_portable_linux_pytorch_dockers.yml | 452 +++++------------- 1 file changed, 109 insertions(+), 343 deletions(-) diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index ee197e50df53b..bfb0f56965d8d 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -1,343 +1,109 @@ -name: Build Portable Linux PyTorch Dockers - -# Workflow split (parent vs reusable callee): -# - Parent (this file): build_portable_linux_pytorch_dockers.yml — schedule + workflow_dispatch entrypoints. -# - Callee: .github/workflows/_build_portable_linux_pytorch_docker.yml — workflow_call only; build logic moves here. 
-# - Defaults: Strategy B — callee workflow_call inputs are required with no default: on the callee; the parent -# supplies every value (workflow_dispatch.defaults for manual UI + explicit literals / matrix with: on schedule). -# Nested run-name / job names in the callee use inputs.* only (no duplicated || 'fallback' strings there). - -on: - schedule: - - cron: "0 6 * * *" # daily at 06:00 UTC - workflow_dispatch: - inputs: - pytorch_repo: - description: "GitHub repo to clone into the image (e.g. 'pytorch/pytorch' or 'ROCm/pytorch')" - type: string - default: "pytorch/pytorch" - pytorch_branch: - description: "Branch to clone. Default 'nightly' matches theRock wheel builds. For releases use ROCm/pytorch with 'release/2.11', 'release/2.10', etc." - type: string - default: "nightly" - python_version: - type: choice - options: - - "3.12" - - "3.10" - - "3.11" - - "3.13" - - "3.14" - default: "3.12" - amdgpu_family: - type: choice - options: - - gfx94X-dcgpu - - gfx950-dcgpu - - gfx90X-dcgpu - - gfx120X-all - - gfx110X-all - - gfx110X-dgpu - - gfx103X-dgpu - - gfx101X-dgpu - default: gfx94X-dcgpu - rocm_version: - description: "ROCm version (e.g. '7.13.0a20260413'). Leave empty to auto-discover from the latest available torch wheel." - type: string - index_url: - description: Base URL for PyTorch wheels index - type: string - default: "https://rocm.nightlies.amd.com/v2-staging" - -permissions: - contents: read - -# run-name and job name cannot access the env context, so the -# inputs || 'fallback' defaults are necessarily repeated here. 
-run-name: >- - ${{ github.event_name == 'schedule' && 'Nightly Docker builds' || - format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', - inputs.amdgpu_family || 'gfx94X-dcgpu', - inputs.pytorch_repo || 'pytorch/pytorch', - inputs.pytorch_branch || 'nightly', - inputs.rocm_version || 'auto') }} - -# ── Single source of truth for every configurable value ────────────── -# For workflow_dispatch the user's input wins; for schedule (or when an -# input is left blank) the fallback after || applies. All jobs / steps -# reference env.XXX — never raw inputs (except in run-name and job name -# where the env context is unavailable). -env: - REGISTRY: docker.io - IMAGE_NAME: rocm/pytorch-private - PYTORCH_REPO: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }} - PYTORCH_BRANCH: ${{ inputs.pytorch_branch || 'nightly' }} - PYTHON_VERSION: ${{ inputs.python_version || '3.12' }} - AMDGPU_FAMILY: ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} - ROCM_VERSION: ${{ inputs.rocm_version }} - INDEX_URL: ${{ inputs.index_url || 'https://rocm.nightlies.amd.com/v2-staging' }} - BASE_IMAGE: "ubuntu:24.04" - -jobs: - # ── Nightly matrix build (schedule only) ───────────────────────────────── - nightly-matrix: - if: github.event_name == 'schedule' - strategy: - fail-fast: false - matrix: - include: - - pytorch_repo: pytorch/pytorch - pytorch_branch: nightly - label: nightly - - pytorch_repo: ROCm/pytorch - pytorch_branch: release/2.11 - label: "2.11" - - pytorch_repo: ROCm/pytorch - pytorch_branch: release/2.10 - label: "2.10" - - pytorch_repo: ROCm/pytorch - pytorch_branch: release/2.9 - label: "2.9" - # job name cannot access env context — inputs || defaults repeated here - name: "Nightly | torch ${{ matrix.label }} | py${{ inputs.python_version || '3.12' }} | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }}" - runs-on: ubuntu-latest - steps: - - name: Checkout workflow files - uses: actions/checkout@v4 - - - name: Checkout PyTorch source - uses: actions/checkout@v4 - with: - repository: ${{ 
matrix.pytorch_repo }} - ref: ${{ matrix.pytorch_branch }} - path: pytorch-src - fetch-depth: 1 - - - name: Derive torch version prefix from branch - id: prefix - run: | - BRANCH="${{ matrix.pytorch_branch }}" - if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then - echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT - echo "Derived torch prefix: ${BASH_REMATCH[1]}" - else - echo "value=" >> $GITHUB_OUTPUT - echo "No prefix (nightly/main branch)" - fi - - - name: Discover ROCm version from index - id: discover - env: - INDEX_URL: ${{ env.INDEX_URL }} - AMDGPU_FAMILY: ${{ env.AMDGPU_FAMILY }} - PREFIX_VALUE: ${{ steps.prefix.outputs.value }} - run: | - python3 .github/scripts/discover_rocm_from_torch_index.py \ - --index-url "$INDEX_URL" \ - --amdgpu-family "$AMDGPU_FAMILY" \ - --torch-version-prefix "$PREFIX_VALUE" - - - name: Resolve commit SHA - id: meta - run: | - COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" - echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT - - - name: Generate Docker image tag - id: docker-tag - run: | - BRANCH="${{ matrix.pytorch_branch }}" - BRANCH_SAFE="${BRANCH//\//-}" - ROCM_VERSION="${{ steps.discover.outputs.rocm_version }}" - OS=$(echo "${{ env.BASE_IMAGE }}" | tr -d ':' | tr '/' '-') - - IMAGE_TAG="pytorch-${BRANCH_SAFE}-${{ steps.meta.outputs.pytorch_commit }}-rocm${ROCM_VERSION}-${OS}-py${{ env.PYTHON_VERSION }}-${{ env.AMDGPU_FAMILY }}" - IMAGE_TAG="${IMAGE_TAG//+/-}" - echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT - echo "Generated image tag: ${IMAGE_TAG}" - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERUSERNAME }} - password: ${{ secrets.DOCKERTOKEN }} - - - name: Prepare build context - run: | - cp dockerfiles/Dockerfile pytorch-src/ - mkdir -p pytorch-src/.github/scripts - cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ - cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ - - - name: Build Docker image - run: | - 
IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - - docker build \ - --file pytorch-src/Dockerfile \ - --tag "${IMAGE}" \ - --label "pytorch.repo=${{ matrix.pytorch_repo }}" \ - --label "pytorch.branch=${{ matrix.pytorch_branch }}" \ - --label "pytorch.commit=${{ steps.meta.outputs.pytorch_commit }}" \ - --build-arg "BASE_IMAGE=${{ env.BASE_IMAGE }}" \ - --build-arg "ROCM_VERSION=${{ steps.discover.outputs.rocm_version }}" \ - --build-arg "AMDGPU_FAMILY=${{ env.AMDGPU_FAMILY }}" \ - --build-arg "PYTHON_VERSION=${{ env.PYTHON_VERSION }}" \ - --build-arg "INDEX_URL=${{ env.INDEX_URL }}" \ - --build-arg "TORCH_VERSION_PREFIX=${{ steps.prefix.outputs.value }}" \ - pytorch-src - - echo "Docker image built successfully: ${IMAGE}" - - - name: Get ROCm packages info - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" - - - name: Push Docker image - run: | - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} - echo "Docker image pushed successfully" - - - name: Post-build summary - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - echo "## PyTorch Docker Build — ${{ matrix.label }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY - echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY - echo "| Torch Wheel | ${{ steps.discover.outputs.torch_wheel_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Repo | ${{ matrix.pytorch_repo }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Branch | ${{ matrix.pytorch_branch }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Commit | ${{ steps.meta.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY - echo "| AMDGPU Family | ${{ env.AMDGPU_FAMILY }} |" >> 
$GITHUB_STEP_SUMMARY - echo "| Python | ${{ env.PYTHON_VERSION }} |" >> $GITHUB_STEP_SUMMARY - echo "| ROCm (discovered) | ${{ steps.discover.outputs.rocm_version }} |" >> $GITHUB_STEP_SUMMARY - - # ── Single image build (manual dispatch) ────────────────────────────────── - build-docker: - if: github.event_name == 'workflow_dispatch' - # job name cannot access env context — inputs || defaults repeated here - name: "Build | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}" - runs-on: ubuntu-latest - steps: - - name: Checkout workflow files - uses: actions/checkout@v4 - - - name: Checkout PyTorch source - uses: actions/checkout@v4 - with: - repository: ${{ env.PYTORCH_REPO }} - ref: ${{ env.PYTORCH_BRANCH }} - path: pytorch-src - fetch-depth: 1 - - - name: Derive torch version prefix from branch - id: prefix - run: | - BRANCH="${{ env.PYTORCH_BRANCH }}" - if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then - echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT - echo "Derived torch prefix: ${BASH_REMATCH[1]}" - else - echo "value=" >> $GITHUB_OUTPUT - echo "No prefix (nightly branch)" - fi - - - name: Discover ROCm version from index - id: discover - if: ${{ !env.ROCM_VERSION }} - env: - INDEX_URL: ${{ env.INDEX_URL }} - AMDGPU_FAMILY: ${{ env.AMDGPU_FAMILY }} - PREFIX_VALUE: ${{ steps.prefix.outputs.value }} - run: | - python3 .github/scripts/discover_rocm_from_torch_index.py \ - --index-url "$INDEX_URL" \ - --amdgpu-family "$AMDGPU_FAMILY" \ - --torch-version-prefix "$PREFIX_VALUE" - - - name: Resolve ROCm version - id: rocm - run: | - ROCM="${{ env.ROCM_VERSION || steps.discover.outputs.rocm_version }}" - echo "version=${ROCM}" >> $GITHUB_OUTPUT - - - name: Resolve commit SHA - id: meta - run: | - COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" - echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT - - - name: Generate Docker image tag - id: docker-tag - run: | - 
BRANCH="${{ env.PYTORCH_BRANCH }}" - BRANCH_SAFE="${BRANCH//\//-}" - ROCM_VERSION="${{ steps.rocm.outputs.version }}" - OS=$(echo "${{ env.BASE_IMAGE }}" | tr -d ':' | tr '/' '-') - - IMAGE_TAG="pytorch-${BRANCH_SAFE}-${{ steps.meta.outputs.pytorch_commit }}-rocm${ROCM_VERSION}-${OS}-py${{ env.PYTHON_VERSION }}-${{ env.AMDGPU_FAMILY }}" - IMAGE_TAG="${IMAGE_TAG//+/-}" - echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT - echo "Generated image tag: ${IMAGE_TAG}" - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERUSERNAME }} - password: ${{ secrets.DOCKERTOKEN }} - - - name: Prepare build context - run: | - cp dockerfiles/Dockerfile pytorch-src/ - mkdir -p pytorch-src/.github/scripts - cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ - cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ - - - name: Build Docker image - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - - docker build \ - --file pytorch-src/Dockerfile \ - --tag "${IMAGE}" \ - --label "pytorch.repo=${{ env.PYTORCH_REPO }}" \ - --label "pytorch.branch=${{ env.PYTORCH_BRANCH }}" \ - --label "pytorch.commit=${{ steps.meta.outputs.pytorch_commit }}" \ - --build-arg "BASE_IMAGE=${{ env.BASE_IMAGE }}" \ - --build-arg "ROCM_VERSION=${{ steps.rocm.outputs.version }}" \ - --build-arg "AMDGPU_FAMILY=${{ env.AMDGPU_FAMILY }}" \ - --build-arg "PYTHON_VERSION=${{ env.PYTHON_VERSION }}" \ - --build-arg "INDEX_URL=${{ env.INDEX_URL }}" \ - --build-arg "TORCH_VERSION_PREFIX=${{ steps.prefix.outputs.value }}" \ - pytorch-src - - echo "Docker image built successfully: ${IMAGE}" - - - name: Get ROCm packages info - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" - - - name: Push Docker image - run: | - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ 
steps.docker-tag.outputs.tag }}
-          echo "Docker image pushed successfully"
-      - name: Post-build summary
-        run: |
-          IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}"
-          echo "## PyTorch Docker Build Summary" >> $GITHUB_STEP_SUMMARY
-          echo "" >> $GITHUB_STEP_SUMMARY
-          echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY
-          echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY
-          echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY
-          echo "| PyTorch Repo | ${{ env.PYTORCH_REPO }} |" >> $GITHUB_STEP_SUMMARY
-          echo "| PyTorch Branch | ${{ env.PYTORCH_BRANCH }} |" >> $GITHUB_STEP_SUMMARY
-          echo "| PyTorch Commit | ${{ steps.meta.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY
-          echo "| AMDGPU Family | ${{ env.AMDGPU_FAMILY }} |" >> $GITHUB_STEP_SUMMARY
-          echo "| Python | ${{ env.PYTHON_VERSION }} |" >> $GITHUB_STEP_SUMMARY
-          echo "| ROCm | ${{ steps.rocm.outputs.version }} |" >> $GITHUB_STEP_SUMMARY
-          echo "| Index URL | ${{ env.INDEX_URL }} |" >> $GITHUB_STEP_SUMMARY
+name: Build Portable Linux PyTorch Dockers
+
+on:
+  schedule:
+    - cron: '0 6 * * *'  # daily at 06:00 UTC
+  workflow_dispatch:
+    inputs:
+      pytorch_repo:
+        type: string
+        default: pytorch/pytorch
+        description: "GitHub repo to clone into the image (e.g. 'pytorch/pytorch' or 'ROCm/pytorch')"
+      pytorch_branch:
+        type: string
+        default: nightly
+        description: "Branch to clone. Default 'nightly' matches theRock wheel builds. For releases use ROCm/pytorch with 'release/2.11', 'release/2.10', etc."
+      python_version:
+        type: choice
+        default: '3.12'
+        options:
+          - '3.12'
+          - '3.10'
+          - '3.11'
+          - '3.13'
+          - '3.14'
+      amdgpu_family:
+        type: choice
+        default: gfx94X-dcgpu
+        options:
+          - gfx94X-dcgpu
+          - gfx950-dcgpu
+          - gfx90X-dcgpu
+          - gfx120X-all
+          - gfx110X-all
+          - gfx110X-dgpu
+          - gfx103X-dgpu
+          - gfx101X-dgpu
+      rocm_version:
+        type: string
+        description: "ROCm version (e.g. '7.13.0a20260413'). Leave empty to auto-discover from the latest available torch wheel."
+      index_url:
+        type: string
+        default: 'https://rocm.nightlies.amd.com/v2-staging'
+        description: 'Base URL for PyTorch wheels index'
+
+permissions:
+  contents: read
+
+# Default literals are kept in this file only. Both jobs below hand every
+# value to the reusable workflow explicitly through "with:".
+run-name: >-
+  ${{ github.event_name == 'schedule' && 'Nightly Docker builds' ||
+  format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})',
+  inputs.amdgpu_family || 'gfx94X-dcgpu',
+  inputs.pytorch_repo || 'pytorch/pytorch',
+  inputs.pytorch_branch || 'nightly',
+  inputs.rocm_version || 'auto') }}
+
+jobs:
+  nightly-matrix:
+    name: 'Nightly | torch ${{ matrix.label }} | py3.12 | gfx94X-dcgpu'
+    if: ${{ github.event_name == 'schedule' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - label: nightly
+            pytorch_repo: pytorch/pytorch
+            pytorch_branch: nightly
+          - label: '2.11'
+            pytorch_repo: ROCm/pytorch
+            pytorch_branch: release/2.11
+          - label: '2.10'
+            pytorch_repo: ROCm/pytorch
+            pytorch_branch: release/2.10
+          - label: '2.9'
+            pytorch_repo: ROCm/pytorch
+            pytorch_branch: release/2.9
+    uses: ./.github/workflows/_build_portable_linux_pytorch_docker.yml
+    secrets: inherit
+    with:
+      registry: docker.io
+      image_name: rocm/pytorch-private
+      base_image: 'ubuntu:24.04'
+      index_url: 'https://rocm.nightlies.amd.com/v2-staging'
+      pytorch_repo: ${{ matrix.pytorch_repo }}
+      pytorch_branch: ${{ matrix.pytorch_branch }}
+      python_version: '3.12'
+      amdgpu_family: gfx94X-dcgpu
+      rocm_version: 'auto'
+      job_display_name: 'Nightly | torch ${{ matrix.label }} | py3.12 | gfx94X-dcgpu'
+      summary_title: 'PyTorch Docker Build - ${{ matrix.label }}'
+
+  build-docker:
+    name: "Build | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}"
+    if: ${{ github.event_name == 'workflow_dispatch' }}
+    uses: ./.github/workflows/_build_portable_linux_pytorch_docker.yml
+    secrets: inherit
+    with:
+      registry: docker.io
+      image_name: rocm/pytorch-private
+      base_image: 'ubuntu:24.04'
+      index_url: ${{ inputs.index_url || 'https://rocm.nightlies.amd.com/v2-staging' }}
+      pytorch_repo: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}
+      pytorch_branch: ${{ inputs.pytorch_branch || 'nightly' }}
+      python_version: ${{ inputs.python_version || '3.12' }}
+      amdgpu_family: ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }}
+      rocm_version: ${{ inputs.rocm_version || 'auto' }}
+      job_display_name: "Build | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}"
+      summary_title: 'PyTorch Docker Build Summary'