#!/usr/bin/env python3
"""
discover_rocm_from_torch_index.py

Parse a ROCm PyTorch wheel index HTML, pick the latest torch wheel matching an
optional PEP 440-style prefix (from release/x.y branches), and emit step outputs
via GITHUB_OUTPUT (or legacy ::set-output when that variable is unset).

Usage (from repo root, as in GitHub Actions):

    python3 .github/scripts/discover_rocm_from_torch_index.py \\
        --index-url <index-url> \\
        --amdgpu-family <amdgpu-family> \\
        [--torch-version-prefix <prefix>]
"""

from __future__ import annotations

import argparse
import os
import re
import urllib.parse
import urllib.request
from typing import Any


# One wheel filename; group 1 is everything between "torch-" and ".whl".
_WHEEL_RE = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE)

# A prefix made only of dotted integers, e.g. "2" or "2.11".
_NUMERIC_PREFIX_RE = re.compile(r"[0-9]+(?:\.[0-9]+)*")


def _version_sort_key(v: str) -> tuple[int, ...]:
    """Return every integer embedded in version string *v*, in order.

    All digit runs are kept, including those glued to letters, so the ROCm
    major version in ``+rocm7.13.0`` and the date in ``.dev20260401`` take
    part in the ordering instead of being silently dropped.

    This is a numeric ordering only, not a full PEP 440 comparison: the
    letters themselves (``a`` / ``b`` / ``rc``) are ignored.

    Args:
        v: Wheel version string such as ``2.11.0a0+rocm7.13.0a20260413``.

    Returns:
        A tuple of ints (empty when *v* contains no digits), or ``(0,)``
        when *v* is not a string.
    """
    try:
        return tuple(int(number) for number in re.findall(r"[0-9]+", v))
    except TypeError:
        # Best-effort fallback so one malformed entry cannot abort max().
        return (0,)


def _matches_version_prefix(public_version: str, prefix: str) -> bool:
    """Return True when *public_version* starts with *prefix*.

    A purely numeric prefix (``2.1``) must end on a component boundary, so
    it selects ``2.1.0`` but not ``2.10.0``. Any other prefix (for example
    ``2.11.0a``) keeps plain ``str.startswith`` semantics.
    """
    if not public_version.startswith(prefix):
        return False
    if _NUMERIC_PREFIX_RE.fullmatch(prefix):
        # The character after the prefix must not extend its last number.
        return not public_version[len(prefix):][:1].isdigit()
    return True


def discover_rocm_version(
    index_url: str,
    gpu_family: str,
    torch_version_prefix: str,
    *,
    timeout_s: int = 60,
) -> tuple[str, str]:
    """Return (rocm_version, latest_torch_wheel_version_string).

    Fetches ``<index_url>/<gpu_family>/torch/``, collects every wheel version
    that carries a ``+rocm`` local segment, optionally narrows them to
    *torch_version_prefix*, and picks the highest by ``_version_sort_key``.

    Args:
        index_url: Base index URL; a trailing slash is ignored.
        gpu_family: GPU family subdirectory under the index.
        torch_version_prefix: Prefix the public part of the version (the text
            before ``+``) must start with; an empty string disables filtering.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Returns:
        The text following ``+rocm`` in the chosen version, and the full
        chosen version string.

    Raises:
        SystemExit: With status 1, after printing a ``::error::`` line, when
            no wheel matches or the ROCm suffix cannot be parsed.
    """
    url = f"{index_url.rstrip('/')}/{gpu_family}/torch/"
    print(f"Fetching torch index: {url}")
    # Context manager closes the HTTP response even if reading/decoding fails.
    with urllib.request.urlopen(url, timeout=timeout_s) as response:
        html = response.read().decode()

    versions: list[str] = []
    for m in _WHEEL_RE.finditer(html):
        # The first "-"-separated field is the version; the index may
        # percent-encode "+" as %2B.
        ver = urllib.parse.unquote(m.group(1).split("-")[0])
        if "+rocm" in ver:
            versions.append(ver)

    if torch_version_prefix:
        versions = [
            v
            for v in versions
            if _matches_version_prefix(v.split("+")[0], torch_version_prefix)
        ]

    if not versions:
        print(f"::error::No torch wheels found (prefix={torch_version_prefix!r})")
        raise SystemExit(1)

    latest = max(versions, key=_version_sort_key)
    match = re.search(r"\+rocm(.+)", latest)
    if not match:
        print(f"::error::Could not parse ROCm suffix from wheel version {latest!r}")
        raise SystemExit(1)
    rocm_ver = match.group(1)

    print(f"Latest torch wheel: {latest}")
    print(f"Discovered ROCm version: {rocm_ver}")
    return rocm_ver, latest


def set_output(name: str, val: Any) -> None:
    """Emit a step output named *name* with value *val*.

    Appends ``name=val`` to the file named by the GITHUB_OUTPUT environment
    variable when it is set and non-empty; otherwise prints the legacy
    ``::set-output`` workflow command. The value is also echoed to stdout.
    """
    print(f"Setting output {name}={val}")
    output_path = os.getenv("GITHUB_OUTPUT")
    if output_path:
        with open(output_path, "a", encoding="utf-8") as env:
            print(f"{name}={val}", file=env)
    else:
        print(f"::set-output name={name}::{val}")


def main() -> None:
    """Parse command-line arguments, run discovery, and emit both outputs."""
    parser = argparse.ArgumentParser(
        description="Discover ROCm version from a PyTorch ROCm wheel index page.",
    )
    parser.add_argument(
        "--index-url",
        required=True,
        help="Base index URL (e.g. https://rocm.nightlies.amd.com/v2-staging)",
    )
    parser.add_argument(
        "--amdgpu-family",
        required=True,
        help="GPU family subdirectory under the index (e.g. gfx94X-dcgpu)",
    )
    parser.add_argument(
        "--torch-version-prefix",
        default="",
        help="If set, only wheels whose version starts with this prefix (e.g. 2.11)",
    )
    args = parser.parse_args()

    rocm_ver, latest = discover_rocm_version(
        args.index_url,
        args.amdgpu_family,
        args.torch_version_prefix,
    )
    set_output("rocm_version", rocm_ver)
    set_output("torch_wheel_version", latest)


if __name__ == "__main__":
    main()
+# Use rocm_version: auto to run index discovery (parent normalizes empty dispatch input to auto). + +name: Build Portable Linux PyTorch Docker (reusable) + +on: + workflow_call: + inputs: + pytorch_repo: + description: GitHub repo to clone into the image (e.g. pytorch/pytorch) + required: true + type: string + pytorch_branch: + description: Branch or ref to clone + required: true + type: string + python_version: + required: true + type: string + amdgpu_family: + required: true + type: string + rocm_version: + description: Concrete ROCm version string, or 'auto' to discover from the wheel index + required: true + type: string + index_url: + description: Base URL for PyTorch wheels index + required: true + type: string + base_image: + description: Base image for the Dockerfile (e.g. ubuntu:24.04) + required: true + type: string + registry: + description: Docker registry host (e.g. docker.io) + required: true + type: string + image_name: + description: Image repository path without registry (e.g. 
org/name) + required: true + type: string + job_display_name: + description: Value for jobs.build-docker.name (parent-defined for UI) + required: true + type: string + summary_title: + description: Markdown heading title for the post-build summary (without leading ##) + required: true + type: string + +permissions: + contents: read + +run-name: >- + ${{ format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', + inputs.amdgpu_family, inputs.pytorch_repo, inputs.pytorch_branch, inputs.rocm_version) }} + +jobs: + build-docker: + name: ${{ inputs.job_display_name }} + runs-on: ubuntu-latest + steps: + - name: Checkout workflow files + uses: actions/checkout@v4 + + - name: Checkout PyTorch source + uses: actions/checkout@v4 + with: + repository: ${{ inputs.pytorch_repo }} + ref: ${{ inputs.pytorch_branch }} + path: pytorch-src + fetch-depth: 1 + + - name: Derive torch version prefix from branch + id: prefix + env: + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + run: | + BRANCH="$PYTORCH_BRANCH" + if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then + echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT + echo "Derived torch prefix: ${BASH_REMATCH[1]}" + else + echo "value=" >> $GITHUB_OUTPUT + echo "No prefix (nightly/main branch)" + fi + + - name: Discover ROCm version from index + id: discover + if: ${{ inputs.rocm_version == 'auto' || inputs.rocm_version == '' }} + env: + INDEX_URL: ${{ inputs.index_url }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + PREFIX_VALUE: ${{ steps.prefix.outputs.value }} + run: | + python3 .github/scripts/discover_rocm_from_torch_index.py \ + --index-url "$INDEX_URL" \ + --amdgpu-family "$AMDGPU_FAMILY" \ + --torch-version-prefix "$PREFIX_VALUE" + + - name: Resolve ROCm version + id: rocm + env: + ROCM_INPUT: ${{ inputs.rocm_version }} + DISCOVER_ROCM: ${{ steps.discover.outputs.rocm_version }} + DISCOVER_TORCH: ${{ steps.discover.outputs.torch_wheel_version }} + run: | + if [ "$ROCM_INPUT" = "auto" ] || [ -z "$ROCM_INPUT" ]; then + echo 
"version=${DISCOVER_ROCM}" >> $GITHUB_OUTPUT + echo "torch_wheel=${DISCOVER_TORCH}" >> $GITHUB_OUTPUT + else + echo "version=${ROCM_INPUT}" >> $GITHUB_OUTPUT + echo "torch_wheel=" >> $GITHUB_OUTPUT + fi + + - name: Resolve commit SHA + id: meta + run: | + COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" + echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT + + - name: Generate Docker image tag + id: docker-tag + env: + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + BASE_IMAGE: ${{ inputs.base_image }} + PYTHON_VERSION: ${{ inputs.python_version }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + REGISTRY: ${{ inputs.registry }} + IMAGE_NAME: ${{ inputs.image_name }} + ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} + PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} + run: | + BRANCH="$PYTORCH_BRANCH" + BRANCH_SAFE="${BRANCH//\//-}" + OS=$(echo "$BASE_IMAGE" | tr -d ':' | tr '/' '-') + + IMAGE_TAG="pytorch-${BRANCH_SAFE}-${PYTORCH_COMMIT}-rocm${ROCM_RESOLVED_VERSION}-${OS}-py${PYTHON_VERSION}-${AMDGPU_FAMILY}" + IMAGE_TAG="${IMAGE_TAG//+/-}" + IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" + echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT + echo "image=${IMAGE}" >> $GITHUB_OUTPUT + echo "Generated image tag: ${IMAGE_TAG}" + echo "Full image ref: ${IMAGE}" + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERUSERNAME }} + password: ${{ secrets.DOCKERTOKEN }} + + - name: Prepare build context + run: | + cp dockerfiles/Dockerfile pytorch-src/ + mkdir -p pytorch-src/.github/scripts + cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ + cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ + + - name: Build Docker image + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + PYTORCH_REPO: ${{ inputs.pytorch_repo }} + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} + BASE_IMAGE: ${{ inputs.base_image }} + 
ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + PYTHON_VERSION: ${{ inputs.python_version }} + INDEX_URL: ${{ inputs.index_url }} + TORCH_VERSION_PREFIX: ${{ steps.prefix.outputs.value }} + run: | + docker build \ + --file pytorch-src/Dockerfile \ + --tag "${IMAGE}" \ + --label "pytorch.repo=${PYTORCH_REPO}" \ + --label "pytorch.branch=${PYTORCH_BRANCH}" \ + --label "pytorch.commit=${PYTORCH_COMMIT}" \ + --build-arg "BASE_IMAGE=${BASE_IMAGE}" \ + --build-arg "ROCM_VERSION=${ROCM_RESOLVED_VERSION}" \ + --build-arg "AMDGPU_FAMILY=${AMDGPU_FAMILY}" \ + --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \ + --build-arg "INDEX_URL=${INDEX_URL}" \ + --build-arg "TORCH_VERSION_PREFIX=${TORCH_VERSION_PREFIX}" \ + pytorch-src + + echo "Docker image built successfully: ${IMAGE}" + + - name: Get ROCm packages info + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + run: | + docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" + + - name: Push Docker image + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + run: | + docker push "${IMAGE}" + echo "Docker image pushed successfully" + + - name: Post-build summary + env: + IMAGE: ${{ steps.docker-tag.outputs.image }} + TORCH_WHEEL: ${{ steps.rocm.outputs.torch_wheel }} + SUMMARY_TITLE: ${{ inputs.summary_title }} + PYTORCH_REPO: ${{ inputs.pytorch_repo }} + PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} + AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} + PYTHON_VERSION: ${{ inputs.python_version }} + INDEX_URL: ${{ inputs.index_url }} + ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} + PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} + run: | + echo "## ${SUMMARY_TITLE}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY + echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY + echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY + if [ -n "$TORCH_WHEEL" ]; then + 
echo "| Torch Wheel | ${TORCH_WHEEL} |" >> $GITHUB_STEP_SUMMARY + fi + echo "| PyTorch Repo | ${PYTORCH_REPO} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Branch | ${PYTORCH_BRANCH} |" >> $GITHUB_STEP_SUMMARY + echo "| PyTorch Commit | ${PYTORCH_COMMIT} |" >> $GITHUB_STEP_SUMMARY + echo "| AMDGPU Family | ${AMDGPU_FAMILY} |" >> $GITHUB_STEP_SUMMARY + echo "| Python | ${PYTHON_VERSION} |" >> $GITHUB_STEP_SUMMARY + echo "| ROCm | ${ROCM_RESOLVED_VERSION} |" >> $GITHUB_STEP_SUMMARY + echo "| Index URL | ${INDEX_URL} |" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/build_portable_linux_pytorch_dockers.yml b/.github/workflows/build_portable_linux_pytorch_dockers.yml index d5c9a94c3b1ad..bfb0f56965d8d 100644 --- a/.github/workflows/build_portable_linux_pytorch_dockers.yml +++ b/.github/workflows/build_portable_linux_pytorch_dockers.yml @@ -1,427 +1,109 @@ -name: Build Portable Linux PyTorch Dockers - -on: - schedule: - - cron: "0 6 * * *" # daily at 06:00 UTC - workflow_dispatch: - inputs: - pytorch_repo: - description: "GitHub repo to clone into the image (e.g. 'pytorch/pytorch' or 'ROCm/pytorch')" - type: string - default: "pytorch/pytorch" - pytorch_branch: - description: "Branch to clone. Default 'nightly' matches theRock wheel builds. For releases use ROCm/pytorch with 'release/2.11', 'release/2.10', etc." - type: string - default: "nightly" - python_version: - type: choice - options: - - "3.12" - - "3.10" - - "3.11" - - "3.13" - - "3.14" - default: "3.12" - amdgpu_family: - type: choice - options: - - gfx950-dcgpu - - gfx94X-dcgpu - - gfx90X-dcgpu - - gfx120X-all - - gfx110X-all - - gfx110X-dgpu - - gfx103X-dgpu - - gfx101X-dgpu - default: gfx94X-dcgpu - rocm_version: - description: "ROCm version (e.g. '7.13.0a20260413'). Leave empty to auto-discover from the latest available torch wheel." 
- type: string - index_url: - description: Base URL for PyTorch wheels index - type: string - default: "https://rocm.nightlies.amd.com/v2-staging" - -permissions: - contents: read - -run-name: >- - ${{ github.event_name == 'schedule' && 'Nightly Docker builds' || - format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', - inputs.amdgpu_family || 'gfx950-dcgpu', - inputs.pytorch_repo || 'pytorch/pytorch', - inputs.pytorch_branch || 'nightly', - inputs.rocm_version || 'auto') }} - -env: - REGISTRY: docker.io - IMAGE_NAME: rocm/pytorch-private - DEFAULT_AMDGPU_FAMILY: gfx950-dcgpu - DEFAULT_PYTHON_VERSION: "3.12" - DEFAULT_INDEX_URL: "https://rocm.nightlies.amd.com/v2-staging" - DEFAULT_BASE_IMAGE: "ubuntu:24.04" - -jobs: - # ── Nightly matrix build (schedule only) ───────────────────────────────── - nightly-matrix: - if: github.event_name == 'schedule' - strategy: - fail-fast: false - matrix: - include: - - pytorch_repo: pytorch/pytorch - pytorch_branch: nightly - label: nightly - - pytorch_repo: ROCm/pytorch - pytorch_branch: release/2.11 - label: "2.11" - - pytorch_repo: ROCm/pytorch - pytorch_branch: release/2.10 - label: "2.10" - - pytorch_repo: ROCm/pytorch - pytorch_branch: release/2.9 - label: "2.9" - name: "Nightly | torch ${{ matrix.label }} | MI355" - runs-on: ubuntu-latest - steps: - - name: Checkout workflow files - uses: actions/checkout@v4 - - - name: Checkout PyTorch source - uses: actions/checkout@v4 - with: - repository: ${{ matrix.pytorch_repo }} - ref: ${{ matrix.pytorch_branch }} - path: pytorch-src - fetch-depth: 1 - - - name: Derive torch version prefix from branch - id: prefix - run: | - BRANCH="${{ matrix.pytorch_branch }}" - if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then - echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT - echo "Derived torch prefix: ${BASH_REMATCH[1]}" - else - echo "value=" >> $GITHUB_OUTPUT - echo "No prefix (nightly/main branch)" - fi - - - name: Discover ROCm version from index - id: discover - run: | - python3 - 
"${{ env.DEFAULT_INDEX_URL }}" "${{ env.DEFAULT_AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' - import re, sys, urllib.request, urllib.parse - - index_url, gpu_family = sys.argv[1], sys.argv[2] - prefix = sys.argv[3] if len(sys.argv) > 3 else "" - - url = f"{index_url.rstrip('/')}/{gpu_family}/torch/" - print(f"Fetching torch index: {url}") - html = urllib.request.urlopen(url, timeout=60).read().decode() - - pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE) - versions = [] - for m in pattern.finditer(html): - ver = urllib.parse.unquote(m.group(1).split("-")[0]) - if "+rocm" in ver: - versions.append(ver) - - if prefix: - versions = [v for v in versions if v.split("+")[0].startswith(prefix)] - - if not versions: - print(f"::error::No torch wheels found (prefix={prefix!r})") - sys.exit(1) - - def key(v): - try: - return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit()) - except (ValueError, AttributeError): - return (0,) - - latest = max(versions, key=key) - rocm_ver = re.search(r"\+rocm(.+)", latest).group(1) - - print(f"Latest torch wheel: {latest}") - print(f"Discovered ROCm version: {rocm_ver}") - - import os - with open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write(f"rocm_version={rocm_ver}\n") - f.write(f"torch_wheel_version={latest}\n") - PYEOF - - - name: Resolve config - id: cfg - run: | - echo "amdgpu_family=${{ env.DEFAULT_AMDGPU_FAMILY }}" >> $GITHUB_OUTPUT - echo "python_version=${{ env.DEFAULT_PYTHON_VERSION }}" >> $GITHUB_OUTPUT - echo "rocm_version=${{ steps.discover.outputs.rocm_version }}" >> $GITHUB_OUTPUT - echo "index_url=${{ env.DEFAULT_INDEX_URL }}" >> $GITHUB_OUTPUT - echo "base_image=${{ env.DEFAULT_BASE_IMAGE }}" >> $GITHUB_OUTPUT - echo "torch_prefix=${{ steps.prefix.outputs.value }}" >> $GITHUB_OUTPUT - echo "pytorch_repo=${{ matrix.pytorch_repo }}" >> $GITHUB_OUTPUT - echo "pytorch_branch=${{ matrix.pytorch_branch }}" >> $GITHUB_OUTPUT - - COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" 
- echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT - - - name: Generate Docker image tag - id: docker-tag - run: | - BRANCH="${{ matrix.pytorch_branch }}" - BRANCH_SAFE="${BRANCH//\//-}" - COMMIT="${{ steps.cfg.outputs.pytorch_commit }}" - ROCM_VERSION="${{ steps.cfg.outputs.rocm_version }}" - PYTHON_VERSION="${{ steps.cfg.outputs.python_version }}" - GFX="${{ steps.cfg.outputs.amdgpu_family }}" - BASE_IMAGE="${{ steps.cfg.outputs.base_image }}" - OS=$(echo "${BASE_IMAGE}" | tr -d ':' | tr '/' '-') - - IMAGE_TAG="pytorch-${BRANCH_SAFE}-${COMMIT}-rocm${ROCM_VERSION}-${OS}-py${PYTHON_VERSION}-${GFX}" - IMAGE_TAG="${IMAGE_TAG//+/-}" - echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT - echo "Generated image tag: ${IMAGE_TAG}" - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERUSERNAME }} - password: ${{ secrets.DOCKERTOKEN }} - - - name: Prepare build context - run: | - cp dockerfiles/Dockerfile pytorch-src/ - mkdir -p pytorch-src/.github/scripts - cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ - cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ - - - name: Build Docker image - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - - docker build \ - --file pytorch-src/Dockerfile \ - --tag "${IMAGE}" \ - --label "pytorch.repo=${{ matrix.pytorch_repo }}" \ - --label "pytorch.branch=${{ matrix.pytorch_branch }}" \ - --label "pytorch.commit=${{ steps.cfg.outputs.pytorch_commit }}" \ - --build-arg "BASE_IMAGE=${{ steps.cfg.outputs.base_image }}" \ - --build-arg "ROCM_VERSION=${{ steps.cfg.outputs.rocm_version }}" \ - --build-arg "AMDGPU_FAMILY=${{ steps.cfg.outputs.amdgpu_family }}" \ - --build-arg "PYTHON_VERSION=${{ steps.cfg.outputs.python_version }}" \ - --build-arg "INDEX_URL=${{ steps.cfg.outputs.index_url }}" \ - --build-arg "TORCH_VERSION_PREFIX=${{ steps.prefix.outputs.value }}" \ - pytorch-src - - echo "Docker image built 
successfully: ${IMAGE}" - - - name: Get ROCm packages info - id: rocm-packages - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - ROCM_PACKAGES=$(docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found") - echo "rocm_packages<> $GITHUB_OUTPUT - echo "${ROCM_PACKAGES}" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - echo "ROCm packages:" - echo "${ROCM_PACKAGES}" - - - name: Push Docker image - run: | - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} - echo "Docker image pushed successfully" - - - name: Post-build summary - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - echo "## PyTorch Docker Build Summary — ${{ matrix.label }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY - echo "|-----------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY - echo "| Torch Wheel | ${{ steps.discover.outputs.torch_wheel_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Repo | ${{ matrix.pytorch_repo }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Branch | ${{ matrix.pytorch_branch }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Commit | ${{ steps.cfg.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY - echo "| AMDGPU Family | ${{ steps.cfg.outputs.amdgpu_family }} |" >> $GITHUB_STEP_SUMMARY - echo "| Python | ${{ steps.cfg.outputs.python_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| ROCm (discovered) | ${{ steps.cfg.outputs.rocm_version }} |" >> $GITHUB_STEP_SUMMARY - - # ── Single image build (manual dispatch) ────────────────────────────────── - build-docker: - if: github.event_name == 'workflow_dispatch' - name: "Build | ${{ inputs.amdgpu_family }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}" - runs-on: ubuntu-latest - steps: - - name: Checkout 
workflow files - uses: actions/checkout@v4 - - - name: Checkout PyTorch source - uses: actions/checkout@v4 - with: - repository: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }} - ref: ${{ inputs.pytorch_branch || 'nightly' }} - path: pytorch-src - fetch-depth: 1 - - - name: Derive torch version prefix from branch - id: prefix - run: | - BRANCH="${{ inputs.pytorch_branch || 'nightly' }}" - if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then - echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT - echo "Derived torch prefix: ${BASH_REMATCH[1]}" - else - echo "value=" >> $GITHUB_OUTPUT - echo "No prefix (nightly/main branch)" - fi - - - name: Discover ROCm version from index - id: discover - if: ${{ !inputs.rocm_version }} - run: | - python3 - "${{ inputs.index_url || env.DEFAULT_INDEX_URL }}" "${{ inputs.amdgpu_family || env.DEFAULT_AMDGPU_FAMILY }}" "${{ steps.prefix.outputs.value }}" <<'PYEOF' - import re, sys, urllib.request, urllib.parse - - index_url, gpu_family = sys.argv[1], sys.argv[2] - prefix = sys.argv[3] if len(sys.argv) > 3 else "" - - url = f"{index_url.rstrip('/')}/{gpu_family}/torch/" - print(f"Fetching torch index: {url}") - html = urllib.request.urlopen(url, timeout=60).read().decode() - - pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE) - versions = [] - for m in pattern.finditer(html): - ver = urllib.parse.unquote(m.group(1).split("-")[0]) - if "+rocm" in ver: - versions.append(ver) - - if prefix: - versions = [v for v in versions if v.split("+")[0].startswith(prefix)] - - if not versions: - print(f"::error::No torch wheels found (prefix={prefix!r})") - sys.exit(1) - - def key(v): - try: - return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit()) - except (ValueError, AttributeError): - return (0,) - - latest = max(versions, key=key) - rocm_ver = re.search(r"\+rocm(.+)", latest).group(1) - - print(f"Latest torch wheel: {latest}") - print(f"Discovered ROCm version: {rocm_ver}") - - import os - with 
open(os.environ["GITHUB_OUTPUT"], "a") as f: - f.write(f"rocm_version={rocm_ver}\n") - f.write(f"torch_wheel_version={latest}\n") - PYEOF - - - name: Resolve inputs with defaults - id: cfg - run: | - echo "amdgpu_family=${{ inputs.amdgpu_family || env.DEFAULT_AMDGPU_FAMILY }}" >> $GITHUB_OUTPUT - echo "python_version=${{ inputs.python_version || env.DEFAULT_PYTHON_VERSION }}" >> $GITHUB_OUTPUT - - # Use explicit rocm_version if provided, otherwise use discovered version - ROCM="${{ inputs.rocm_version || steps.discover.outputs.rocm_version }}" - echo "rocm_version=${ROCM}" >> $GITHUB_OUTPUT - - echo "index_url=${{ inputs.index_url || env.DEFAULT_INDEX_URL }}" >> $GITHUB_OUTPUT - echo "base_image=${{ env.DEFAULT_BASE_IMAGE }}" >> $GITHUB_OUTPUT - echo "torch_prefix=${{ steps.prefix.outputs.value }}" >> $GITHUB_OUTPUT - echo "pytorch_repo=${{ inputs.pytorch_repo || 'pytorch/pytorch' }}" >> $GITHUB_OUTPUT - echo "pytorch_branch=${{ inputs.pytorch_branch || 'nightly' }}" >> $GITHUB_OUTPUT - - COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" - echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT - - - name: Generate Docker image tag - id: docker-tag - run: | - BRANCH="${{ steps.cfg.outputs.pytorch_branch }}" - BRANCH_SAFE="${BRANCH//\//-}" - COMMIT="${{ steps.cfg.outputs.pytorch_commit }}" - ROCM_VERSION="${{ steps.cfg.outputs.rocm_version }}" - PYTHON_VERSION="${{ steps.cfg.outputs.python_version }}" - GFX="${{ steps.cfg.outputs.amdgpu_family }}" - BASE_IMAGE="${{ steps.cfg.outputs.base_image }}" - OS=$(echo "${BASE_IMAGE}" | tr -d ':' | tr '/' '-') - - IMAGE_TAG="pytorch-${BRANCH_SAFE}-${COMMIT}-rocm${ROCM_VERSION}-${OS}-py${PYTHON_VERSION}-${GFX}" - IMAGE_TAG="${IMAGE_TAG//+/-}" - echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT - echo "Generated image tag: ${IMAGE_TAG}" - - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERUSERNAME }} - password: ${{ secrets.DOCKERTOKEN }} - - - name: Prepare build context - run: | - 
cp dockerfiles/Dockerfile pytorch-src/ - mkdir -p pytorch-src/.github/scripts - cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ - cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ - - - name: Build Docker image - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - - docker build \ - --file pytorch-src/Dockerfile \ - --tag "${IMAGE}" \ - --label "pytorch.repo=${{ steps.cfg.outputs.pytorch_repo }}" \ - --label "pytorch.branch=${{ steps.cfg.outputs.pytorch_branch }}" \ - --label "pytorch.commit=${{ steps.cfg.outputs.pytorch_commit }}" \ - --build-arg "BASE_IMAGE=${{ steps.cfg.outputs.base_image }}" \ - --build-arg "ROCM_VERSION=${{ steps.cfg.outputs.rocm_version }}" \ - --build-arg "AMDGPU_FAMILY=${{ steps.cfg.outputs.amdgpu_family }}" \ - --build-arg "PYTHON_VERSION=${{ steps.cfg.outputs.python_version }}" \ - --build-arg "INDEX_URL=${{ steps.cfg.outputs.index_url }}" \ - --build-arg "TORCH_VERSION_PREFIX=${{ steps.cfg.outputs.torch_prefix }}" \ - pytorch-src - - echo "Docker image built successfully: ${IMAGE}" - - - name: Get ROCm packages info - id: rocm-packages - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - ROCM_PACKAGES=$(docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found") - echo "rocm_packages<> $GITHUB_OUTPUT - echo "${ROCM_PACKAGES}" >> $GITHUB_OUTPUT - echo "EOF" >> $GITHUB_OUTPUT - echo "ROCm packages:" - echo "${ROCM_PACKAGES}" - - - name: Push Docker image - run: | - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }} - echo "Docker image pushed successfully" - - - name: Post-build summary - run: | - IMAGE="${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker-tag.outputs.tag }}" - echo "## PyTorch Docker Build Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Parameter | Value |" >> $GITHUB_STEP_SUMMARY - echo 
"|-----------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| Image | \`${IMAGE}\` |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Repo | ${{ steps.cfg.outputs.pytorch_repo }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Branch | ${{ steps.cfg.outputs.pytorch_branch }} |" >> $GITHUB_STEP_SUMMARY - echo "| PyTorch Commit | ${{ steps.cfg.outputs.pytorch_commit }} |" >> $GITHUB_STEP_SUMMARY - echo "| AMDGPU Family | ${{ steps.cfg.outputs.amdgpu_family }} |" >> $GITHUB_STEP_SUMMARY - echo "| Python | ${{ steps.cfg.outputs.python_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| ROCm | ${{ steps.cfg.outputs.rocm_version }} |" >> $GITHUB_STEP_SUMMARY - echo "| Torch Version Prefix | ${{ steps.cfg.outputs.torch_prefix || 'latest' }} |" >> $GITHUB_STEP_SUMMARY - echo "| Index URL | ${{ steps.cfg.outputs.index_url }} |" >> $GITHUB_STEP_SUMMARY +name: Build Portable Linux PyTorch Dockers + +on: + schedule: + - cron: "0 6 * * *" # daily at 06:00 UTC + workflow_dispatch: + inputs: + pytorch_repo: + description: "GitHub repo to clone into the image (e.g. 'pytorch/pytorch' or 'ROCm/pytorch')" + type: string + default: "pytorch/pytorch" + pytorch_branch: + description: "Branch to clone. Default 'nightly' matches theRock wheel builds. For releases use ROCm/pytorch with 'release/2.11', 'release/2.10', etc." + type: string + default: "nightly" + python_version: + type: choice + options: + - "3.12" + - "3.10" + - "3.11" + - "3.13" + - "3.14" + default: "3.12" + amdgpu_family: + type: choice + options: + - gfx94X-dcgpu + - gfx950-dcgpu + - gfx90X-dcgpu + - gfx120X-all + - gfx110X-all + - gfx110X-dgpu + - gfx103X-dgpu + - gfx101X-dgpu + default: gfx94X-dcgpu + rocm_version: + description: "ROCm version (e.g. '7.13.0a20260413'). Leave empty to auto-discover from the latest available torch wheel." 
+ type: string + index_url: + description: Base URL for PyTorch wheels index + type: string + default: "https://rocm.nightlies.amd.com/v2-staging" + +permissions: + contents: read + +# Parent workflow owns all default literals. The reusable callee receives every +# input explicitly and uses inputs.* everywhere internally. +run-name: >- + ${{ github.event_name == 'schedule' && 'Nightly Docker builds' || + format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', + inputs.amdgpu_family || 'gfx94X-dcgpu', + inputs.pytorch_repo || 'pytorch/pytorch', + inputs.pytorch_branch || 'nightly', + inputs.rocm_version || 'auto') }} + +jobs: + nightly-matrix: + if: github.event_name == 'schedule' + strategy: + fail-fast: false + matrix: + include: + - pytorch_repo: pytorch/pytorch + pytorch_branch: nightly + label: nightly + - pytorch_repo: ROCm/pytorch + pytorch_branch: release/2.11 + label: "2.11" + - pytorch_repo: ROCm/pytorch + pytorch_branch: release/2.10 + label: "2.10" + - pytorch_repo: ROCm/pytorch + pytorch_branch: release/2.9 + label: "2.9" + name: "Nightly | torch ${{ matrix.label }} | py3.12 | gfx94X-dcgpu" + uses: ./.github/workflows/_build_portable_linux_pytorch_docker.yml + secrets: inherit + with: + pytorch_repo: ${{ matrix.pytorch_repo }} + pytorch_branch: ${{ matrix.pytorch_branch }} + python_version: "3.12" + amdgpu_family: gfx94X-dcgpu + rocm_version: auto + index_url: "https://rocm.nightlies.amd.com/v2-staging" + base_image: "ubuntu:24.04" + registry: docker.io + image_name: rocm/pytorch-private + job_display_name: "Nightly | torch ${{ matrix.label }} | py3.12 | gfx94X-dcgpu" + summary_title: "PyTorch Docker Build - ${{ matrix.label }}" + + build-docker: + if: github.event_name == 'workflow_dispatch' + name: "Build | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}" + uses: ./.github/workflows/_build_portable_linux_pytorch_docker.yml + secrets: inherit + with: + 
pytorch_repo: ${{ inputs.pytorch_repo || 'pytorch/pytorch' }} + pytorch_branch: ${{ inputs.pytorch_branch || 'nightly' }} + python_version: ${{ inputs.python_version || '3.12' }} + amdgpu_family: ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} + rocm_version: ${{ inputs.rocm_version || 'auto' }} + index_url: ${{ inputs.index_url || 'https://rocm.nightlies.amd.com/v2-staging' }} + base_image: "ubuntu:24.04" + registry: docker.io + image_name: rocm/pytorch-private + job_display_name: "Build | ${{ inputs.amdgpu_family || 'gfx94X-dcgpu' }} | ${{ inputs.pytorch_repo || 'pytorch/pytorch' }}@${{ inputs.pytorch_branch || 'nightly' }}" + summary_title: "PyTorch Docker Build Summary"