Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions .github/scripts/discover_rocm_from_torch_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
discover_rocm_from_torch_index.py

Parse a ROCm PyTorch wheel index HTML, pick the latest torch wheel matching an
optional PEP 440-style prefix (from release/x.y branches), and emit step outputs
via GITHUB_OUTPUT (or legacy ::set-output when that variable is unset).

Usage (from repo root, as in GitHub Actions):

python3 .github/scripts/discover_rocm_from_torch_index.py \\
--index-url <BASE_URL> \\
--amdgpu-family <FAMILY> \\
[--torch-version-prefix <PREFIX>]
"""

from __future__ import annotations

import argparse
import os
import re
import urllib.parse
import urllib.request
from typing import Any


def _version_sort_key(v: str) -> tuple[int, ...]:
try:
return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit())
except (ValueError, AttributeError):
return (0,)


def discover_rocm_version(
    index_url: str,
    gpu_family: str,
    torch_version_prefix: str,
    *,
    timeout_s: int = 60,
) -> tuple[str, str]:
    """Return (rocm_version, latest_torch_wheel_version_string).

    Fetches ``<index_url>/<gpu_family>/torch/``, collects the version field of
    every ``torch-*.whl`` name that carries a ``+rocm`` local segment, filters
    by *torch_version_prefix* when it is non-empty, and picks the highest
    version according to ``_version_sort_key``.

    Args:
        index_url: Base index URL; a trailing slash is ignored.
        gpu_family: GPU family subdirectory under the index.
        torch_version_prefix: Version prefix such as ``"2.11"``; an empty
            string disables filtering. The prefix is matched on a numeric
            boundary, so ``"2.1"`` selects ``2.1.x`` but not ``2.10.x``.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Returns:
        The text after ``+rocm`` in the selected version, and the full
        selected version string.

    Raises:
        SystemExit: With code 1 when no wheel matches or the ROCm suffix
            cannot be parsed; an ``::error::`` line is printed first.
    """
    url = f"{index_url.rstrip('/')}/{gpu_family}/torch/"
    print(f"Fetching torch index: {url}")
    # The context manager closes the response even if read/decode raises.
    with urllib.request.urlopen(url, timeout=timeout_s) as response:
        # Only ASCII wheel names matter; do not abort on a stray invalid byte.
        html = response.read().decode("utf-8", errors="replace")

    wheel_pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE)
    # Wheel file names are "torch-<version>-<tags>"; "+" may be URL-encoded.
    candidates = (
        urllib.parse.unquote(m.group(1).split("-")[0])
        for m in wheel_pattern.finditer(html)
    )
    versions: list[str] = [ver for ver in candidates if "+rocm" in ver]

    if torch_version_prefix:

        def _has_prefix(version: str) -> bool:
            public = version.split("+")[0]
            if not public.startswith(torch_version_prefix):
                return False
            tail = public[len(torch_version_prefix):]
            # Reject matches that cut a number in half ("2.1" vs "2.10.0").
            return not (torch_version_prefix[-1].isdigit() and tail[:1].isdigit())

        versions = [v for v in versions if _has_prefix(v)]

    if not versions:
        print(f"::error::No torch wheels found (prefix={torch_version_prefix!r})")
        raise SystemExit(1)

    latest = max(versions, key=_version_sort_key)
    match = re.search(r"\+rocm(.+)", latest)
    if not match:
        print(f"::error::Could not parse ROCm suffix from wheel version {latest!r}")
        raise SystemExit(1)
    rocm_ver = match.group(1)

    print(f"Latest torch wheel: {latest}")
    print(f"Discovered ROCm version: {rocm_ver}")
    return rocm_ver, latest


def set_output(name: str, val: Any) -> None:
    """Publish one step output named *name* with value *val*.

    The pair is echoed to stdout, then appended as ``name=val`` to the file
    named by the ``GITHUB_OUTPUT`` environment variable. When that variable
    is unset or empty, the legacy ``::set-output`` command is printed instead.
    """
    print(f"Setting output {name}={val}")
    output_path = os.getenv("GITHUB_OUTPUT")
    if not output_path:
        # Fallback for environments that do not provide GITHUB_OUTPUT.
        print(f"::set-output name={name}::{val}")
        return
    with open(output_path, "a") as sink:
        sink.write(f"{name}={val}\n")


def main() -> None:
    """Parse the command line, run discovery, and publish both step outputs.

    Outputs written, in order: ``rocm_version`` and ``torch_wheel_version``.
    """
    cli = argparse.ArgumentParser(
        description="Discover ROCm version from a PyTorch ROCm wheel index page.",
    )
    # Mandatory flags, registered in the order they appear in --help.
    required_flags = {
        "--index-url": "Base index URL (e.g. https://rocm.nightlies.amd.com/v2-staging)",
        "--amdgpu-family": "GPU family subdirectory under the index (e.g. gfx94X-dcgpu)",
    }
    for flag, help_text in required_flags.items():
        cli.add_argument(flag, required=True, help=help_text)
    cli.add_argument(
        "--torch-version-prefix",
        default="",
        help="If set, only wheels whose version starts with this prefix (e.g. 2.11)",
    )
    opts = cli.parse_args()

    discovered = discover_rocm_version(
        opts.index_url,
        opts.amdgpu_family,
        opts.torch_version_prefix,
    )
    for output_name, value in zip(("rocm_version", "torch_wheel_version"), discovered):
        set_output(output_name, value)


if __name__ == "__main__":
    main()
231 changes: 231 additions & 0 deletions .github/workflows/_build_portable_linux_pytorch_docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
# Reusable workflow: build and push one Portable Linux PyTorch Docker image.
# Triggered only via workflow_call (see build_portable_linux_pytorch_dockers.yml parent).
# All workflow inputs are required (no defaults are declared here); callers supply literal values.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Strategy B — inputs are required on the callee (no default: here); callers supply literals.
Workflow inputs are required (no defaults here); callers supply literals.

# Use rocm_version: auto to run index discovery (parent normalizes empty dispatch input to auto).
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ethanwee1 If it's not infeasible for some reason, I'd prefer we do the "auto" resolution in the caller as well, so that the callee always sees a properly-formed ROCm version input. And since this workflow is meant to be dispatched via workflow_call anyway, it's not really a user-friendly feature.


name: Build Portable Linux PyTorch Docker (reusable)

# Every input is required and carries a description, so the caller must pass
# each value explicitly.
on:
  workflow_call:
    inputs:
      pytorch_repo:
        description: GitHub repo to clone into the image (e.g. pytorch/pytorch)
        required: true
        type: string
      pytorch_branch:
        description: Branch or ref to clone
        required: true
        type: string
      python_version:
        description: Python version passed to the image build as PYTHON_VERSION and used in the image tag
        required: true
        type: string
      amdgpu_family:
        description: AMDGPU family subdirectory under the wheel index (e.g. gfx94X-dcgpu)
        required: true
        type: string
      rocm_version:
        description: Concrete ROCm version string, or 'auto' to discover from the wheel index
        required: true
        type: string
      index_url:
        description: Base URL for PyTorch wheels index
        required: true
        type: string
      base_image:
        description: Base image for the Dockerfile (e.g. ubuntu:24.04)
        required: true
        type: string
      registry:
        description: Docker registry host (e.g. docker.io)
        required: true
        type: string
      image_name:
        description: Image repository path without registry (e.g. org/name)
        required: true
        type: string
      job_display_name:
        description: Value for jobs.build-docker.name (parent-defined for UI)
        required: true
        type: string
      summary_title:
        description: Markdown heading title for the post-build summary (without leading ##)
        required: true
        type: string

# The workflow token is limited to read-only access to repository contents.
permissions:
  contents: read

# Title is built from the raw inputs, so a run started with rocm_version
# 'auto' is shown as "ROCm auto" rather than the resolved version.
run-name: >-
  Build PyTorch Docker (${{ inputs.amdgpu_family }},
  ${{ inputs.pytorch_repo }}/${{ inputs.pytorch_branch }},
  ROCm ${{ inputs.rocm_version }})

jobs:
  build-docker:
    # Display name is supplied by the caller through job_display_name.
    name: ${{ inputs.job_display_name }}
    runs-on: ubuntu-latest
    steps:
      # Default checkout into the workspace root; later steps read
      # dockerfiles/ and .github/scripts/ from it.
      - uses: actions/checkout@v4
        name: Checkout workflow files

      # Shallow clone of the requested PyTorch ref into pytorch-src/, which
      # later serves as the Docker build context.
      - uses: actions/checkout@v4
        name: Checkout PyTorch source
        with:
          path: pytorch-src
          repository: ${{ inputs.pytorch_repo }}
          ref: ${{ inputs.pytorch_branch }}
          fetch-depth: 1

- name: Derive torch version prefix from branch
  id: prefix
  env:
    PYTORCH_BRANCH: ${{ inputs.pytorch_branch }}
  run: |
    # Branches named release/X.Y... yield the prefix X.Y; any other branch
    # yields an empty prefix.
    PREFIX=""
    if [[ "$PYTORCH_BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then
      PREFIX="${BASH_REMATCH[1]}"
    fi
    echo "value=${PREFIX}" >> $GITHUB_OUTPUT
    if [[ -n "$PREFIX" ]]; then
      echo "Derived torch prefix: ${PREFIX}"
    else
      echo "No prefix (nightly/main branch)"
    fi

# NOTE(review): a reviewer asked for this discovery step to move to the caller
# workflow, so that this workflow always receives a concrete ROCm version.
# Runs only when no concrete version was supplied ('auto' or empty input).
- name: Discover ROCm version from index
  id: discover
  if: ${{ inputs.rocm_version == 'auto' || inputs.rocm_version == '' }}
  env:
    INDEX_URL: ${{ inputs.index_url }}
    AMDGPU_FAMILY: ${{ inputs.amdgpu_family }}
    PREFIX_VALUE: ${{ steps.prefix.outputs.value }}
  run: |
    # The script writes the rocm_version and torch_wheel_version step outputs.
    python3 .github/scripts/discover_rocm_from_torch_index.py \
      --index-url "$INDEX_URL" \
      --amdgpu-family "$AMDGPU_FAMILY" \
      --torch-version-prefix "$PREFIX_VALUE"

- name: Resolve ROCm version
  id: rocm
  env:
    ROCM_INPUT: ${{ inputs.rocm_version }}
    DISCOVER_ROCM: ${{ steps.discover.outputs.rocm_version }}
    DISCOVER_TORCH: ${{ steps.discover.outputs.torch_wheel_version }}
  run: |
    # 'auto' or an empty input selects the discovered values; any other
    # input is used verbatim and no torch wheel is reported.
    case "$ROCM_INPUT" in
      auto|"")
        RESOLVED="$DISCOVER_ROCM"
        WHEEL="$DISCOVER_TORCH"
        ;;
      *)
        RESOLVED="$ROCM_INPUT"
        WHEEL=""
        ;;
    esac
    echo "version=${RESOLVED}" >> $GITHUB_OUTPUT
    echo "torch_wheel=${WHEEL}" >> $GITHUB_OUTPUT

- name: Resolve commit SHA
  id: meta
  run: |
    # Abbreviated (8-character) SHA of the checked-out PyTorch HEAD.
    COMMIT="$(git -C pytorch-src rev-parse --short=8 HEAD)"
    echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT

- name: Generate Docker image tag
  id: docker-tag
  env:
    PYTORCH_BRANCH: ${{ inputs.pytorch_branch }}
    BASE_IMAGE: ${{ inputs.base_image }}
    PYTHON_VERSION: ${{ inputs.python_version }}
    AMDGPU_FAMILY: ${{ inputs.amdgpu_family }}
    REGISTRY: ${{ inputs.registry }}
    IMAGE_NAME: ${{ inputs.image_name }}
    ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }}
    PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }}
  run: |
    # Slashes in the branch become dashes.
    BRANCH_SAFE="${PYTORCH_BRANCH//\//-}"
    # Base image: colons are dropped and slashes become dashes.
    OS="${BASE_IMAGE//:/}"
    OS="${OS//\//-}"

    IMAGE_TAG="pytorch-${BRANCH_SAFE}-${PYTORCH_COMMIT}-rocm${ROCM_RESOLVED_VERSION}-${OS}-py${PYTHON_VERSION}-${AMDGPU_FAMILY}"
    # Any '+' in the assembled tag is replaced with '-'.
    IMAGE_TAG="${IMAGE_TAG//+/-}"
    IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
    {
      echo "tag=${IMAGE_TAG}"
      echo "image=${IMAGE}"
    } >> $GITHUB_OUTPUT
    echo "Generated image tag: ${IMAGE_TAG}"
    echo "Full image ref: ${IMAGE}"

# Authenticate against the same registry the image is tagged for and pushed to.
# Without `registry`, docker/login-action logs in to Docker Hub regardless of
# inputs.registry, so a push to any other registry would be unauthenticated.
# NOTE(review): DOCKERUSERNAME / DOCKERTOKEN must be made available by the
# calling workflow - confirm how the caller passes secrets.
- name: Log in to container registry
  uses: docker/login-action@v3
  with:
    registry: ${{ inputs.registry }}
    username: ${{ secrets.DOCKERUSERNAME }}
    password: ${{ secrets.DOCKERTOKEN }}

- name: Prepare build context
  run: |
    # Copy the Dockerfile and the two install scripts into pytorch-src/,
    # which is used as the Docker build context.
    SCRIPTS_DEST=pytorch-src/.github/scripts
    cp dockerfiles/Dockerfile pytorch-src/
    mkdir -p "$SCRIPTS_DEST"
    for script in install_rocm_deps.sh install_pytorch_wheels.py; do
      cp ".github/scripts/${script}" "${SCRIPTS_DEST}/"
    done

- name: Build Docker image
  env:
    IMAGE: ${{ steps.docker-tag.outputs.image }}
    PYTORCH_REPO: ${{ inputs.pytorch_repo }}
    PYTORCH_BRANCH: ${{ inputs.pytorch_branch }}
    PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }}
    BASE_IMAGE: ${{ inputs.base_image }}
    ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }}
    AMDGPU_FAMILY: ${{ inputs.amdgpu_family }}
    PYTHON_VERSION: ${{ inputs.python_version }}
    INDEX_URL: ${{ inputs.index_url }}
    TORCH_VERSION_PREFIX: ${{ steps.prefix.outputs.value }}
  run: |
    # Options are collected in an array; labels record the PyTorch source,
    # build args forward the resolved configuration to the Dockerfile.
    build_opts=(
      --file pytorch-src/Dockerfile
      --tag "${IMAGE}"
      --label "pytorch.repo=${PYTORCH_REPO}"
      --label "pytorch.branch=${PYTORCH_BRANCH}"
      --label "pytorch.commit=${PYTORCH_COMMIT}"
      --build-arg "BASE_IMAGE=${BASE_IMAGE}"
      --build-arg "ROCM_VERSION=${ROCM_RESOLVED_VERSION}"
      --build-arg "AMDGPU_FAMILY=${AMDGPU_FAMILY}"
      --build-arg "PYTHON_VERSION=${PYTHON_VERSION}"
      --build-arg "INDEX_URL=${INDEX_URL}"
      --build-arg "TORCH_VERSION_PREFIX=${TORCH_VERSION_PREFIX}"
    )
    docker build "${build_opts[@]}" pytorch-src

    echo "Docker image built successfully: ${IMAGE}"

- name: Get ROCm packages info
  env:
    IMAGE: ${{ steps.docker-tag.outputs.image }}
  run: |
    # Informational only: list installed packages whose name mentions rocm;
    # the step still succeeds when grep finds none.
    if ! docker run --rm "${IMAGE}" pip freeze | grep -i rocm; then
      echo "No ROCm packages found"
    fi

- name: Push Docker image
  env:
    IMAGE: ${{ steps.docker-tag.outputs.image }}
  run: |
    # Publish the freshly built image under its full registry reference.
    docker image push "${IMAGE}"
    echo "Docker image pushed successfully"

- name: Post-build summary
  env:
    IMAGE: ${{ steps.docker-tag.outputs.image }}
    TORCH_WHEEL: ${{ steps.rocm.outputs.torch_wheel }}
    SUMMARY_TITLE: ${{ inputs.summary_title }}
    PYTORCH_REPO: ${{ inputs.pytorch_repo }}
    PYTORCH_BRANCH: ${{ inputs.pytorch_branch }}
    AMDGPU_FAMILY: ${{ inputs.amdgpu_family }}
    PYTHON_VERSION: ${{ inputs.python_version }}
    INDEX_URL: ${{ inputs.index_url }}
    ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }}
    PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }}
  run: |
    # Emit the whole Markdown table through one grouped redirect.
    {
      echo "## ${SUMMARY_TITLE}"
      echo ""
      echo "| Parameter | Value |"
      echo "|-----------|-------|"
      echo "| Image | \`${IMAGE}\` |"
      # The wheel row appears only when the ROCm version was discovered.
      if [ -n "$TORCH_WHEEL" ]; then
        echo "| Torch Wheel | ${TORCH_WHEEL} |"
      fi
      echo "| PyTorch Repo | ${PYTORCH_REPO} |"
      echo "| PyTorch Branch | ${PYTORCH_BRANCH} |"
      echo "| PyTorch Commit | ${PYTORCH_COMMIT} |"
      echo "| AMDGPU Family | ${AMDGPU_FAMILY} |"
      echo "| Python | ${PYTHON_VERSION} |"
      echo "| ROCm | ${ROCM_RESOLVED_VERSION} |"
      echo "| Index URL | ${INDEX_URL} |"
    } >> $GITHUB_STEP_SUMMARY
Loading
Loading