-
Notifications
You must be signed in to change notification settings - Fork 82
[CI] Refactor Docker workflow to single source of truth for config #3187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
376dc82
e4c51ef
1ae4908
a9eb101
8818b6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,112 @@ | ||
| #!/usr/bin/env python3 | ||
| """ | ||
| discover_rocm_from_torch_index.py | ||
|
|
||
| Parse a ROCm PyTorch wheel index HTML, pick the latest torch wheel matching an | ||
| optional PEP 440-style prefix (from release/x.y branches), and emit step outputs | ||
| via GITHUB_OUTPUT (or legacy ::set-output when that variable is unset). | ||
|
|
||
| Usage (from repo root, as in GitHub Actions): | ||
|
|
||
| python3 .github/scripts/discover_rocm_from_torch_index.py \\ | ||
| --index-url <BASE_URL> \\ | ||
| --amdgpu-family <FAMILY> \\ | ||
| [--torch-version-prefix <PREFIX>] | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import argparse | ||
| import os | ||
| import re | ||
| import urllib.parse | ||
| import urllib.request | ||
| from typing import Any | ||
|
|
||
|
|
||
| def _version_sort_key(v: str) -> tuple[int, ...]: | ||
| try: | ||
| return tuple(int(x) for x in re.split(r"[.\-a+]", v) if x.isdigit()) | ||
| except (ValueError, AttributeError): | ||
| return (0,) | ||
|
|
||
|
|
||
def _matches_version_prefix(version: str, prefix: str) -> bool:
    """Return True if *version* starts with *prefix* on a whole-number boundary."""
    if not version.startswith(prefix):
        return False
    remainder = version[len(prefix):]
    # A prefix ending in a digit must not be continued by another digit,
    # otherwise "2.1" (from release/2.1) would also select 2.10.x and 2.11.x.
    return not (prefix[-1:].isdigit() and remainder[:1].isdigit())


def discover_rocm_version(
    index_url: str,
    gpu_family: str,
    torch_version_prefix: str,
    *,
    timeout_s: int = 60,
) -> tuple[str, str]:
    """Return (rocm_version, latest_torch_wheel_version_string).

    Fetches ``<index_url>/<gpu_family>/torch/``, collects every
    ``torch-<version>-...whl`` name whose version carries a ``+rocm`` local
    segment, optionally narrows the list to *torch_version_prefix*, and picks
    the entry with the greatest ``_version_sort_key``.

    Args:
        index_url: Base index URL; a trailing slash is ignored.
        gpu_family: Sub-directory of the index for the target GPU family.
        torch_version_prefix: If non-empty, keep only wheels whose public
            version (the part before ``+``) starts with this prefix on a
            number boundary, so ``"2.1"`` matches ``2.1.2`` but not ``2.10.0``.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Raises:
        SystemExit: With code 1, after printing a ``::error::`` line, when no
            wheel matches or the ROCm suffix cannot be parsed.
    """
    url = f"{index_url.rstrip('/')}/{gpu_family}/torch/"
    print(f"Fetching torch index: {url}")
    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(url, timeout=timeout_s) as response:
        # Only ASCII wheel names are parsed, so stray undecodable bytes
        # elsewhere on the page should not abort discovery.
        html = response.read().decode("utf-8", errors="replace")

    pattern = re.compile(r"torch-(.+?)\.whl", re.IGNORECASE)
    versions: list[str] = []
    for m in pattern.finditer(html):
        # The first dash-separated field of the wheel name is the version;
        # hrefs percent-encode "+" as "%2B", hence the unquote.
        ver = urllib.parse.unquote(m.group(1).split("-")[0])
        if "+rocm" in ver:
            versions.append(ver)

    if torch_version_prefix:
        versions = [
            v
            for v in versions
            if _matches_version_prefix(v.split("+")[0], torch_version_prefix)
        ]

    if not versions:
        print(f"::error::No torch wheels found (prefix={torch_version_prefix!r})")
        raise SystemExit(1)

    latest = max(versions, key=_version_sort_key)
    match = re.search(r"\+rocm(.+)", latest)
    if not match:
        print(f"::error::Could not parse ROCm suffix from wheel version {latest!r}")
        raise SystemExit(1)
    rocm_ver = match.group(1)

    print(f"Latest torch wheel: {latest}")
    print(f"Discovered ROCm version: {rocm_ver}")
    return rocm_ver, latest
|
|
||
|
|
||
def set_output(name: str, val: Any) -> None:
    """Publish a GitHub Actions step output called *name* with value *val*.

    When ``GITHUB_OUTPUT`` is set, the record is appended to that file:
    ``name=value`` for single-line values, or the ``name<<DELIMITER`` heredoc
    form when the value contains a line break.  Without the heredoc form an
    embedded newline would start a new record and could inject unrelated
    outputs.  When the variable is unset, the legacy ``::set-output`` command
    is printed with ``%``, CR and LF escaped the way workflow commands expect.

    Args:
        name: Output name.
        val: Output value; converted with ``str()``.
    """
    import uuid  # local import: only needed to build a collision-free delimiter

    print(f"Setting output {name}={val}")
    text = str(val)
    output_path = os.getenv("GITHUB_OUTPUT")
    if output_path:
        if "\n" in text or "\r" in text:
            delimiter = f"ghadelimiter_{uuid.uuid4().hex}"
            record = f"{name}<<{delimiter}\n{text}\n{delimiter}"
        else:
            record = f"{name}={text}"
        with open(output_path, "a", encoding="utf-8") as env:
            print(record, file=env)
    else:
        escaped = text.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")
        print(f"::set-output name={name}::{escaped}")
|
|
||
|
|
||
def main() -> None:
    """Command-line entry point.

    Parses ``--index-url``, ``--amdgpu-family`` and the optional
    ``--torch-version-prefix``, runs the index discovery, and publishes the
    results as the ``rocm_version`` and ``torch_wheel_version`` step outputs.
    """
    cli = argparse.ArgumentParser(
        description="Discover ROCm version from a PyTorch ROCm wheel index page.",
    )
    # Both mandatory flags share the same shape, so register them from a table.
    mandatory_flags = (
        (
            "--index-url",
            "Base index URL (e.g. https://rocm.nightlies.amd.com/v2-staging)",
        ),
        (
            "--amdgpu-family",
            "GPU family subdirectory under the index (e.g. gfx94X-dcgpu)",
        ),
    )
    for flag, help_text in mandatory_flags:
        cli.add_argument(flag, required=True, help=help_text)
    cli.add_argument(
        "--torch-version-prefix",
        default="",
        help="If set, only wheels whose version starts with this prefix (e.g. 2.11)",
    )
    opts = cli.parse_args()

    rocm_ver, latest = discover_rocm_version(
        opts.index_url,
        opts.amdgpu_family,
        opts.torch_version_prefix,
    )
    for output_name, output_value in (
        ("rocm_version", rocm_ver),
        ("torch_wheel_version", latest),
    ):
        set_output(output_name, output_value)
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,231 @@ | ||
| # Reusable workflow: build and push one Portable Linux PyTorch Docker image. | ||
| # Triggered only via workflow_call (see build_portable_linux_pytorch_dockers.yml parent). | ||
| # Strategy B — inputs are required on the callee (no default: here); callers supply literals. | ||
| # Use rocm_version: auto to run index discovery (parent normalizes empty dispatch input to auto). | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ethanwee1 Unless it's infeasible for some reason, I'd prefer that we also do the "auto" resolution in the caller, so that the callee always receives a well-formed ROCm version input. Since this workflow is only meant to be invoked via workflow_call anyway, "auto" handling here isn't really a user-facing convenience. |
||
|
|
||
| name: Build Portable Linux PyTorch Docker (reusable) | ||
|
|
||
| on: | ||
| workflow_call: | ||
| inputs: | ||
| pytorch_repo: | ||
| description: GitHub repo to clone into the image (e.g. pytorch/pytorch) | ||
| required: true | ||
| type: string | ||
| pytorch_branch: | ||
| description: Branch or ref to clone | ||
| required: true | ||
| type: string | ||
| python_version: | ||
| required: true | ||
| type: string | ||
| amdgpu_family: | ||
| required: true | ||
| type: string | ||
| rocm_version: | ||
| description: Concrete ROCm version string, or 'auto' to discover from the wheel index | ||
| required: true | ||
| type: string | ||
| index_url: | ||
| description: Base URL for PyTorch wheels index | ||
| required: true | ||
| type: string | ||
| base_image: | ||
| description: Base image for the Dockerfile (e.g. ubuntu:24.04) | ||
| required: true | ||
| type: string | ||
| registry: | ||
| description: Docker registry host (e.g. docker.io) | ||
| required: true | ||
| type: string | ||
| image_name: | ||
| description: Image repository path without registry (e.g. org/name) | ||
| required: true | ||
| type: string | ||
| job_display_name: | ||
| description: Value for jobs.build-docker.name (parent-defined for UI) | ||
| required: true | ||
| type: string | ||
| summary_title: | ||
| description: Markdown heading title for the post-build summary (without leading ##) | ||
| required: true | ||
| type: string | ||
|
|
||
| permissions: | ||
| contents: read | ||
|
|
||
| run-name: >- | ||
| ${{ format('Build PyTorch Docker ({0}, {1}/{2}, ROCm {3})', | ||
| inputs.amdgpu_family, inputs.pytorch_repo, inputs.pytorch_branch, inputs.rocm_version) }} | ||
|
|
||
| jobs: | ||
| build-docker: | ||
| name: ${{ inputs.job_display_name }} | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - name: Checkout workflow files | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Checkout PyTorch source | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| repository: ${{ inputs.pytorch_repo }} | ||
| ref: ${{ inputs.pytorch_branch }} | ||
| path: pytorch-src | ||
| fetch-depth: 1 | ||
|
|
||
| - name: Derive torch version prefix from branch | ||
| id: prefix | ||
| env: | ||
| PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} | ||
| run: | | ||
| BRANCH="$PYTORCH_BRANCH" | ||
| if [[ "$BRANCH" =~ ^release/([0-9]+\.[0-9]+) ]]; then | ||
| echo "value=${BASH_REMATCH[1]}" >> $GITHUB_OUTPUT | ||
| echo "Derived torch prefix: ${BASH_REMATCH[1]}" | ||
| else | ||
| echo "value=" >> $GITHUB_OUTPUT | ||
| echo "No prefix (nightly/main branch)" | ||
| fi | ||
|
|
||
| - name: Discover ROCm version from index | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's move this step to the caller workflow. |
||
| id: discover | ||
| if: ${{ inputs.rocm_version == 'auto' || inputs.rocm_version == '' }} | ||
| env: | ||
| INDEX_URL: ${{ inputs.index_url }} | ||
| AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} | ||
| PREFIX_VALUE: ${{ steps.prefix.outputs.value }} | ||
| run: | | ||
| python3 .github/scripts/discover_rocm_from_torch_index.py \ | ||
| --index-url "$INDEX_URL" \ | ||
| --amdgpu-family "$AMDGPU_FAMILY" \ | ||
| --torch-version-prefix "$PREFIX_VALUE" | ||
|
|
||
# Pick the ROCm version used for the image tag and the Docker build arg:
# the discovered value when the input is 'auto' or empty, otherwise the input.
- name: Resolve ROCm version
  id: rocm
  env:
    ROCM_INPUT: ${{ inputs.rocm_version }}
    DISCOVER_ROCM: ${{ steps.discover.outputs.rocm_version }}
    DISCOVER_TORCH: ${{ steps.discover.outputs.torch_wheel_version }}
  run: |
    if [ "$ROCM_INPUT" = "auto" ] || [ -z "$ROCM_INPUT" ]; then
      VERSION="$DISCOVER_ROCM"
      TORCH_WHEEL="$DISCOVER_TORCH"
    else
      VERSION="$ROCM_INPUT"
      TORCH_WHEEL=""
    fi

    # Fail here rather than let an empty version leak into the image tag
    # and the ROCM_VERSION build arg.
    if [ -z "$VERSION" ]; then
      echo "::error::ROCm version could not be resolved (input='${ROCM_INPUT}')"
      exit 1
    fi

    {
      echo "version=${VERSION}"
      echo "torch_wheel=${TORCH_WHEEL}"
    } >> "$GITHUB_OUTPUT"
|
|
||
| - name: Resolve commit SHA | ||
| id: meta | ||
| run: | | ||
| COMMIT="$(cd pytorch-src && git rev-parse --short=8 HEAD)" | ||
| echo "pytorch_commit=${COMMIT}" >> $GITHUB_OUTPUT | ||
|
|
||
| - name: Generate Docker image tag | ||
| id: docker-tag | ||
| env: | ||
| PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} | ||
| BASE_IMAGE: ${{ inputs.base_image }} | ||
| PYTHON_VERSION: ${{ inputs.python_version }} | ||
| AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} | ||
| REGISTRY: ${{ inputs.registry }} | ||
| IMAGE_NAME: ${{ inputs.image_name }} | ||
| ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} | ||
| PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} | ||
| run: | | ||
| BRANCH="$PYTORCH_BRANCH" | ||
| BRANCH_SAFE="${BRANCH//\//-}" | ||
| OS=$(echo "$BASE_IMAGE" | tr -d ':' | tr '/' '-') | ||
|
|
||
| IMAGE_TAG="pytorch-${BRANCH_SAFE}-${PYTORCH_COMMIT}-rocm${ROCM_RESOLVED_VERSION}-${OS}-py${PYTHON_VERSION}-${AMDGPU_FAMILY}" | ||
| IMAGE_TAG="${IMAGE_TAG//+/-}" | ||
| IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" | ||
| echo "tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT | ||
| echo "image=${IMAGE}" >> $GITHUB_OUTPUT | ||
| echo "Generated image tag: ${IMAGE_TAG}" | ||
| echo "Full image ref: ${IMAGE}" | ||
|
|
||
# Authenticate against the same registry the image is tagged for and pushed
# to. Without `registry`, docker/login-action always logs in to Docker Hub,
# so a non-default inputs.registry would fail at push time.
- name: Log in to Docker Hub
  uses: docker/login-action@v3
  with:
    registry: ${{ inputs.registry }}
    username: ${{ secrets.DOCKERUSERNAME }}
    password: ${{ secrets.DOCKERTOKEN }}
|
|
||
| - name: Prepare build context | ||
| run: | | ||
| cp dockerfiles/Dockerfile pytorch-src/ | ||
| mkdir -p pytorch-src/.github/scripts | ||
| cp .github/scripts/install_rocm_deps.sh pytorch-src/.github/scripts/ | ||
| cp .github/scripts/install_pytorch_wheels.py pytorch-src/.github/scripts/ | ||
|
|
||
| - name: Build Docker image | ||
| env: | ||
| IMAGE: ${{ steps.docker-tag.outputs.image }} | ||
| PYTORCH_REPO: ${{ inputs.pytorch_repo }} | ||
| PYTORCH_BRANCH: ${{ inputs.pytorch_branch }} | ||
| PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }} | ||
| BASE_IMAGE: ${{ inputs.base_image }} | ||
| ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }} | ||
| AMDGPU_FAMILY: ${{ inputs.amdgpu_family }} | ||
| PYTHON_VERSION: ${{ inputs.python_version }} | ||
| INDEX_URL: ${{ inputs.index_url }} | ||
| TORCH_VERSION_PREFIX: ${{ steps.prefix.outputs.value }} | ||
| run: | | ||
| docker build \ | ||
| --file pytorch-src/Dockerfile \ | ||
| --tag "${IMAGE}" \ | ||
| --label "pytorch.repo=${PYTORCH_REPO}" \ | ||
| --label "pytorch.branch=${PYTORCH_BRANCH}" \ | ||
| --label "pytorch.commit=${PYTORCH_COMMIT}" \ | ||
| --build-arg "BASE_IMAGE=${BASE_IMAGE}" \ | ||
| --build-arg "ROCM_VERSION=${ROCM_RESOLVED_VERSION}" \ | ||
| --build-arg "AMDGPU_FAMILY=${AMDGPU_FAMILY}" \ | ||
| --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \ | ||
| --build-arg "INDEX_URL=${INDEX_URL}" \ | ||
| --build-arg "TORCH_VERSION_PREFIX=${TORCH_VERSION_PREFIX}" \ | ||
| pytorch-src | ||
|
|
||
| echo "Docker image built successfully: ${IMAGE}" | ||
|
|
||
| - name: Get ROCm packages info | ||
| env: | ||
| IMAGE: ${{ steps.docker-tag.outputs.image }} | ||
| run: | | ||
| docker run --rm "${IMAGE}" pip freeze | grep -i rocm || echo "No ROCm packages found" | ||
|
|
||
| - name: Push Docker image | ||
| env: | ||
| IMAGE: ${{ steps.docker-tag.outputs.image }} | ||
| run: | | ||
| docker push "${IMAGE}" | ||
| echo "Docker image pushed successfully" | ||
|
|
||
# Append a markdown table describing the built image to the job summary.
- name: Post-build summary
  env:
    IMAGE: ${{ steps.docker-tag.outputs.image }}
    TORCH_WHEEL: ${{ steps.rocm.outputs.torch_wheel }}
    SUMMARY_TITLE: ${{ inputs.summary_title }}
    PYTORCH_REPO: ${{ inputs.pytorch_repo }}
    PYTORCH_BRANCH: ${{ inputs.pytorch_branch }}
    AMDGPU_FAMILY: ${{ inputs.amdgpu_family }}
    PYTHON_VERSION: ${{ inputs.python_version }}
    INDEX_URL: ${{ inputs.index_url }}
    ROCM_RESOLVED_VERSION: ${{ steps.rocm.outputs.version }}
    PYTORCH_COMMIT: ${{ steps.meta.outputs.pytorch_commit }}
  run: |
    # Emit the whole table from one group so the summary file is opened once
    # and the redirect target is quoted in a single place.
    {
      echo "## ${SUMMARY_TITLE}"
      echo ""
      echo "| Parameter | Value |"
      echo "|-----------|-------|"
      echo "| Image | \`${IMAGE}\` |"
      # The torch wheel is only known when the ROCm version was discovered.
      if [ -n "$TORCH_WHEEL" ]; then
        echo "| Torch Wheel | ${TORCH_WHEEL} |"
      fi
      echo "| PyTorch Repo | ${PYTORCH_REPO} |"
      echo "| PyTorch Branch | ${PYTORCH_BRANCH} |"
      echo "| PyTorch Commit | ${PYTORCH_COMMIT} |"
      echo "| AMDGPU Family | ${AMDGPU_FAMILY} |"
      echo "| Python | ${PYTHON_VERSION} |"
      echo "| ROCm | ${ROCM_RESOLVED_VERSION} |"
      echo "| Index URL | ${INDEX_URL} |"
    } >> "$GITHUB_STEP_SUMMARY"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.