From e6c54525d42f6fb06732e4b79afd0a368d159e8d Mon Sep 17 00:00:00 2001
From: Jithun Nair
Date: Wed, 26 Nov 2025 00:22:42 +0000
Subject: [PATCH 1/9] First draft of ROCm build workflow

---
 .github/workflows/build-rocm.yml | 49 ++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 .github/workflows/build-rocm.yml

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
new file mode 100644
index 000000000..97ed6ac7b
--- /dev/null
+++ b/.github/workflows/build-rocm.yml
@@ -0,0 +1,49 @@
+name: Build ROCm
+
+on:
+  workflow_call:
+  workflow_dispatch:
+
+concurrency:
+  group: build-rocm-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-rocm:
+    name: Build ROCm (rocm6.4-py3.10)
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    strategy:
+      fail-fast: true
+      matrix:
+        include:
+          - name: 4xlargegpu
+            runs-on: linux.rocm.gpu.gfx942.8.meta-pytorch
+            torch-spec: 'torch --index-url https://download.pytorch.org/whl/rocm6.4/'
+            gpu-arch-type: "rocm"
+            gpu-arch-version: "6.4"
+    with:
+      timeout: 60
+      runner: ${{ matrix.runs-on }}
+      gpu-arch-type: ${{ matrix.gpu-arch-type }}
+      gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      submodules: recursive
+      upload-artifact: monarch-rocm-${{ github.sha }}
+      script: |
+        # Source common setup functions
+        source scripts/common-setup.sh
+
+        # TODO TEMPORARY: ROCm6.4 pytorch/almalinux-builder:rocm6.4 image has gcc-toolset-14
+        export PATH=/opt/rh/devtoolset-14/root/usr/bin/:$PATH
+
+        # Setup build environment (conda + system deps + rust + build deps)
+        setup_build_environment
+
+        # Install torch nightly
+        pip install ${{ matrix.torch-spec }}
+        pip install -r build-requirements.txt
+
+        # Setup Tensor Engine
+        setup_tensor_engine
+
+        # Build monarch (ROCm version)
+        USE_TENSOR_ENGINE=0 python setup.py bdist_wheel

From 1f068d912f1ab399dba3e982bd43d5a1802f8cdb Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Tue, 25 Nov 2025 18:54:18 -0600
Subject: [PATCH 2/9] Update build-rocm.yml

---
 .github/workflows/build-rocm.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 97ed6ac7b..5fb17fe1a 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -46,4 +46,5 @@ jobs:
         setup_tensor_engine
 
         # Build monarch (ROCm version)
+        # TODO TEMPORARY: Use USE_TENSOR_ENGINE=0 to avoid Rust build errors with cuda-sys, nccl-sys etc.
         USE_TENSOR_ENGINE=0 python setup.py bdist_wheel

From 146f61cb67ffe0ed18bd246a84b0239a79f55408 Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Tue, 2 Dec 2025 20:29:00 -0600
Subject: [PATCH 3/9] Add ciflow tracking issue and ciflow/rocm label trigger

---
 .github/pytorch-probot.yml | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .github/pytorch-probot.yml

diff --git a/.github/pytorch-probot.yml b/.github/pytorch-probot.yml
new file mode 100644
index 000000000..b2fdb4616
--- /dev/null
+++ b/.github/pytorch-probot.yml
@@ -0,0 +1,3 @@
+ciflow_tracking_issue: 64124
+ciflow_push_tags:
+- ciflow/rocm

From bd74c435d3834f164d880ce44ac11f994c74d8e8 Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Tue, 2 Dec 2025 20:33:11 -0600
Subject: [PATCH 4/9] Add push trigger for ROCm build workflow

---
 .github/workflows/build-rocm.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 5fb17fe1a..285fca329 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -2,6 +2,9 @@ name: Build ROCm
 
 on:
   workflow_call:
+  push:
+    tags:
+      - ciflow/trunk/*
   workflow_dispatch:
 
 concurrency:

From 13672cf05c6f668f6157f169cd8fe30b8ae9beb8 Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Tue, 2 Dec 2025 20:33:44 -0600
Subject: [PATCH 5/9] Update tag pattern for ROCm build workflow

---
 .github/workflows/build-rocm.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 285fca329..4198d5939 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -1,10 +1,10 @@
-name: Build ROCm
+trunk: Build ROCm
 
 on:
   workflow_call:
   push:
     tags:
-      - ciflow/trunk/*
+      - ciflow/rocm/*
   workflow_dispatch:
 
 concurrency:

From 75067b3af7fdf7dd6456b49d91708e45af27c3bd Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Fri, 5 Dec 2025 18:58:11 -0600
Subject: [PATCH 6/9] Add temporary pull_request trigger to build workflow

---
 .github/workflows/build-rocm.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 4198d5939..2b91dc3ab 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -5,6 +5,9 @@ on:
   push:
     tags:
       - ciflow/rocm/*
+  ### TEMPORARY STARTS ###
+  pull_request:
+  ### TEMPORARY ENDS ###
   workflow_dispatch:
 
 concurrency:

From 74d4405a92563d4ba9d8df48244230cc5c04d093 Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Fri, 5 Dec 2025 19:12:07 -0600
Subject: [PATCH 7/9] Typo

---
 .github/workflows/build-rocm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 2b91dc3ab..5c21e99b6 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -1,4 +1,4 @@
-trunk: Build ROCm
+name: Build ROCm
 
 on:
   workflow_call:

From f6326e65880c3598aeb3825ab3f1a6c4f3846b57 Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Tue, 9 Dec 2025 11:45:00 -0600
Subject: [PATCH 8/9] Remove temporary pull_request trigger since ciflow/rocm label seems to be working as expected

---
 .github/workflows/build-rocm.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 5c21e99b6..2ced50f13 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -5,9 +5,6 @@ on:
   push:
     tags:
       - ciflow/rocm/*
-  ### TEMPORARY STARTS ###
-  pull_request:
-  ### TEMPORARY ENDS ###
   workflow_dispatch:
 
 concurrency:

From 2073f6a40f49f4b2e4bd352c04bfb1f8e6b9e940 Mon Sep 17 00:00:00 2001
From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
Date: Wed, 10 Dec 2025 11:56:34 -0600
Subject: [PATCH 9/9] Add id-token write permissions for OIDC

---
 .github/workflows/build-rocm.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/build-rocm.yml b/.github/workflows/build-rocm.yml
index 2ced50f13..0e0537f4c 100644
--- a/.github/workflows/build-rocm.yml
+++ b/.github/workflows/build-rocm.yml
@@ -11,6 +11,10 @@ concurrency:
   group: build-rocm-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
   cancel-in-progress: true
 
+permissions:
+  id-token: write
+  contents: read
+
 jobs:
   build-rocm:
     name: Build ROCm (rocm6.4-py3.10)