
Commit 2aebb44

allenwang28 authored and facebook-github-bot committed
Add ARM workflow for GB200 support (#2029)
Summary: As title: currently, pip install from a GB200 machine doesn't work because we don't publish an ARM build to PyPI.

Differential Revision: D88202637
1 parent ee42dc1 commit 2aebb44

8 files changed (+480, -75 lines changed)

.github/workflows/build-cpu.yml

Lines changed: 4 additions & 3 deletions
@@ -20,18 +20,19 @@ jobs:
     with:
       timeout: 60
       runner: ${{ matrix.runs-on }}
+      docker-image: "pytorch/manylinux2_28-builder:cpu"
       submodules: recursive
       upload-artifact: monarch-cpu-${{ github.sha }}
       script: |
         # Source common setup functions
         source scripts/common-setup.sh

-        # Setup build environment (conda + system deps + rust + build deps)
+        # Setup build environment (manylinux Python + system deps + rust)
         setup_build_environment

         # Install torch nightly (CPU version)
         pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
         pip install -r build-requirements.txt

-        # Build monarch (No tensor engine, CPU version)
-        USE_TENSOR_ENGINE=0 python setup.py bdist_wheel
+        # Build monarch (No tensor engine, CPU version) with proper library paths
+        USE_TENSOR_ENGINE=0 with_build_env python setup.py bdist_wheel

.github/workflows/build-cuda.yml

Lines changed: 5 additions & 4 deletions
@@ -31,7 +31,7 @@ jobs:
         # Source common setup functions
         source scripts/common-setup.sh

-        # Setup build environment (conda + system deps + rust + build deps)
+        # Setup build environment (manylinux Python + system deps + rust)
         setup_build_environment

         # Install torch nightly
@@ -41,7 +41,8 @@
         # Setup Tensor Engine
         setup_tensor_engine

-        export CUDA_LIB_DIR=/usr/lib64
+        # Setup CUDA environment (detects CUDA paths automatically)
+        setup_cuda_environment

-        # Build monarch (CUDA version)
-        python setup.py bdist_wheel
+        # Build monarch (CUDA version) with proper library paths
+        with_build_env python setup.py bdist_wheel

.github/workflows/doc_build.yml

Lines changed: 5 additions & 3 deletions
@@ -26,7 +26,7 @@ jobs:
         # Source common setup functions
         source scripts/common-setup.sh

-        # Setup build environment (conda + system deps + rust + build deps)
+        # Setup build environment (manylinux Python + system deps + rust)
         # docs build will use 3.13
         setup_build_environment 3.13

@@ -46,10 +46,12 @@
         export USE_TENSOR_ENGINE=1
         export RUSTFLAGS="-Zthreads=16 ${RUSTFLAGS:-}"
         export _GLIBCXX_USE_CXX11_ABI=1
-        export CUDA_LIB_DIR=/usr/lib64
+
+        # Setup CUDA environment (detects CUDA paths automatically)
+        setup_cuda_environment

         # Build Monarch completely for documentation - use dedicated script
-        ./scripts/build_monarch_for_docs.sh
+        with_build_env ./scripts/build_monarch_for_docs.sh

         # Generate documentation for all workspace crates
         cargo doc --workspace --no-deps

.github/workflows/publish_release.yml

Lines changed: 25 additions & 13 deletions
@@ -12,26 +12,38 @@ concurrency:
   cancel-in-progress: true
 jobs:
   build:
-    name: cuda12.6-py${{ matrix.python-version }}-${{ matrix.name }}
+    name: ${{ matrix.name }}-py${{ matrix.python-version }}
     strategy:
-      fail-fast: false # Changed to false to see results from all Python versions
+      fail-fast: false
       matrix:
         # TODO add 3.14 once we figure out py03 issue
         python-version: ["3.10", "3.11", "3.12", "3.13"]
         include:
-          - name: 4xlarge
-            runs-on: linux.g5.4xlarge.nvidia.gpu
+          # x86_64 CUDA builds
+          - name: cuda12.8-x86_64
+            runner: linux.g5.4xlarge.nvidia.gpu
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu128'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.8"
+           docker-image: "pytorch/almalinux-builder" # Uses default, becomes pytorch/almalinux-builder:cuda12.8
+           platform-tag: "manylinux2014_x86_64"
+         # aarch64 CUDA builds
+         - name: cuda12.8-aarch64
+           runner: linux.arm64.r7g.12xlarge.memory # GPU-enabled ARM runner like PyTorch uses
+           torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu128'
+           gpu-arch-type: "cpu" # Use "cpu" to skip nvidia driver install, CUDA libs are in Docker image
+           gpu-arch-version: ""
+           docker-image: "pytorch/manylinuxaarch64-builder:cuda12.8" # ARM-specific image with CUDA
+           platform-tag: "manylinux2014_aarch64"
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 60
-      runner: ${{ matrix.runs-on }}
+      runner: ${{ matrix.runner }}
       gpu-arch-type: ${{ matrix.gpu-arch-type }}
       gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      docker-image: ${{ matrix.docker-image }}
       submodules: recursive
-      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.gpu-arch-type }}${{ matrix.gpu-arch-version }}
+      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.name }}
       script: |
         source scripts/common-setup.sh
         setup_build_environment ${{ matrix.python-version }}
@@ -44,17 +56,17 @@
         # Setup Tensor Engine dependencies
         setup_tensor_engine

-        cargo install --path monarch_hyperactor
+        # Setup CUDA environment (detects CUDA paths automatically for both x86_64 and aarch64)
+        setup_cuda_environment

-        # Build wheel
+        # Build wheel with proper library paths
         export MONARCH_PACKAGE_NAME="torchmonarch"
-        export CUDA_LIB_DIR=/usr/lib64
         export MONARCH_VERSION="${{ github.event.inputs.version }}"
-        python setup.py bdist_wheel

-        # hacky until the right distribution wheel can be made...
-        find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
-        ls -la dist/
+        with_build_env python setup.py bdist_wheel
+
+        # Properly retag wheel with manylinux platform tag
+        retag_wheel_platform "${{ matrix.platform-tag }}"

         # Run tests
         install_python_test_dependencies

.github/workflows/test-cpu-python.yml

Lines changed: 3 additions & 2 deletions
@@ -19,14 +19,15 @@ jobs:
     with:
       timeout: 60
       runner: linux.4xlarge
+      docker-image: "pytorch/manylinux2_28-builder:cpu"
       submodules: recursive
       download-artifact: ${{ inputs.artifact-name }}
       script: |
         # Source common setup functions
         source scripts/common-setup.sh

-        # Setup test environment
-        setup_conda_environment
+        # Setup test environment (uses manylinux Python)
+        setup_test_environment

         # Disable tensor engine
         export USE_TENSOR_ENGINE=0

.github/workflows/wheels.yml

Lines changed: 26 additions & 14 deletions
@@ -13,25 +13,38 @@ concurrency:
   cancel-in-progress: true
 jobs:
   build:
-    name: cuda12.6-py${{ matrix.python-version }}-${{ matrix.name }}
+    name: ${{ matrix.name }}-py${{ matrix.python-version }}
     strategy:
-      fail-fast: false # Changed to false to see results from all Python versions
+      fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        # python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10"]
         include:
-          - name: 4xlarge
-            runs-on: linux.g5.4xlarge.nvidia.gpu
+          # x86_64 CUDA builds
+          - name: cuda12.6-x86_64
+            runner: linux.g5.4xlarge.nvidia.gpu
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.6"
+           docker-image: "pytorch/almalinux-builder" # Uses default, becomes pytorch/almalinux-builder:cuda12.6
+           platform-tag: "manylinux2014_x86_64"
+         # aarch64 CUDA builds
+         - name: cuda12.6-aarch64
+           runner: linux.arm64.r7g.12xlarge.memory # GPU-enabled ARM runner like PyTorch uses
+           torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
+           gpu-arch-type: "cpu" # Use "cpu" to skip nvidia driver install, CUDA libs are in Docker image
+           gpu-arch-version: ""
+           docker-image: "pytorch/manylinuxaarch64-builder:cuda12.6" # ARM-specific image with CUDA
+           platform-tag: "manylinux2014_aarch64"
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 60
-      runner: ${{ matrix.runs-on }}
+      runner: ${{ matrix.runner }}
       gpu-arch-type: ${{ matrix.gpu-arch-type }}
       gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      docker-image: ${{ matrix.docker-image }}
       submodules: recursive
-      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.gpu-arch-type }}${{ matrix.gpu-arch-version }}
+      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.name }}
       script: |
         source scripts/common-setup.sh
         setup_build_environment ${{ matrix.python-version }}
@@ -44,18 +57,17 @@
         # Setup Tensor Engine dependencies
         setup_tensor_engine

-        cargo install --path monarch_hyperactor
+        # Setup CUDA environment (detects CUDA paths automatically for both x86_64 and aarch64)
+        setup_cuda_environment

-        # Build wheel
+        # Build wheel with proper library paths
         export MONARCH_PACKAGE_NAME="torchmonarch-nightly"
         export MONARCH_VERSION=$(date +'%Y.%m.%d')
-        export CUDA_LIB_DIR=/usr/lib64

-        python setup.py bdist_wheel
+        with_build_env python setup.py bdist_wheel

-        # hacky until the right distribution wheel can be made...
-        find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
-        ls -la dist/
+        # Properly retag wheel with manylinux platform tag
+        retag_wheel_platform "${{ matrix.platform-tag }}"

         # Run tests
         install_python_test_dependencies
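
Both release workflows above replace the old find/mv rename hack with a retag_wheel_platform helper driven by the per-architecture matrix.platform-tag value (the helper itself is presumably defined in scripts/common-setup.sh, which is not shown in this diff). The filename-level convention the old hack implemented is simple; the following Rust sketch is an illustration only, with retagged_name as a hypothetical function, not the helper's actual implementation:

use std::path::Path;

/// Sketch: rewrite a wheel filename's platform tag, e.g.
/// "torchmonarch-0.1-cp310-cp310-linux_aarch64.whl"
///   -> "torchmonarch-0.1-cp310-cp310-manylinux2014_aarch64.whl".
/// The real retag_wheel_platform helper may also update wheel metadata;
/// this only shows the naming convention.
fn retagged_name(wheel: &Path, platform_tag: &str) -> Option<String> {
    let name = wheel.file_name()?.to_str()?;
    for linux_tag in ["linux_x86_64", "linux_aarch64"] {
        if name.contains(linux_tag) {
            return Some(name.replace(linux_tag, platform_tag));
        }
    }
    None
}

fn main() {
    let wheel = Path::new("dist/torchmonarch-0.1-cp310-cp310-linux_aarch64.whl");
    if let Some(new_name) = retagged_name(wheel, "manylinux2014_aarch64") {
        println!("{new_name}");
    }
}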

build_utils/src/lib.rs

Lines changed: 30 additions & 6 deletions
@@ -152,17 +152,33 @@ pub fn discover_cuda_config() -> Result<CudaConfig, BuildError> {
     };

     // Add standard include directories
-    // Check both old-style (include) and new-style (targets/x86_64-linux/include) CUDA installations
-    for include_subdir in &["include", "targets/x86_64-linux/include"] {
+    // Check both old-style (include) and new-style target-specific paths
+    // Support both x86_64 and aarch64/ARM architectures
+    for include_subdir in &[
+        "include",
+        "targets/x86_64-linux/include",
+        "targets/aarch64-linux/include",
+        "targets/sbsa-linux/include",
+    ] {
         let include_dir = cuda_home_path.join(include_subdir);
         if include_dir.exists() {
             config.include_dirs.push(include_dir);
         }
     }

     // Add standard library directories
-    // Check both old-style (lib64, lib) and new-style (targets/x86_64-linux/lib) CUDA installations
-    for lib_subdir in &["lib64", "lib", "lib/x64", "targets/x86_64-linux/lib"] {
+    // Check both old-style and new-style CUDA installations for both x86_64 and aarch64
+    // Try architecture-specific paths first, then generic paths
+    for lib_subdir in &[
+        "lib64",                     // Common x86_64 location
+        "lib",                       // Common aarch64 location
+        "lib/x64",                   // Windows x64
+        "targets/x86_64-linux/lib",  // CUDA toolkit x86_64
+        "targets/aarch64-linux/lib", // CUDA toolkit aarch64
+        "targets/sbsa-linux/lib",    // CUDA toolkit ARM server
+        "lib/aarch64-linux-gnu",     // Debian/Ubuntu aarch64
+        "lib/x86_64-linux-gnu",      // Debian/Ubuntu x86_64
+    ] {
         let lib_dir = cuda_home_path.join(lib_subdir);
         if lib_dir.exists() {
             config.lib_dirs.push(lib_dir);
@@ -201,8 +217,16 @@ pub fn get_cuda_lib_dir() -> Result<String, BuildError> {
     // Try to deduce from CUDA configuration
     let cuda_config = discover_cuda_config()?;
     if let Some(cuda_home) = cuda_config.cuda_home {
-        // Check both old-style and new-style CUDA library paths
-        for lib_subdir in &["lib64", "lib", "targets/x86_64-linux/lib"] {
+        // Check both x86_64 and aarch64 CUDA library paths
+        for lib_subdir in &[
+            "lib64",                     // Common x86_64 location
+            "lib",                       // Common aarch64 location
+            "targets/x86_64-linux/lib",  // CUDA toolkit x86_64
+            "targets/aarch64-linux/lib", // CUDA toolkit aarch64
+            "targets/sbsa-linux/lib",    // CUDA toolkit ARM server
+            "lib/aarch64-linux-gnu",     // Debian/Ubuntu aarch64
+            "lib/x86_64-linux-gnu",      // Debian/Ubuntu x86_64
+        ] {
            let lib_path = cuda_home.join(lib_subdir);
            if lib_path.exists() {
                return Ok(lib_path.to_string_lossy().to_string());
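
The expanded lookup tables above simply probe candidate subdirectories under the CUDA home and keep whichever exist, now covering aarch64 and SBSA toolkit layouts alongside the x86_64 and Debian/Ubuntu multiarch paths; this is what lets the workflows drop the hard-coded CUDA_LIB_DIR=/usr/lib64 export. A minimal standalone sketch of the same probing idea (hypothetical code, not the build_utils API; candidate_cuda_lib_dir is an illustrative name) looks like this:

use std::path::{Path, PathBuf};

/// Hypothetical helper: return the first existing CUDA library directory
/// under `cuda_home`, trying x86_64, aarch64/sbsa, and distro-specific layouts.
fn candidate_cuda_lib_dir(cuda_home: &Path) -> Option<PathBuf> {
    const LIB_SUBDIRS: &[&str] = &[
        "lib64",                     // common x86_64 layout
        "lib",                       // common aarch64 layout
        "targets/x86_64-linux/lib",  // CUDA toolkit, x86_64
        "targets/aarch64-linux/lib", // CUDA toolkit, aarch64
        "targets/sbsa-linux/lib",    // CUDA toolkit, ARM server (SBSA)
        "lib/aarch64-linux-gnu",     // Debian/Ubuntu aarch64
        "lib/x86_64-linux-gnu",      // Debian/Ubuntu x86_64
    ];
    LIB_SUBDIRS
        .iter()
        .map(|sub| cuda_home.join(sub))
        .find(|dir| dir.exists())
}

fn main() {
    // On a GB200 (aarch64) box this would typically resolve to a targets/sbsa-linux
    // or targets/aarch64-linux path; on x86_64 it usually resolves to lib64.
    let cuda_home = std::env::var("CUDA_HOME").unwrap_or_else(|_| "/usr/local/cuda".to_string());
    match candidate_cuda_lib_dir(Path::new(&cuda_home)) {
        Some(dir) => println!("CUDA lib dir: {}", dir.display()),
        None => eprintln!("no CUDA library directory found under {cuda_home}"),
    }
}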
