7 changes: 4 additions & 3 deletions .github/workflows/build-cpu.yml
@@ -20,18 +20,19 @@ jobs:
     with:
       timeout: 60
       runner: ${{ matrix.runs-on }}
+      docker-image: "pytorch/manylinux2_28-builder:cpu"
       submodules: recursive
       upload-artifact: monarch-cpu-${{ github.sha }}
       script: |
         # Source common setup functions
         source scripts/common-setup.sh
 
-        # Setup build environment (conda + system deps + rust + build deps)
+        # Setup build environment (manylinux Python + system deps + rust)
         setup_build_environment
 
         # Install torch nightly (CPU version)
         pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
         pip install -r build-requirements.txt
 
-        # Build monarch (No tensor engine, CPU version)
-        USE_TENSOR_ENGINE=0 python setup.py bdist_wheel
+        # Build monarch (No tensor engine, CPU version) with proper library paths
+        USE_TENSOR_ENGINE=0 with_build_env python setup.py bdist_wheel
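Note: with_build_env is defined in scripts/common-setup.sh, which is not part of this diff. A minimal sketch of the role the comments above imply (run the wrapped command with the library search paths the build needs); the variable names and fallbacks here are assumptions, not the real implementation:

with_build_env() {
    # Hypothetical sketch; the real helper lives in scripts/common-setup.sh.
    # Collect whatever library dirs the build needs: CUDA when configured,
    # plus any interpreter lib dir the setup step exported (assumed name).
    local extra_libs="${PYTHON_LIB_DIR:-}"
    if [[ -n "${CUDA_LIB_DIR:-}" ]]; then
        extra_libs="${CUDA_LIB_DIR}:${extra_libs}"
    fi
    LD_LIBRARY_PATH="${extra_libs}:${LD_LIBRARY_PATH:-}" \
    LIBRARY_PATH="${extra_libs}:${LIBRARY_PATH:-}" \
        "$@"
}

Used as a command prefix (USE_TENSOR_ENGINE=0 with_build_env python setup.py bdist_wheel), the extra paths apply only to the build command rather than leaking into the rest of the job.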
9 changes: 5 additions & 4 deletions .github/workflows/build-cuda.yml
@@ -31,7 +31,7 @@ jobs:
         # Source common setup functions
         source scripts/common-setup.sh
 
-        # Setup build environment (conda + system deps + rust + build deps)
+        # Setup build environment (manylinux Python + system deps + rust)
         setup_build_environment
 
         # Install torch nightly
@@ -41,7 +41,8 @@ jobs:
         # Setup Tensor Engine
         setup_tensor_engine
 
-        export CUDA_LIB_DIR=/usr/lib64
+        # Setup CUDA environment (detects CUDA paths automatically)
+        setup_cuda_environment
 
-        # Build monarch (CUDA version)
-        python setup.py bdist_wheel
+        # Build monarch (CUDA version) with proper library paths
+        with_build_env python setup.py bdist_wheel
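Note: setup_cuda_environment replaces the hard-coded export CUDA_LIB_DIR=/usr/lib64. Its body is not shown in this diff; a plausible sketch probes the same candidate directories the build_utils changes below add (sketch only, the real helper is in scripts/common-setup.sh):

setup_cuda_environment() {
    # Hypothetical sketch mirroring the path list in build_utils/src/lib.rs.
    local cuda_home="${CUDA_HOME:-/usr/local/cuda}"
    local dir
    for dir in \
        "${cuda_home}/lib64" \
        "${cuda_home}/lib" \
        "${cuda_home}/targets/x86_64-linux/lib" \
        "${cuda_home}/targets/aarch64-linux/lib" \
        "${cuda_home}/targets/sbsa-linux/lib"; do
        if [[ -d "${dir}" ]]; then
            export CUDA_LIB_DIR="${dir}"
            return 0
        fi
    done
    echo "warning: no CUDA library directory found under ${cuda_home}" >&2
    return 1
}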
8 changes: 5 additions & 3 deletions .github/workflows/doc_build.yml
@@ -26,7 +26,7 @@ jobs:
         # Source common setup functions
         source scripts/common-setup.sh
 
-        # Setup build environment (conda + system deps + rust + build deps)
+        # Setup build environment (manylinux Python + system deps + rust)
         # docs build will use 3.13
         setup_build_environment 3.13
 
@@ -46,10 +46,12 @@ jobs:
         export USE_TENSOR_ENGINE=1
         export RUSTFLAGS="-Zthreads=16 ${RUSTFLAGS:-}"
         export _GLIBCXX_USE_CXX11_ABI=1
-        export CUDA_LIB_DIR=/usr/lib64
 
+        # Setup CUDA environment (detects CUDA paths automatically)
+        setup_cuda_environment
+
         # Build Monarch completely for documentation - use dedicated script
-        ./scripts/build_monarch_for_docs.sh
+        with_build_env ./scripts/build_monarch_for_docs.sh
 
         # Generate documentation for all workspace crates
         cargo doc --workspace --no-deps
38 changes: 25 additions & 13 deletions .github/workflows/publish_release.yml
@@ -12,26 +12,38 @@ concurrency:
   cancel-in-progress: true
 jobs:
   build:
-    name: cuda12.6-py${{ matrix.python-version }}-${{ matrix.name }}
+    name: ${{ matrix.name }}-py${{ matrix.python-version }}
     strategy:
-      fail-fast: false # Changed to false to see results from all Python versions
+      fail-fast: false
       matrix:
         # TODO add 3.14 once we figure out py03 issue
         python-version: ["3.10", "3.11", "3.12", "3.13"]
         include:
-          - name: 4xlarge
-            runs-on: linux.g5.4xlarge.nvidia.gpu
+          # x86_64 CUDA builds
+          - name: cuda12.8-x86_64
+            runner: linux.g5.4xlarge.nvidia.gpu
+            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu128'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.8"
+            docker-image: "pytorch/almalinux-builder" # Uses default, becomes pytorch/almalinux-builder:cuda12.8
+            platform-tag: "manylinux2014_x86_64"
+          # aarch64 CUDA builds
+          - name: cuda12.8-aarch64
+            runner: linux.arm64.r7g.12xlarge.memory # GPU-enabled ARM runner like PyTorch uses
+            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu128'
+            gpu-arch-type: "cpu" # Use "cpu" to skip nvidia driver install, CUDA libs are in Docker image
+            gpu-arch-version: ""
+            docker-image: "pytorch/manylinuxaarch64-builder:cuda12.8" # ARM-specific image with CUDA
+            platform-tag: "manylinux2014_aarch64"
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 60
-      runner: ${{ matrix.runs-on }}
+      runner: ${{ matrix.runner }}
+      gpu-arch-type: ${{ matrix.gpu-arch-type }}
+      gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      docker-image: ${{ matrix.docker-image }}
       submodules: recursive
-      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.gpu-arch-type }}${{ matrix.gpu-arch-version }}
+      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.name }}
       script: |
         source scripts/common-setup.sh
         setup_build_environment ${{ matrix.python-version }}
@@ -44,17 +56,17 @@ jobs:
         # Setup Tensor Engine dependencies
         setup_tensor_engine
 
-        cargo install --path monarch_hyperactor
+        # Setup CUDA environment (detects CUDA paths automatically for both x86_64 and aarch64)
+        setup_cuda_environment
 
-        # Build wheel
+        # Build wheel with proper library paths
         export MONARCH_PACKAGE_NAME="torchmonarch"
-        export CUDA_LIB_DIR=/usr/lib64
         export MONARCH_VERSION="${{ github.event.inputs.version }}"
-        python setup.py bdist_wheel
 
-        # hacky until the right distribution wheel can be made...
-        find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
-        ls -la dist/
+        with_build_env python setup.py bdist_wheel
+
+        # Properly retag wheel with manylinux platform tag
+        retag_wheel_platform "${{ matrix.platform-tag }}"
 
         # Run tests
         install_python_test_dependencies
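Note on retag_wheel_platform: the old find/mv one-liner only renamed the file, leaving the WHEEL metadata inside the archive stale, and it assumed x86_64. The helper's body is not in this diff; a sketch of what a correct retag could look like, assuming the wheel package (its tags subcommand, available since wheel 0.40, rewrites both the filename and the internal tags):

retag_wheel_platform() {
    # Hypothetical sketch; the real helper lives in scripts/common-setup.sh.
    local platform_tag="$1"   # e.g. manylinux2014_x86_64 or manylinux2014_aarch64
    pip install --quiet 'wheel>=0.40'
    local whl
    for whl in dist/*-linux_*.whl; do
        # Rewrites the platform tag and the wheel's internal metadata,
        # unlike the old mv, which only renamed the file.
        python -m wheel tags --platform-tag "${platform_tag}" --remove "${whl}"
    done
    ls -la dist/
}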
5 changes: 3 additions & 2 deletions .github/workflows/test-cpu-python.yml
@@ -19,14 +19,15 @@ jobs:
     with:
       timeout: 60
       runner: linux.4xlarge
+      docker-image: "pytorch/manylinux2_28-builder:cpu"
       submodules: recursive
       download-artifact: ${{ inputs.artifact-name }}
       script: |
         # Source common setup functions
         source scripts/common-setup.sh
-        # Setup test environment
-        setup_conda_environment
+        # Setup test environment (uses manylinux Python)
+        setup_test_environment
         # Disable tensor engine
         export USE_TENSOR_ENGINE=0
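Note: setup_test_environment replaces setup_conda_environment now that the job runs in a manylinux builder image. A sketch of the assumed behaviour (select the interpreter shipped in /opt/python rather than provisioning conda); the names and layout are assumptions, not the real helper:

setup_test_environment() {
    # Hypothetical sketch; manylinux images ship CPython builds under
    # /opt/python, e.g. /opt/python/cp310-cp310/bin/python.
    local py_version="${1:-3.10}"
    local tag="cp${py_version/./}"
    export PATH="/opt/python/${tag}-${tag}/bin:${PATH}"
    python -m pip install --upgrade pip
}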
40 changes: 26 additions & 14 deletions .github/workflows/wheels.yml
@@ -13,25 +13,38 @@ concurrency:
   cancel-in-progress: true
 jobs:
   build:
-    name: cuda12.6-py${{ matrix.python-version }}-${{ matrix.name }}
+    name: ${{ matrix.name }}-py${{ matrix.python-version }}
     strategy:
-      fail-fast: false # Changed to false to see results from all Python versions
+      fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        # python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.10"]
         include:
-          - name: 4xlarge
-            runs-on: linux.g5.4xlarge.nvidia.gpu
+          # x86_64 CUDA builds
+          - name: cuda12.6-x86_64
+            runner: linux.g5.4xlarge.nvidia.gpu
+            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
+            gpu-arch-type: "cuda"
+            gpu-arch-version: "12.6"
+            docker-image: "pytorch/almalinux-builder" # Uses default, becomes pytorch/almalinux-builder:cuda12.6
+            platform-tag: "manylinux2014_x86_64"
+          # aarch64 CUDA builds
+          - name: cuda12.6-aarch64
+            runner: linux.arm64.r7g.12xlarge.memory # GPU-enabled ARM runner like PyTorch uses
+            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
+            gpu-arch-type: "cpu" # Use "cpu" to skip nvidia driver install, CUDA libs are in Docker image
+            gpu-arch-version: ""
+            docker-image: "pytorch/manylinuxaarch64-builder:cuda12.6" # ARM-specific image with CUDA
+            platform-tag: "manylinux2014_aarch64"
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       timeout: 60
-      runner: ${{ matrix.runs-on }}
+      runner: ${{ matrix.runner }}
+      gpu-arch-type: ${{ matrix.gpu-arch-type }}
+      gpu-arch-version: ${{ matrix.gpu-arch-version }}
+      docker-image: ${{ matrix.docker-image }}
       submodules: recursive
-      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.gpu-arch-type }}${{ matrix.gpu-arch-version }}
+      upload-artifact: monarch-${{ matrix.python-version }}-${{ matrix.name }}
       script: |
         source scripts/common-setup.sh
         setup_build_environment ${{ matrix.python-version }}
@@ -44,18 +57,17 @@ jobs:
         # Setup Tensor Engine dependencies
         setup_tensor_engine
 
-        cargo install --path monarch_hyperactor
+        # Setup CUDA environment (detects CUDA paths automatically for both x86_64 and aarch64)
+        setup_cuda_environment
 
-        # Build wheel
+        # Build wheel with proper library paths
         export MONARCH_PACKAGE_NAME="torchmonarch-nightly"
         export MONARCH_VERSION=$(date +'%Y.%m.%d')
-        export CUDA_LIB_DIR=/usr/lib64
 
-        python setup.py bdist_wheel
+        with_build_env python setup.py bdist_wheel
 
-        # hacky until the right distribution wheel can be made...
-        find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
-        ls -la dist/
+        # Properly retag wheel with manylinux platform tag
+        retag_wheel_platform "${{ matrix.platform-tag }}"
 
         # Run tests
         install_python_test_dependencies
36 changes: 30 additions & 6 deletions build_utils/src/lib.rs
@@ -152,17 +152,33 @@ pub fn discover_cuda_config() -> Result<CudaConfig, BuildError> {
     };
 
     // Add standard include directories
-    // Check both old-style (include) and new-style (targets/x86_64-linux/include) CUDA installations
-    for include_subdir in &["include", "targets/x86_64-linux/include"] {
+    // Check both old-style (include) and new-style target-specific paths
+    // Support both x86_64 and aarch64/ARM architectures
+    for include_subdir in &[
+        "include",
+        "targets/x86_64-linux/include",
+        "targets/aarch64-linux/include",
+        "targets/sbsa-linux/include",
+    ] {
         let include_dir = cuda_home_path.join(include_subdir);
         if include_dir.exists() {
             config.include_dirs.push(include_dir);
         }
     }
 
     // Add standard library directories
-    // Check both old-style (lib64, lib) and new-style (targets/x86_64-linux/lib) CUDA installations
-    for lib_subdir in &["lib64", "lib", "lib/x64", "targets/x86_64-linux/lib"] {
+    // Check both old-style and new-style CUDA installations for both x86_64 and aarch64
+    // Try architecture-specific paths first, then generic paths
+    for lib_subdir in &[
+        "lib64",                     // Common x86_64 location
+        "lib",                       // Common aarch64 location
+        "lib/x64",                   // Windows x64
+        "targets/x86_64-linux/lib",  // CUDA toolkit x86_64
+        "targets/aarch64-linux/lib", // CUDA toolkit aarch64
+        "targets/sbsa-linux/lib",    // CUDA toolkit ARM server
+        "lib/aarch64-linux-gnu",     // Debian/Ubuntu aarch64
+        "lib/x86_64-linux-gnu",      // Debian/Ubuntu x86_64
+    ] {
         let lib_dir = cuda_home_path.join(lib_subdir);
         if lib_dir.exists() {
             config.lib_dirs.push(lib_dir);
@@ -201,8 +217,16 @@ pub fn get_cuda_lib_dir() -> Result<String, BuildError> {
     // Try to deduce from CUDA configuration
     let cuda_config = discover_cuda_config()?;
     if let Some(cuda_home) = cuda_config.cuda_home {
-        // Check both old-style and new-style CUDA library paths
-        for lib_subdir in &["lib64", "lib", "targets/x86_64-linux/lib"] {
+        // Check both x86_64 and aarch64 CUDA library paths
+        for lib_subdir in &[
+            "lib64",                     // Common x86_64 location
+            "lib",                       // Common aarch64 location
+            "targets/x86_64-linux/lib",  // CUDA toolkit x86_64
+            "targets/aarch64-linux/lib", // CUDA toolkit aarch64
+            "targets/sbsa-linux/lib",    // CUDA toolkit ARM server
+            "lib/aarch64-linux-gnu",     // Debian/Ubuntu aarch64
+            "lib/x86_64-linux-gnu",      // Debian/Ubuntu x86_64
+        ] {
             let lib_path = cuda_home.join(lib_subdir);
             if lib_path.exists() {
                 return Ok(lib_path.to_string_lossy().to_string());
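Note: get_cuda_lib_dir only "tries to deduce" the path, which suggests an explicit setting still takes precedence; the workflows previously pinned CUDA_LIB_DIR=/usr/lib64. Assuming that override survives (an assumption based on the comment above, not confirmed by this diff), a wrong autodetection could still be worked around from a workflow script:

# Assumed escape hatch: pin the directory explicitly before building.
export CUDA_LIB_DIR=/usr/local/cuda/targets/sbsa-linux/lib
with_build_env python setup.py bdist_wheel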