diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 1eca06471a110..0321e48c043ff 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -112,7 +112,10 @@ case "$tag" in GCC_VERSION=11 KATEX=yes TRITON=yes +<<<<<<< HEAD INSTALL_MINGW=yes +======= +>>>>>>> upstream/release/2.11 ;; pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11-inductor-benchmarks) CUDA_VERSION=13.0.2 @@ -159,8 +162,15 @@ case "$tag" in else ANACONDA_PYTHON_VERSION=3.12 fi +<<<<<<< HEAD GCC_VERSION=13 ROCM_VERSION=7.2 +======= + GCC_VERSION=11 + VISION=yes + ROCM_VERSION=7.2 + NINJA_VERSION=1.9.0 +>>>>>>> upstream/release/2.11 TRITON=yes KATEX=yes PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950;gfx1100" diff --git a/.ci/docker/ci_commit_pins/huggingface-requirements.txt b/.ci/docker/ci_commit_pins/huggingface-requirements.txt index 51a16f10e0632..d2c03b74e9a0d 100644 --- a/.ci/docker/ci_commit_pins/huggingface-requirements.txt +++ b/.ci/docker/ci_commit_pins/huggingface-requirements.txt @@ -1,2 +1,6 @@ +<<<<<<< HEAD transformers==5.5.3 +======= +transformers==5.2.0 +>>>>>>> upstream/release/2.11 soxr==0.5.0 diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt index ab27e757c6e57..827119f4ab49e 100644 --- a/.ci/docker/ci_commit_pins/triton.txt +++ b/.ci/docker/ci_commit_pins/triton.txt @@ -1 +1,5 @@ +<<<<<<< HEAD b4e20bbe55617cc798b986c2555a2bc7b303c737 +======= +4ed888920c5a0871957f1cf912e557bc79fbe56c +>>>>>>> upstream/release/2.11 diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh index 9d7cd7ad78c05..5aad50b78605b 100644 --- a/.ci/docker/common/install_cuda.sh +++ b/.ci/docker/common/install_cuda.sh @@ -132,9 +132,14 @@ function install_129 { } function install_128 { +<<<<<<< HEAD CUDNN_VERSION=9.20.0.48 CUSPARSELT_VERSION=0.7.1.0 echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-${CUSPARSELT_VERSION}" +======= + CUDNN_VERSION=9.19.0.56 + echo "Installing CUDA 12.8.1 and cuDNN 
${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1" +>>>>>>> upstream/release/2.11 # install CUDA 12.8.1 in the same container install_cuda 12.8.1 cuda_12.8.1_570.124.06_linux @@ -151,9 +156,14 @@ function install_128 { } function install_130 { +<<<<<<< HEAD CUDNN_VERSION=9.20.0.48 CUSPARSELT_VERSION=0.8.1.1 echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-${CUSPARSELT_VERSION}" +======= + CUDNN_VERSION=9.19.0.56 + echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1" +>>>>>>> upstream/release/2.11 # install CUDA 13.0 in the same container install_cuda 13.0.2 cuda_13.0.2_580.95.05_linux diff --git a/.ci/docker/common/install_rocm.sh b/.ci/docker/common/install_rocm.sh index b4f6d70af3676..fd17d1669b80d 100644 --- a/.ci/docker/common/install_rocm.sh +++ b/.ci/docker/common/install_rocm.sh @@ -203,6 +203,29 @@ EOF fi fi + # ROCm 7.2 needs a fix from procprof sdk that isn't available until 7.2.1 + if [[ $(ver $ROCM_VERSION) -eq $(ver 7.2) ]]; then + git clone --no-checkout --filter=blob:none https://github.com/ROCm/rocm-systems.git + pushd rocm-systems/ + git sparse-checkout init --cone + git sparse-checkout set projects/rocprofiler-sdk shared/rocprofiler-compute + git checkout develop + git checkout rocm-7.2.0 + git config --global user.email "you@example.com" + git config --global user.name "Your Name" + git cherry-pick a71cc3cc88ed68b24c40cefec77d764053044862 + sudo apt install -y cmake libdw-dev libsqlite3-dev + cmake \ + -B rocprofiler-sdk-build \ + -DCMAKE_INSTALL_PREFIX=/opt/rocm \ + -DCMAKE_PREFIX_PATH=/opt/rocm \ + -DGPU_TARGETS="${PYTORCH_ROCM_ARCH}" \ + projects/rocprofiler-sdk + cmake --build rocprofiler-sdk-build --target all --parallel $(nproc) + cmake --build rocprofiler-sdk-build --target install + popd + fi + # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime for kdb in 
/opt/rocm/share/miopen/db/*.kdb do diff --git a/.ci/docker/common/install_triton.sh b/.ci/docker/common/install_triton.sh index 1b68e3c247839..b2fdebdcc4747 100755 --- a/.ci/docker/common/install_triton.sh +++ b/.ci/docker/common/install_triton.sh @@ -21,7 +21,7 @@ elif [ -n "${TRITON_CPU}" ]; then TRITON_REPO="https://github.com/triton-lang/triton-cpu" TRITON_TEXT_FILE="triton-cpu" else - TRITON_REPO="https://github.com/triton-lang/triton" + TRITON_REPO="https://github.com/ROCm/triton" TRITON_TEXT_FILE="triton" fi diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index c2e2635fc27eb..0028b774dfc20 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -117,9 +117,16 @@ ninja==1.11.1.4 #Pinned versions: 1.11.1.4 #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py +<<<<<<< HEAD numba==0.61.2 ; python_version < "3.14" and platform_machine != "s390x" numba==0.64.0 ; python_version >= "3.14" and platform_machine != "s390x" +======= +numba==0.49.0 ; python_version < "3.9" and platform_machine != "s390x" +numba==0.60.0 ; python_version == "3.9" and platform_machine != "s390x" +numba==0.61.2 ; python_version >= "3.10" and python_version < "3.14" and platform_machine != "s390x" +numba==0.64.0 ; python_version >= "3.14" and platform_machine != "s390x" +>>>>>>> upstream/release/2.11 #Description: Just-In-Time Compiler for Numerical Functions #Pinned versions: 0.55.2, 0.60.0 #test that import: test_numba_integration.py @@ -137,8 +144,13 @@ numba==0.64.0 ; python_version >= "3.14" and platform_machine != "s390x" #test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py, #test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py, #test_binary_ufuncs.py +<<<<<<< HEAD numpy==2.1.2 ; python_version < "3.14" numpy==2.3.4; python_version >= "3.14" +======= +numpy==2.0.2 ; python_version == "3.9" +numpy==2.1.2 ; python_version > "3.9" +>>>>>>> upstream/release/2.11 
pandas==2.2.3; python_version < "3.14" pandas==2.3.3; python_version >= "3.14" @@ -252,7 +264,12 @@ scikit-image==0.22.0 #Pinned versions: 0.20.3 #test that import: +<<<<<<< HEAD scipy==1.14.1 ; python_version < "3.14" +======= +scipy==1.13.1 ; python_version == "3.9" +scipy==1.14.1 ; python_version > "3.9" and python_version < "3.14" +>>>>>>> upstream/release/2.11 scipy==1.16.2 ; python_version >= "3.14" # Pin SciPy because of failing distribution tests (see #60347) @@ -289,11 +306,14 @@ lintrunner==0.12.11 #Pinned versions: 0.12.11 #test that import: +<<<<<<< HEAD spin==0.17 #Description: developer CLI for common build/lint tasks #Pinned versions: 0.17 #test that import: +======= +>>>>>>> upstream/release/2.11 redis==7.4.0 #Description: redis database #test that import: anything that tests OSS caching/mocking (inductor/test_codecache.py, inductor/test_max_autotune.py) @@ -390,7 +410,7 @@ dataclasses_json==0.6.7 #Pinned versions: 0.6.7 #test that import: -cmake==3.31.6 +cmake==4.0.0 #Description: required for building tlparse==0.4.0 diff --git a/.ci/docker/requirements-docs.txt b/.ci/docker/requirements-docs.txt index 484d99ec1152e..1b0fca88a8e1f 100644 --- a/.ci/docker/requirements-docs.txt +++ b/.ci/docker/requirements-docs.txt @@ -2,9 +2,15 @@ sphinx==7.2.6 #Description: This is used to generate PyTorch docs #Pinned versions: 7.2.6 +<<<<<<< HEAD pytorch_sphinx_theme2==0.4.9 #Description: This is needed to generate PyTorch docs #Pinned versions: 0.4.9 +======= +pytorch_sphinx_theme2==0.4.6 +#Description: This is needed to generate PyTorch docs +#Pinned versions: 0.4.6 +>>>>>>> upstream/release/2.11 sphinxcontrib.katex==0.9.11 #Description: This is used to generate PyTorch docs diff --git a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test_library.yaml b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test_library.yaml index 402f2d8bf0e69..7347b41b6e78b 100644 --- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test_library.yaml +++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test_library.yaml 
@@ -20,7 +20,12 @@ vllm_basic_models_test: - pytest -v -s models/test_registry.py - pytest -v -s models/test_utils.py - pytest -v -s models/test_vision.py +<<<<<<< HEAD - HF_DATASETS_OFFLINE=0 TRANSFORMERS_OFFLINE=0 pytest -v -s models/test_initialization.py +======= + - pytest -v -s models/test_initialization.py -k 'not voxtral' + - HF_DATASETS_OFFLINE=0 TRANSFORMERS_OFFLINE=0 pytest -v -s models/test_initialization.py -k voxtral +>>>>>>> upstream/release/2.11 vllm_entrypoints_test: title: Entrypoints Test diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh index 613301059f2a6..7a4d5bdd3902b 100644 --- a/.ci/manywheel/build_cuda.sh +++ b/.ci/manywheel/build_cuda.sh @@ -114,7 +114,11 @@ case ${CUDA_VERSION} in TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST//8.6;/}" # Remove 8.6 for libtorch fi ;; +<<<<<<< HEAD 13.0|13.2) +======= + 13.0) +>>>>>>> upstream/release/2.11 TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};$([[ "$ARCH" == "aarch64" ]] && echo "11.0;" || echo "")12.0" export TORCH_NVCC_FLAGS="-compress-mode=size" export BUILD_BUNDLE_PTXAS=1 diff --git a/.ci/pytorch/binary_populate_env.sh b/.ci/pytorch/binary_populate_env.sh index 53914914c8c93..21d7d38e0e2bb 100755 --- a/.ci/pytorch/binary_populate_env.sh +++ b/.ci/pytorch/binary_populate_env.sh @@ -6,7 +6,9 @@ export TZ=UTC tagged_version() { GIT_DIR="${workdir}/pytorch/.git" GIT_DESCRIBE="git --git-dir ${GIT_DIR} describe --tags --match v[0-9]*.[0-9]*.[0-9]*" - if [[ ! -d "${GIT_DIR}" ]]; then + if [[ -n "${CIRCLE_TAG:-}" ]]; then + echo "${CIRCLE_TAG}" + elif [[ ! -d "${GIT_DIR}" ]]; then echo "Abort, abort! Git dir ${GIT_DIR} does not exists!" 
kill $$ elif ${GIT_DESCRIBE} --exact >/dev/null; then @@ -70,6 +72,8 @@ fi export PYTORCH_BUILD_NUMBER=1 +# This part is done in the builder scripts so commenting the duplicate code +: <<'BLOCK_COMMENT' # Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) TRITON_CONSTRAINT="platform_system == 'Linux'" @@ -111,6 +115,7 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" fi fi +BLOCK_COMMENT USE_GLOO_WITH_OPENSSL="OFF" if [[ "$GPU_ARCH_TYPE" =~ .*aarch64.* ]]; then diff --git a/.ci/pytorch/common.sh b/.ci/pytorch/common.sh index 94d9629eac519..6e3232b23c7af 100644 --- a/.ci/pytorch/common.sh +++ b/.ci/pytorch/common.sh @@ -5,7 +5,11 @@ source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" set -ex -o pipefail +<<<<<<< HEAD # Source ROCm environment variables (paths may vary between tarball/wheel installs) +======= +# for ROCm environment variables +>>>>>>> upstream/release/2.11 if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]] && [[ -f /etc/rocm_env.sh ]]; then # shellcheck disable=SC1091 source /etc/rocm_env.sh diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index 354841db899f8..502de4b893ac0 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -279,7 +279,11 @@ function install_torchrec_and_fbgemm() { function clone_pytorch_xla() { if [[ ! 
-d ./xla ]]; then +<<<<<<< HEAD git clone --recursive -b r2.12 https://github.com/pytorch/xla.git +======= + git clone --recursive -b r2.11 https://github.com/pytorch/xla.git +>>>>>>> upstream/release/2.11 pushd xla # pin the xla hash so that we don't get broken by changes to xla git checkout "$(cat ../.github/ci_commit_pins/xla.txt)" diff --git a/.ci/pytorch/windows/internal/cuda_install.bat b/.ci/pytorch/windows/internal/cuda_install.bat index c1050edecc0b9..198b0f0c4fc27 100644 --- a/.ci/pytorch/windows/internal/cuda_install.bat +++ b/.ci/pytorch/windows/internal/cuda_install.bat @@ -43,18 +43,27 @@ goto cuda_download :cuda128 set CUDA_INSTALL_EXE=cuda_12.8.0_571.96_windows.exe set "ARGS=cuda_profiler_api_12.8 thrust_12.8 nvcc_12.8 cuobjdump_12.8 nvprune_12.8 nvprof_12.8 cupti_12.8 cublas_12.8 cublas_dev_12.8 cudart_12.8 cufft_12.8 cufft_dev_12.8 curand_12.8 curand_dev_12.8 cusolver_12.8 cusolver_dev_12.8 cusparse_12.8 cusparse_dev_12.8 npp_12.8 npp_dev_12.8 nvrtc_12.8 nvrtc_dev_12.8 nvml_dev_12.8 nvjitlink_12.8 nvtx_12.8" +<<<<<<< HEAD set CUDNN_FOLDER=cudnn-windows-x86_64-9.20.0.48_cuda12-archive +======= +set CUDNN_FOLDER=cudnn-windows-x86_64-9.19.0.56_cuda12-archive +>>>>>>> upstream/release/2.11 goto cuda_download :cuda129 set CUDA_INSTALL_EXE=cuda_12.9.1_576.57_windows.exe set "ARGS=cuda_profiler_api_12.9 thrust_12.9 nvcc_12.9 cuobjdump_12.9 nvprune_12.9 nvprof_12.9 cupti_12.9 cublas_12.9 cublas_dev_12.9 cudart_12.9 cufft_12.9 cufft_dev_12.9 curand_12.9 curand_dev_12.9 cusolver_12.9 cusolver_dev_12.9 cusparse_12.9 cusparse_dev_12.9 npp_12.9 npp_dev_12.9 nvrtc_12.9 nvrtc_dev_12.9 nvml_dev_12.9 nvjitlink_12.9 nvtx_12.9" +<<<<<<< HEAD set CUDNN_FOLDER=cudnn-windows-x86_64-9.20.0.48_cuda12-archive +======= +set CUDNN_FOLDER=cudnn-windows-x86_64-9.17.1.4_cuda12-archive +>>>>>>> upstream/release/2.11 goto cuda_download :cuda130 set CUDA_INSTALL_EXE=cuda_13.0.0_windows.exe set "ARGS=" +<<<<<<< HEAD set CUDNN_FOLDER=cudnn-windows-x86_64-9.20.0.48_cuda13-archive 
goto cuda_download @@ -62,6 +71,9 @@ goto cuda_download set CUDA_INSTALL_EXE=cuda_13.2.1_windows.exe set "ARGS=" set CUDNN_FOLDER=cudnn-windows-x86_64-9.20.0.48_cuda13-archive +======= +set CUDNN_FOLDER=cudnn-windows-x86_64-9.19.0.56_cuda13-archive +>>>>>>> upstream/release/2.11 goto cuda_download :: Common download logic for CUDA toolkit, cuDNN, and ZLIB @@ -168,6 +180,7 @@ if %CUDA_VER% EQU 126 ( set EXPECTED_CUDNN_VERSION=9.10.2 ) if %CUDA_VER% EQU 128 ( +<<<<<<< HEAD set CUDNN_FOLDER=cudnn-windows-x86_64-9.20.0.48_cuda12-archive set EXPECTED_CUDNN_VERSION=9.20.0 ) @@ -182,6 +195,18 @@ if %CUDA_VER% EQU 130 ( if %CUDA_VER% EQU 132 ( set CUDNN_FOLDER=cudnn-windows-x86_64-9.20.0.48_cuda13-archive set EXPECTED_CUDNN_VERSION=9.20.0 +======= + set CUDNN_FOLDER=cudnn-windows-x86_64-9.19.0.56_cuda12-archive + set EXPECTED_CUDNN_VERSION=9.19.0 +) +if %CUDA_VER% EQU 129 ( + set CUDNN_FOLDER=cudnn-windows-x86_64-9.17.1.4_cuda12-archive + set EXPECTED_CUDNN_VERSION=9.17.1 +) +if %CUDA_VER% EQU 130 ( + set CUDNN_FOLDER=cudnn-windows-x86_64-9.19.0.56_cuda13-archive + set EXPECTED_CUDNN_VERSION=9.19.0 +>>>>>>> upstream/release/2.11 ) set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt index 21f23c297a01d..1b67d3a71d81b 100644 --- a/.github/ci_commit_pins/vllm.txt +++ b/.github/ci_commit_pins/vllm.txt @@ -1 +1,5 @@ +<<<<<<< HEAD ba4a78eb5d2ea30477b58a0bb8109b129f35c8b1 +======= +a4047d4ea993fd52038433d87c16e603bee4f214 +>>>>>>> upstream/release/2.11 diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt index 42fb03f58599d..0f91917cedbef 100644 --- a/.github/ci_commit_pins/xla.txt +++ b/.github/ci_commit_pins/xla.txt @@ -1 +1,5 @@ +<<<<<<< HEAD r2.12 +======= +r2.11 +>>>>>>> upstream/release/2.11 diff --git a/.github/scripts/build_triton_wheel.py b/.github/scripts/build_triton_wheel.py index de0fa7739edc5..887f732bcfb8f 100644 --- a/.github/scripts/build_triton_wheel.py +++ 
b/.github/scripts/build_triton_wheel.py @@ -3,6 +3,7 @@ from __future__ import annotations import os +import re import shutil import sys from pathlib import Path @@ -52,6 +53,31 @@ def patch_init_py( f.write(orig) +def get_rocm_version() -> str: + rocm_path = os.environ.get('ROCM_HOME') or os.environ.get('ROCM_PATH') or "/opt/rocm" + rocm_version = "0.0.0" + rocm_version_h = f"{rocm_path}/include/rocm-core/rocm_version.h" + if not os.path.isfile(rocm_version_h): + rocm_version_h = f"{rocm_path}/include/rocm_version.h" + if os.path.isfile(rocm_version_h): + RE_MAJOR = re.compile(r"#define\s+ROCM_VERSION_MAJOR\s+(\d+)") + RE_MINOR = re.compile(r"#define\s+ROCM_VERSION_MINOR\s+(\d+)") + RE_PATCH = re.compile(r"#define\s+ROCM_VERSION_PATCH\s+(\d+)") + major, minor, patch = 0, 0, 0 + for line in open(rocm_version_h): + match = RE_MAJOR.search(line) + if match: + major = int(match.group(1)) + match = RE_MINOR.search(line) + if match: + minor = int(match.group(1)) + match = RE_PATCH.search(line) + if match: + patch = int(match.group(1)) + rocm_version = str(major) + "." + str(minor) + "." 
+ str(patch) + return rocm_version + + def build_triton( *, version: str, @@ -66,13 +92,20 @@ def build_triton( max_jobs = os.cpu_count() or 1 env["MAX_JOBS"] = str(max_jobs) + version_suffix = "" + if not release: + rocm_version = get_rocm_version() + version_suffix = f"+rocm{rocm_version}.git{commit_hash[:8]}" + version += version_suffix + with TemporaryDirectory() as tmpdir: triton_basedir = Path(tmpdir) / "triton" triton_pythondir = triton_basedir / "python" triton_repo = "https://github.com/openai/triton" if device == "rocm": - triton_pkg_name = "triton-rocm" + triton_repo = "https://github.com/ROCm/triton" + triton_pkg_name = "triton" elif device == "xpu": triton_pkg_name = "triton-xpu" triton_repo = "https://github.com/intel/intel-xpu-backend-for-triton" @@ -97,7 +130,11 @@ def build_triton( # change built wheel name and version env["TRITON_WHEEL_NAME"] = triton_pkg_name +<<<<<<< HEAD env["TRITON_EXT_ENABLED"] = "ON" +======= + env["TRITON_WHEEL_VERSION_SUFFIX"] = version_suffix +>>>>>>> upstream/release/2.11 if with_clang_ldd: env["TRITON_BUILD_WITH_CLANG_LLD"] = "1" diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py index 0dbbb4aa3761b..aab90b99c98b5 100755 --- a/.github/scripts/filter_test_configs.py +++ b/.github/scripts/filter_test_configs.py @@ -57,10 +57,17 @@ def is_cuda_or_rocm_job( "rerun_disabled_tests": lambda job_name, config=None: True, } +<<<<<<< HEAD # The link to the published list of disabled jobs DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=wTGHVmBBfqYt9mKCiXeWHzek1ZaYHJeH" # and unstable jobs UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=6zhdrg_i6w2bx_KfIfy8SKn5WdSO0ZAf" +======= +# The link to the published list of disabled jobs. 
+DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=EdtXb8H1wC3KKKfSV9z7QtgG3FngDv3B" +# and unstable jobs +UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=iafjJg17T2MK7wQiJ0qx32zIPMqqwZqv" +>>>>>>> upstream/release/2.11 # Some constants used to handle disabled and unstable jobs JOB_NAME_SEP = "/" diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index aaf19e57b8d8d..ae55a0d0559a1 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -60,6 +60,7 @@ "cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | " "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | " "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " +<<<<<<< HEAD "nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | " "nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux'" ), @@ -78,6 +79,33 @@ "nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | " "nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | " "nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | " +======= + "nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | " + "nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux'" + ), + "12.8": ( + "cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | " # noqa: B950 + "cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | " + "nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | " + "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " + "nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | " + "nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux'" + ), + "12.9": ( + "cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | " # noqa: B950 + "cuda-bindings>=12.9.4,<13; platform_system == 
'Linux' | " + "nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | " + "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " + "nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | " + "nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux'" + ), + "13.0": ( + "cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | " # noqa: B950 + "cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | " + "nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | " + "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | " + "nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | " +>>>>>>> upstream/release/2.11 "nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux'" ), "xpu": ( diff --git a/.github/templates/common.yml.j2 b/.github/templates/common.yml.j2 index acbd4aee44736..405050dcedc7c 100644 --- a/.github/templates/common.yml.j2 +++ b/.github/templates/common.yml.j2 @@ -32,7 +32,11 @@ concurrency: {%- macro setup_ec2_windows() -%} !{{ display_ec2_information() }} - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/templates/linux_binary_build_workflow.yml.j2 b/.github/templates/linux_binary_build_workflow.yml.j2 index 349207af1d7de..51e256c11d8ce 100644 --- a/.github/templates/linux_binary_build_workflow.yml.j2 +++ b/.github/templates/linux_binary_build_workflow.yml.j2 @@ -56,7 +56,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: 
triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -135,24 +139,41 @@ jobs: steps: !{{ common.checkout(deep_clone=False, checkout_pr_head=False) }} - name: Setup XPU +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.12 - name: Login to ECR uses: pytorch/pytorch/.github/actions/ecr-login@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 +>>>>>>> upstream/release/2.11 - uses: !{{ common.download_artifact_action }} name: Download Build Artifacts with: name: !{{ config["build_name"] }} path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD - name: Calculate docker image id: calculate-docker-image uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }} + - name: Calculate docker image + id: calculate-docker-image + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: !{{ config["container_image"] }} custom-tag-prefix: !{{ config["container_image_tag_prefix"] }} docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -177,6 +198,10 @@ jobs: with: name: !{{ config["build_name"] }} path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + !{{ common.checkout(deep_clone=False, directory="pytorch", 
checkout_pr_head=False) }} +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -190,14 +215,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: !{{ config["container_image"] }} custom-tag-prefix: !{{ config["container_image_tag_prefix"] }} docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary diff --git a/.github/templates/macos_binary_build_workflow.yml.j2 b/.github/templates/macos_binary_build_workflow.yml.j2 index 2bb6f726003ba..d43a041a48346 100644 --- a/.github/templates/macos_binary_build_workflow.yml.j2 +++ b/.github/templates/macos_binary_build_workflow.yml.j2 @@ -71,7 +71,11 @@ jobs: steps: !{{ set_runner_specific_vars() }} !{{ setup_python(config.get("python_version", "3.10")) }} +<<<<<<< HEAD !{{ common.checkout(deep_clone=False, checkout_pr_head=False) }} +======= + !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }} +>>>>>>> upstream/release/2.11 - name: Populate binary env run: | "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" diff --git a/.github/templates/windows_binary_build_workflow.yml.j2 b/.github/templates/windows_binary_build_workflow.yml.j2 index 
b24a763656428..7c69a6815e0ce 100644 --- a/.github/templates/windows_binary_build_workflow.yml.j2 +++ b/.github/templates/windows_binary_build_workflow.yml.j2 @@ -64,7 +64,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -134,7 +138,11 @@ jobs: {%- else %} !{{ set_runner_specific_vars() }} !{{ common.setup_ec2_windows() }} +<<<<<<< HEAD !{{ common.checkout(deep_clone=False, checkout_pr_head=False) }} +======= + !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }} +>>>>>>> upstream/release/2.11 {%- endif %} - name: Populate binary env shell: bash @@ -209,7 +217,11 @@ jobs: ".ci/pytorch/windows/arm64/bootstrap_rust.bat" {%- else %} !{{ common.setup_ec2_windows() }} +<<<<<<< HEAD !{{ common.checkout(deep_clone=False, checkout_pr_head=False) }} +======= + !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }} +>>>>>>> upstream/release/2.11 !{{ set_runner_specific_vars() }} {%- endif %} - uses: !{{ common.download_artifact_action }} diff --git a/.github/workflows/_bazel-build-test.yml b/.github/workflows/_bazel-build-test.yml new file mode 100644 index 0000000000000..21508ce0d7f21 --- /dev/null +++ b/.github/workflows/_bazel-build-test.yml @@ -0,0 +1,215 @@ +name: bazel + +on: + workflow_call: + inputs: + build-environment: + required: true + type: string + description: Top-level label for what's being built/tested. + docker-image-name: + required: true + type: string + description: Name of the base docker image to build with. 
+ cuda-version: + required: true + type: string + description: What CUDA version to build with (i.e. "11.7"), "cpu" for none. + sync-tag: + required: false + type: string + default: "" + description: | + If this is set, our linter will use this to make sure that every other + job with the same `sync-tag` is identical. + test-matrix: + required: true + type: string + description: | + A JSON description of what configs to run later on. + runner: + required: false + type: string + default: "linux.large" + description: Runner type + +env: + GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + +jobs: + filter: + if: github.repository_owner == 'pytorch' + runs-on: ${{ inputs.runner }} + outputs: + test-matrix: ${{ steps.filter.outputs.test-matrix }} + is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }} + keep-going: ${{ steps.filter.outputs.keep-going }} + reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }} + steps: + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + with: + fetch-depth: 1 + submodules: false + + - name: Select all requested test configurations + id: filter + uses: ./.github/actions/filter-test-configs + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + test-matrix: ${{ inputs.test-matrix }} + + build-and-test: + needs: filter + # Don't run on forked repos. 
+ if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False' + strategy: + matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }} + fail-fast: false + runs-on: ${{ matrix.runner }} + steps: + - name: Setup SSH (Click me for login details) + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + + # [see note: pytorch repo ref] + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + + - name: Setup Linux + uses: ./.github/actions/setup-linux + + - name: Login to ECR + uses: ./.github/actions/ecr-login + + - name: Calculate docker image + id: calculate-docker-image + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 + with: + docker-image-name: ${{ inputs.docker-image-name }} + + - name: Pull docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 + with: + docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} + + - name: Check if in a container runner + shell: bash + id: check_container_runner + run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT" + + - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG + uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.11 + + - name: Output disk space left + run: | + sudo df -H + + - name: Preserve github env variables for use in docker + run: | + env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}" + env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}" + + - name: Parse ref + id: parse-ref + run: .github/scripts/parse_ref.py + + - name: Get workflow job id + id: get-job-id + uses: ./.github/actions/get-workflow-job-id + if: always() + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Build + env: + BUILD_ENVIRONMENT: ${{ inputs.build-environment }} + PR_NUMBER: ${{ 
github.event.pull_request.number }} + BRANCH: ${{ steps.parse-ref.outputs.branch }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_WORKFLOW: ${{ github.workflow }} + GITHUB_JOB: ${{ github.job }} + GITHUB_RUN_ID: ${{ github.run_id }} + GITHUB_RUN_NUMBER: ${{ github.run_number }} + GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }} + JOB_ID: ${{ steps.get-job-id.outputs.job-id }} + REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }} + # TODO duplicated + AWS_DEFAULT_REGION: us-east-1 + SHA1: ${{ github.event.pull_request.head.sha || github.sha }} + SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 + SCCACHE_REGION: us-east-1 + TORCH_CUDA_ARCH_LIST: 5.2 + DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} + OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }} + CUDA_VERSION: ${{ inputs.cuda-version }} + run: | + export SHARD_NUMBER=0 + # detached container should get cleaned up by teardown_ec2_linux + # TODO: Stop building test binaries as part of the build phase + # Make sure we copy test results from bazel-testlogs symlink to + # a regular directory ./test/test-reports + # shellcheck disable=SC2086 + container_name=$(docker run \ + ${GPU_FLAG:-} \ + -e AWS_DEFAULT_REGION \ + -e BUILD_ENVIRONMENT \ + -e GITHUB_ACTIONS \ + -e GITHUB_REPOSITORY \ + -e GITHUB_WORKFLOW \ + -e GITHUB_JOB \ + -e GITHUB_RUN_NUMBER \ + -e GITHUB_RUN_ATTEMPT \ + -e JOB_ID \ + -e GIT_DEFAULT_BRANCH="$GIT_DEFAULT_BRANCH" \ + -e SHARD_NUMBER \ + -e NUM_TEST_SHARDS \ + -e MAX_JOBS="$(nproc --ignore=2)" \ + -e SCCACHE_BUCKET \ + -e SCCACHE_REGION \ + -e SKIP_SCCACHE_INITIALIZATION=1 \ + -e REENABLED_ISSUES \ + -e TORCH_CUDA_ARCH_LIST \ + -e OUR_GITHUB_JOB_ID \ + -e CUDA_VERSION \ + --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ + --security-opt seccomp=unconfined \ + --cap-add=SYS_PTRACE \ + --shm-size="1g" \ + --tty \ + --detach \ + --user jenkins \ + -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ + -w /var/lib/jenkins/workspace \ + "${DOCKER_IMAGE}" 
+ ) + docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh' + echo "container_id=${container_name}" >> "${GITHUB_ENV}" + + - name: Test + id: test + # Time out the test phase after 2 hours + timeout-minutes: 120 + run: | + docker exec -t "${container_id}" sh -c '.ci/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports' + + - name: Print remaining test logs + shell: bash + if: always() && steps.test.conclusion + run: | + cat test/**/*_toprint.log || true + + - name: Chown workspace + uses: ./.github/actions/chown-workspace + if: always() + + - name: Upload test artifacts + uses: ./.github/actions/upload-test-artifacts + if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped' + with: + file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }} + + - name: Teardown Linux + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 + if: always() diff --git a/.github/workflows/_binary-build-flash-attention-wheel-linux.yml b/.github/workflows/_binary-build-flash-attention-wheel-linux.yml index 6836dc19edf9e..cf2ed3472e46b 100644 --- a/.github/workflows/_binary-build-flash-attention-wheel-linux.yml +++ b/.github/workflows/_binary-build-flash-attention-wheel-linux.yml @@ -23,7 +23,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -83,18 +87,31 @@ jobs: TORCH_VERSION: "2.10.0" steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11
with: github-secret: ${{ secrets.GITHUB_TOKEN }} fail-silently: false +<<<<<<< HEAD - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 +======= + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: true - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ env.DOCKER_IMAGE }} @@ -135,5 +152,9 @@ jobs: path: ${{ runner.temp }}/artifacts/*.whl - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() diff --git a/.github/workflows/_binary-build-flash-attention-wheel-windows.yml b/.github/workflows/_binary-build-flash-attention-wheel-windows.yml index d3d5e96a42eb1..f64cb20ea462a 100644 --- a/.github/workflows/_binary-build-flash-attention-wheel-windows.yml +++ b/.github/workflows/_binary-build-flash-attention-wheel-windows.yml @@ -22,7 +22,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -71,12 +75,20 @@ jobs: git config --global core.ignorecase false git config --global core.fsmonitor false - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 
+>>>>>>> upstream/release/2.11 with: github-secret: ${{ secrets.GITHUB_TOKEN }} - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: true submodules: true diff --git a/.github/workflows/_binary-build-linux.yml b/.github/workflows/_binary-build-linux.yml index 88d5177ee093d..17db8feb67ace 100644 --- a/.github/workflows/_binary-build-linux.yml +++ b/.github/workflows/_binary-build-linux.yml @@ -142,13 +142,21 @@ jobs: - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" if: inputs.build_environment != 'linux-s390x-binary-manywheel' +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.github-token }} - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }} @@ -179,7 +187,10 @@ jobs: - name: Checkout PyTorch to pytorch dir uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: +<<<<<<< HEAD fetch-depth: 2 +======= +>>>>>>> upstream/release/2.11 submodules: recursive path: pytorch show-progress: false @@ -214,9 +225,13 @@ jobs: - name: Calculate docker image id: calculate-docker-image if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }} +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: 
pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: - # If doing this in main or release branch, use docker.io. Otherwise + # If doing this in release/2.11 or release branch, use docker.io. Otherwise # use ECR docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: ${{ inputs.DOCKER_IMAGE }} @@ -229,7 +244,11 @@ jobs: - name: Pull Docker image if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }} +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -282,7 +301,11 @@ jobs: - name: Teardown Linux if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel' +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 - name: Chown workspace if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel' diff --git a/.github/workflows/_binary-test-linux.yml b/.github/workflows/_binary-test-linux.yml index d9270ea39313d..09cd4237170ce 100644 --- a/.github/workflows/_binary-test-linux.yml +++ b/.github/workflows/_binary-test-linux.yml @@ -125,14 +125,22 @@ jobs: - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" if: inputs.build_environment != 'linux-s390x-binary-manywheel' +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.github-token }} # Setup the environment - 
name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }} @@ -184,7 +192,11 @@ jobs: - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG id: install-nvidia-driver +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }} - name: configure aws credentials @@ -199,7 +211,11 @@ jobs: - name: Calculate docker image id: calculate-docker-image if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }} +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: ${{ inputs.DOCKER_IMAGE }} @@ -209,7 +225,11 @@ jobs: - name: Pull Docker image if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }} +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -221,7 +241,11 @@ jobs: - name: Teardown Linux if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel' +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 - name: Chown workspace if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel' diff --git a/.github/workflows/_binary-upload.yml b/.github/workflows/_binary-upload.yml index c7eb31efdb050..83c540128bae7 100644 --- a/.github/workflows/_binary-upload.yml +++ b/.github/workflows/_binary-upload.yml @@ -91,7 +91,11 @@ jobs: SHA1: ${{ github.event.pull_request.head.sha || github.sha }} steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: true diff --git a/.github/workflows/_docs.yml b/.github/workflows/_docs.yml index a4e51b01ee4a8..15d310184af40 100644 --- a/.github/workflows/_docs.yml +++ b/.github/workflows/_docs.yml @@ -99,7 +99,11 @@ jobs: name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }} steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 with: github-secret: ${{ secrets.GITHUB_TOKEN }} instructions: | @@ -108,6 +112,13 @@ jobs: To start Python docs build type: cd docs && make html && make coverage +<<<<<<< HEAD +======= + # [see note: pytorch repo ref] + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + +>>>>>>> upstream/release/2.11 - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 with: @@ -123,12 +134,20 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: 
pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ${{ inputs.docker-image }} - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -268,7 +287,11 @@ jobs: echo "https://docs-preview.pytorch.org/pytorch/pytorch/nightly-${{ github.sha }}/cppdocs/index.html" - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() build-docs-osdc: diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml index efa5b433947ef..87950cb4c7c89 100644 --- a/.github/workflows/_link_check.yml +++ b/.github/workflows/_link_check.yml @@ -11,7 +11,11 @@ on: jobs: lint-urls: if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }} +<<<<<<< HEAD uses: ./.github/workflows/_lint.yml +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: runner: mt-l-x86iamx-8-16 docker-image: ghcr.io/pytorch/test-infra:cpu-x86_64-810d48d @@ -32,7 +36,11 @@ jobs: lint-xrefs: if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-xref-lint') }} +<<<<<<< HEAD uses: ./.github/workflows/_lint.yml +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: runner: mt-l-x86iamx-8-16 docker-image: ghcr.io/pytorch/test-infra:cpu-x86_64-810d48d diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml index 07872fb5038d5..cbaa5671f66a2 100644 --- 
a/.github/workflows/_linux-build.yml +++ b/.github/workflows/_linux-build.yml @@ -153,7 +153,11 @@ jobs: build-environment: ${{ inputs.build-environment }} steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 if: inputs.build-environment != 'linux-s390x-binary-manywheel' with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -161,6 +165,18 @@ jobs: Build is done inside the container, to start an interactive session run: docker exec -it $(docker container ps --format '{{.ID}}') bash +<<<<<<< HEAD +======= + # [pytorch repo ref] + # Use a pytorch/pytorch reference instead of a reference to the local + # checkout because when we run this action we don't *have* a local + # checkout. In other cases you should prefer a local checkout. + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + with: + no-sudo: true + +>>>>>>> upstream/release/2.11 - name: Setup Linux id: setup-linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 @@ -189,7 +205,11 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 if: inputs.build-environment != 'linux-s390x-binary-manywheel' with: docker-image-name: ${{ inputs.docker-image-name }} @@ -205,7 +225,11 @@ jobs: echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}" - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.use-old-whl.outputs.reuse != 
'true' with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -453,7 +477,11 @@ jobs: artifact_prefix: usage_log_build_${{ steps.setup-linux.outputs.job-id }} - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel' - name: Cleanup docker diff --git a/.github/workflows/_linux-test-stable-fa3.yml b/.github/workflows/_linux-test-stable-fa3.yml index d45a38c424dcd..7d4bdd3949040 100644 --- a/.github/workflows/_linux-test-stable-fa3.yml +++ b/.github/workflows/_linux-test-stable-fa3.yml @@ -59,8 +59,15 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 +======= + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + with: + no-sudo: true +>>>>>>> upstream/release/2.11 - name: Checkout flash-attention as a secondary repository uses: actions/checkout@v4 @@ -73,7 +80,11 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ${{ inputs.docker-image }} @@ -87,7 +98,11 @@ jobs: echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}" - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -249,5 +264,9 @@ jobs: workflow_attempt: ${{github.run_attempt}} - name: Teardown Linux +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml index 46a591ba0ba6e..c85f4d993352c 100644 --- a/.github/workflows/_linux-test.yml +++ b/.github/workflows/_linux-test.yml @@ -136,7 +136,11 @@ jobs: contents: read steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ !contains(matrix.runner, 'b200') && inputs.build-environment != 'linux-s390x-binary-manywheel' }} with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -144,6 +148,21 @@ jobs: All testing is done inside the container, to start an interactive session run: docker exec -it $(docker container ps --format '{{.ID}}') bash +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + with: + no-sudo: true + + - name: Setup Python + if: contains(matrix.runner, 'b200') + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: '3.12' + cache: pip + +>>>>>>> upstream/release/2.11 - name: Setup Linux id: setup-linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 @@ -173,7 +192,11 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 if: inputs.build-environment != 'linux-s390x-binary-manywheel' with: docker-image-name: ${{ inputs.docker-image }} @@ -189,7 +212,11 @@ jobs: echo "docker pull 
ghcr.io/pytorch/ci-image:${tag/:/-}" - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 if: inputs.build-environment != 'linux-s390x-binary-manywheel' with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -201,7 +228,11 @@ jobs: - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG id: install-nvidia-driver +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.11 +>>>>>>> upstream/release/2.11 with: driver-version: '580.82.07' if: ${{ !contains(matrix.runner, 'b200') }} @@ -539,7 +570,11 @@ jobs: aws-region: us-east-1 - name: Upload the benchmark results +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.11 +>>>>>>> upstream/release/2.11 if: inputs.build-environment != 'linux-s390x-binary-manywheel' && steps.check-tpu.outputs.has_tpu != 'true' with: benchmark-results-dir: test/test-reports @@ -597,7 +632,11 @@ jobs: workflow_attempt: ${{github.run_attempt}} - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' - name: Cleanup docker diff --git a/.github/workflows/_mac-build.yml b/.github/workflows/_mac-build.yml index e4ffcef200656..0bdb669da2144 100644 --- a/.github/workflows/_mac-build.yml +++ b/.github/workflows/_mac-build.yml @@ -71,11 +71,19 @@ jobs: build-environment: ${{ inputs.build-environment }} steps: - name: Clean up disk space before running MacOS workflow +<<<<<<< HEAD 
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.12 # [see note: pytorch repo ref] - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.11 + + # [see note: pytorch repo ref] + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 - name: Set xcode version env: @@ -86,7 +94,11 @@ jobs: fi - name: Setup Python +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-python@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-python@release/2.11 +>>>>>>> upstream/release/2.11 with: python-version: ${{ inputs.python-version }} pip-requirements-file: .ci/docker/requirements-ci.txt @@ -192,4 +204,8 @@ jobs: - name: Clean up disk space if: always() continue-on-error: true +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.11 +>>>>>>> upstream/release/2.11 diff --git a/.github/workflows/_mac-test.yml b/.github/workflows/_mac-test.yml index 8a1464e207703..b8e6d01223993 100644 --- a/.github/workflows/_mac-test.yml +++ b/.github/workflows/_mac-test.yml @@ -105,11 +105,19 @@ jobs: done - name: Clean up disk space before running MacOS workflow +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.12 # [see note: pytorch repo ref] - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.11 + + # [see note: pytorch repo ref] + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 - name: Get workflow job id id: get-job-id @@ -119,7 +127,11 @@ jobs: github-token: ${{ secrets.GITHUB_TOKEN }} - name: Setup Python +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/setup-python@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-python@release/2.11 +>>>>>>> upstream/release/2.11 with: python-version: ${{ inputs.python-version }} pip-requirements-file: .ci/docker/requirements-ci.txt @@ -257,7 +269,11 @@ jobs: file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }} - name: Upload the benchmark results +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.11 +>>>>>>> upstream/release/2.11 with: benchmark-results-dir: test/test-reports dry-run: false @@ -287,4 +303,8 @@ jobs: - name: Clean up disk space if: always() continue-on-error: true +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.11 +>>>>>>> upstream/release/2.11 diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml index 1cd9cc3381610..34e8bb3fe03bc 100644 --- a/.github/workflows/_rocm-test.yml +++ b/.github/workflows/_rocm-test.yml @@ -85,7 +85,11 @@ jobs: timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }} steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: true @@ -104,12 +108,20 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ${{ inputs.docker-image }} - name: Pull docker 
image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -332,7 +344,11 @@ jobs: aws-region: us-east-1 - name: Upload the benchmark results +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.11 +>>>>>>> upstream/release/2.11 with: benchmark-results-dir: test/test-reports dry-run: false diff --git a/.github/workflows/_runner-determinator.yml b/.github/workflows/_runner-determinator.yml index 06aefad9dd634..2fe6542a9ea7d 100644 --- a/.github/workflows/_runner-determinator.yml +++ b/.github/workflows/_runner-determinator.yml @@ -63,8 +63,663 @@ jobs: OPT_OUT_EXPERIMENTS: ${{ inputs.opt_out_experiments }} PR_NUMBER: ${{ github.event.pull_request.number }} steps: +<<<<<<< HEAD - name: Checkout PyTorch uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 +======= + # - name: Checkout PyTorch + # uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + # with: + # fetch-depth: 1 + # submodules: true + + # TODO: Remove the hardcoded step below + # Hardcoding below is temporary for testing ALI runners + # This file below should match the script found in .github/scripts/runner_determinator.py + - name: Hardcode runner-determinator script + id: hardcode-script + run: | + cat < runner_determinator.py + # flake8: noqa: G004 + + # Note: Copies of this script in runner_determinator.py and _runner-determinator.yml + # must be kept in sync. You can do it easily by running the following command: + # python .github/scripts/update_runner_determinator.py + + """ + This runner determinator is used to determine which set of runners to run a + GitHub job on. 
It uses the first comment of a GitHub issue (by default + https://github.com/pytorch/test-infra/issues/5132) to define the configuration + of which runners should be used to run which job. + + The configuration has two parts, the settings and a list of opted-in users, + separated by a line containing "---". If the line is not present, the + settings are considered to be empty with only the second part, the user + list, defined. + + The first part is a YAML block that defines the rollout settings. This can be + used to define any settings that are needed to determine which runners to use. + Its fields are defined by the Settings class below. + + The second part is a list of users who are explicitly opted in to the LF fleet. + The user list is also a comma separated list of additional features or + experiments which the user could be opted in to. + + The user list has the following rules: + + - Users are GitHub usernames, which must start with the @ prefix + - Each user is also a comma-separated list of features/experiments to enable + - A "#" prefix opts the user out of all experiments + + Example config: + # A list of experiments that can be opted into. + # This defines the behavior they'll induce when opted into. + # Expected syntax is: + # [experiment_name]: # Name of the experiment. Also used for the label prefix. + # rollout_perc: [int] # % of workflows to run with this experiment when users are not opted in. + + experiments: + lf: + rollout_perc: 25 + all_branches: false + default: true + --- + + # Opt-ins: + # Users can opt into the LF fleet by adding their GitHub username to this list + # and specifying experiments to enable in a comma-separated list. + # To always opt out of an experiment, prefix it with a "-". + # Experiments should be from the above list.
+ + @User1,-lf,split_build + @User2,lf + @User3,split_build + """ + + import json + import logging + import os + import random + import re + import sys + from argparse import ArgumentParser + from collections.abc import Iterable + from functools import cache + from logging import LogRecord + from typing import Any, NamedTuple + from urllib.request import Request, urlopen + + import yaml + from github import Auth, Github + from github.Issue import Issue + + + DEFAULT_LABEL_PREFIX = "" # use meta runners + WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation + WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation + + GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "") + GH_OUTPUT_KEY_AMI = "runner-ami" + GH_OUTPUT_KEY_LABEL_TYPE = "label-type" + OPT_OUT_LABEL = "no-runner-experiments" + + SETTING_EXPERIMENTS = "experiments" + + LF_FLEET_EXPERIMENT = "lf" + CANARY_FLEET_SUFFIX = ".c" + + + class Experiment(NamedTuple): + rollout_perc: float = ( + 0 # Percentage of workflows to experiment on when user is not opted-in. + ) + all_branches: bool = ( + False # If True, the experiment is also enabled on the exception branches + ) + default: bool = ( + True # If True, the experiment is enabled by default for all queries + ) + + # Add more fields as needed + + + class Settings(NamedTuple): + """ + Settings for the experiments that can be opted into. 
+ """ + + experiments: dict[str, Experiment] = {} + + + class ColorFormatter(logging.Formatter): + """Color codes the log messages based on the log level""" + + COLORS = { + "WARNING": "\033[33m", # Yellow + "ERROR": "\033[31m", # Red + "CRITICAL": "\033[31m", # Red + "INFO": "\033[0m", # Reset + "DEBUG": "\033[0m", # Reset + } + + def format(self, record: LogRecord) -> str: + log_color = self.COLORS.get(record.levelname, "\033[0m") # Default to reset + record.msg = f"{log_color}{record.msg}\033[0m" + return super().format(record) + + + handler = logging.StreamHandler() + handler.setFormatter(ColorFormatter(fmt="%(levelname)-8s: %(message)s")) + + log = logging.getLogger(os.path.basename(__file__)) + log.addHandler(handler) + log.setLevel(logging.INFO) + + + def set_github_output(key: str, value: str) -> None: + """ + Defines outputs of the github action that invokes this script + """ + if not GITHUB_OUTPUT: + # See https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/ for deprecation notice + log.warning( + "No env var found for GITHUB_OUTPUT, you must be running this code locally. Falling back to the deprecated print method." 
+ ) + print(f"::set-output name={key}::{value}") + return + + with open(GITHUB_OUTPUT, "a") as f: + log.info(f"Setting output: {key}='{value}'") + f.write(f"{key}={value}\n") + + + def _str_comma_separated_to_set(value: str) -> frozenset[str]: + return frozenset( + filter(lambda itm: itm != "", map(str.strip, value.strip(" \n\t").split(","))) + ) + + + def parse_args() -> Any: + parser = ArgumentParser("Get dynamic rollout settings") + parser.add_argument("--github-token", type=str, required=True, help="GitHub token") + parser.add_argument( + "--github-issue-repo", + type=str, + required=False, + default="pytorch/test-infra", + help="GitHub repo to get the issue", + ) + parser.add_argument( + "--github-repo", + type=str, + required=True, + help="GitHub repo where CI is running", + ) + parser.add_argument( + "--github-issue", type=int, required=True, help="GitHub issue number" + ) + parser.add_argument( + "--github-actor", type=str, required=True, help="GitHub triggering_actor" + ) + parser.add_argument( + "--github-issue-owner", type=str, required=True, help="GitHub issue owner" + ) + parser.add_argument( + "--github-branch", type=str, required=True, help="Current GitHub branch or tag" + ) + parser.add_argument( + "--github-ref-type", + type=str, + required=True, + help="Current GitHub ref type, branch or tag", + ) + parser.add_argument( + "--eligible-experiments", + type=_str_comma_separated_to_set, + required=False, + default="", + help="comma separated list of experiments to check, if omitted all experiments marked with default=True are checked", + ) + parser.add_argument( + "--opt-out-experiments", + type=_str_comma_separated_to_set, + required=False, + default="", + help=( + "comma separated list of experiments to opt-out of. If unset, no opt-outs will occur. " + "If the same experiment is listed both here and in '--eligible-experiments' opt-out will take priority." 
+ ), + ) + parser.add_argument( + "--pr-number", + type=str, + required=False, + default="", + help="the optional PR number where this is run", + ) + + return parser.parse_args() + + + def get_gh_client(github_token: str) -> Github: # type: ignore[no-any-unimported] + auth = Auth.Token(github_token) + return Github(auth=auth) + + + def get_issue(gh: Github, repo: str, issue_num: int) -> Issue: # type: ignore[no-any-unimported] + repo = gh.get_repo(repo) + return repo.get_issue(number=issue_num) + + + def get_potential_pr_author( + github_token: str, repo: str, username: str, ref_type: str, ref_name: str + ) -> str: + # If the trigger was a new tag added by a bot, this is a ciflow case + # Fetch the actual username from the original PR. The PR number is + # embedded in the tag name: ciflow// + + gh = get_gh_client(github_token) + + if username == "pytorch-bot[bot]" and ref_type == "tag": + split_tag = ref_name.split("/") + if ( + len(split_tag) == 3 + and split_tag[0] == "ciflow" + and split_tag[2].isnumeric() + ): + pr_number = split_tag[2] + try: + repository = gh.get_repo(repo) + pull = repository.get_pull(number=int(pr_number)) + except Exception as e: + raise Exception( # noqa: TRY002 + f"issue with pull request {pr_number} from repo {repository}" + ) from e + return pull.user.login # type: ignore[no-any-return] + # In all other cases, return the original input username + return username + + + def is_exception_branch(branch: str) -> bool: + """ + Branches that get opted out of experiments by default, until they're explicitly enabled. 
+ """ + return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"} + + + def load_yaml(yaml_text: str) -> Any: + try: + data = yaml.safe_load(yaml_text) + return data + except yaml.YAMLError: + log.exception("Error loading YAML") + raise + + + def extract_settings_user_opt_in_from_text(rollout_state: str) -> tuple[str, str]: + """ + Extracts the text with settings, if any, and the opted in users from the rollout state. + + If the issue body contains "---" then the text above that is the settings + and the text below is the list of opted in users. + + If it doesn't contain "---" then the settings are empty and the rest is the users. + """ + rollout_state_parts = rollout_state.split("---") + if len(rollout_state_parts) >= 2: + return rollout_state_parts[0], rollout_state_parts[1] + else: + return "", rollout_state + + + class UserOptins(dict[str, list[str]]): + """ + Dictionary of users with a list of features they have opted into + """ + + + def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins: + """ + Parse the user opt-in text into a key value pair of username and the list of features they have opted into + + Users are GitHub usernames with the @ prefix. Each user is also a comma-separated list of features/experiments to enable. + - Example line: "@User1,lf,split_build" + - A "#" prefix indicates the user is opted out of all experiments + + + """ + optins = UserOptins() + for user in user_optin_text.split("\n"): + user = user.strip("\r\n\t -") + if not user or not user.startswith("@"): + # Not a valid user. Skip + continue + + if user: + usr_name = user.split(",")[0].strip("@") + optins[usr_name] = [exp.strip(" ") for exp in user.split(",")[1:]] + + return optins + + + def is_valid_experiment_name(experiment_name: str) -> bool: + """ + Check if the experiment name is valid. 
+ A valid name: + - Contains only alphanumeric characters and the special characters "_" & "-" + - The special characters "_" & "-" shouldn't be the first or last characters + - Cannot contain spaces + """ + + valid_char_regex = r"^[a-zA-Z0-9]([\w-]*[a-zA-Z0-9])?$" + valid = bool(re.match(valid_char_regex, experiment_name)) + + if valid: + return True + + log.error( + f"Invalid experiment name: {experiment_name}. Experiment names should only contain alphanumeric characters, '_', and '-'. They cannot contain spaces, and the special characters '_' and '-' cannot be the first or last characters." + ) + return False + + + def parse_settings_from_text(settings_text: str) -> Settings: + """ + Parse the experiments from the issue body into a list of ExperimentSettings + """ + try: + if settings_text: + # Escape the backtick as well so that we can have the settings in a code block on the GH issue + # for easy reading + # Note: Using ascii for the backtick so that the cat step in _runner-determinator.yml doesn't choke on + # the backtick character in shell commands. + backtick = chr(96) # backtick character + settings_text = settings_text.strip(f"\r\n\t{backtick} ") + settings = load_yaml(settings_text) + + # For now we just load experiments. We can expand this if/when we add more settings + experiments = {} + + for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items(): + if not is_valid_experiment_name(exp_name): + # Exclude invalid experiments from the list. We log an error, but don't raise an exception so that other experiments can still be processed. 
+ continue + + valid_settings = {} + for setting in exp_settings: + if setting not in Experiment._fields: + log.warning( + f"Unexpected setting in experiment: {setting} = {exp_settings[setting]}" + ) + else: + valid_settings[setting] = exp_settings[setting] + + experiments[exp_name] = Experiment(**valid_settings) + return Settings(experiments) + + except Exception: + log.exception("Failed to parse settings") + + return Settings() + + + def parse_settings(rollout_state: str) -> Settings: + """ + Parse settings, if any, from the rollout state. + + If the issue body contains "---" then the text above that is the settings + and the text below is the list of opted in users. + + If it doesn't contain "---" then the settings are empty and the default values are used. + """ + settings_text, _ = extract_settings_user_opt_in_from_text(rollout_state) + return parse_settings_from_text(settings_text) + + + def parse_users(rollout_state: str) -> UserOptins: + """ + Parse users from the rollout state. + + """ + _, users_text = extract_settings_user_opt_in_from_text(rollout_state) + return parse_user_opt_in_from_text(users_text) + + + def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -> bool: + """ + Check if a user is opted into an experiment + """ + return experiment_name in user_optins.get(user, []) + + + def is_user_opted_out(user: str, user_optins: UserOptins, experiment_name: str) -> bool: + """ + Check if a user explicitly opted out of an experiment + """ + # if the experiment is prefixed with a "-", then it's an opt-out + experiment_optout = "-" + experiment_name + if experiment_optout not in user_optins.get(user, []): + return False + + if is_user_opted_in(user, user_optins, experiment_name): + log.warning( + f"User {user} is opted into experiment {experiment_name}, but also opted out of it. 
Defaulting to opting out" + ) + + return True + + + def get_runner_prefix( + rollout_state: str, + workflow_requestors: Iterable[str], + branch: str, + eligible_experiments: frozenset[str] = frozenset(), + opt_out_experiments: frozenset[str] = frozenset(), + is_canary: bool = False, + ) -> str: + settings = parse_settings(rollout_state) + user_optins = parse_users(rollout_state) + + fleet_prefix = "" + prefixes = [] + for experiment_name, experiment_settings in settings.experiments.items(): + if not experiment_settings.all_branches and is_exception_branch(branch): + log.info( + f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}." + ) + continue + + if opt_out_experiments: + if experiment_name in opt_out_experiments: + opt_out_exp_list = ", ".join(opt_out_experiments) + log.info( + f"Skipping experiment '{experiment_name}', as this workflow has opted-out (opted out experiments are: {opt_out_exp_list})" + ) + continue + + if eligible_experiments: + if experiment_name not in eligible_experiments: + exp_list = ", ".join(eligible_experiments) + log.info( + f"Skipping experiment '{experiment_name}', as it is not in the eligible_experiments list: {exp_list}" + ) + continue + elif not experiment_settings.default: + log.info( + f"Skipping experiment '{experiment_name}', as it is not a default experiment" + ) + continue + + # Is any workflow_requestor opted out to this experiment? + opted_out_users = [ + requestor + for requestor in workflow_requestors + if is_user_opted_out(requestor, user_optins, experiment_name) + ] + + if opted_out_users: + log.info( + f"{', '.join(opted_out_users)} have opted out of experiment {experiment_name}." + ) + continue + + # Is any workflow_requestor opted in to this experiment? 
+ opted_in_users = [ + requestor + for requestor in workflow_requestors + if is_user_opted_in(requestor, user_optins, experiment_name) + ] + + enabled = False + if opted_in_users: + log.info( + f"{', '.join(opted_in_users)} have opted into experiment {experiment_name}." + ) + enabled = True + + elif experiment_settings.rollout_perc: + # If no user is opted in, then we randomly enable the experiment based on the rollout percentage + if random.uniform(0, 100) <= experiment_settings.rollout_perc: + log.info( + f"Based on rollout percentage of {experiment_settings.rollout_perc}%, enabling experiment {experiment_name}." + ) + enabled = True + + if enabled: + label = experiment_name + if experiment_name == LF_FLEET_EXPERIMENT: + # We give some special treatment to the "lf" experiment since determines the fleet we use + # - If it's enabled, then we always list it's prefix first + # - If we're in the canary branch, then we append ".c" to the lf prefix + if is_canary: + label += CANARY_FLEET_SUFFIX + fleet_prefix = label + else: + prefixes.append(label) + + if len(prefixes) > 1: + log.error( + f"Only a fleet and one other experiment can be enabled for a job at any time. Enabling {prefixes[0]} and ignoring the rest, which are {', '.join(prefixes[1:])}" + ) + prefixes = prefixes[:1] + + # Fleet always comes first + if fleet_prefix: + prefixes.insert(0, fleet_prefix) + + return ".".join(prefixes) + "." if prefixes else "" + + + def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -> str: + """ + Gets the first comment of the issue, which contains the desired rollout state. 
+ + The default issue we use - https://github.com/pytorch/test-infra/issues/5132 + """ + gh = get_gh_client(github_token) + issue = get_issue(gh, repo, issue_num) + return str(issue.get_comments()[0].body.strip("\n\t ")) + + + def download_json(url: str, headers: dict[str, str], num_retries: int = 3) -> Any: + for _ in range(num_retries): + try: + req = Request(url=url, headers=headers) + content = urlopen(req, timeout=5).read().decode("utf-8") + return json.loads(content) + except Exception as e: + log.warning(f"Could not download {url}: {e}") + + log.warning(f"All {num_retries} retries exhausted, downloading {url} failed") + return {} + + + @cache + def get_pr_info(github_repo: str, github_token: str, pr_number: int) -> dict[str, Any]: + """ + Dynamically get PR information + """ + github_api = f"https://api.github.com/repos/{github_repo}" + headers = { + "Accept": "application/vnd.github.v3+json", + "Authorization": f"token {github_token}", + } + json_response: dict[str, Any] = download_json( + url=f"{github_api}/issues/{pr_number}", + headers=headers, + ) + + if not json_response: + log.warning(f"Failed to get the labels for #{pr_number}") + return {} + + return json_response + + + def get_labels(github_repo: str, github_token: str, pr_number: int) -> set[str]: + """ + Dynamically get the latest list of labels from the pull request + """ + pr_info = get_pr_info(github_repo, github_token, pr_number) + return { + label.get("name") for label in pr_info.get("labels", []) if label.get("name") + } + + + def main() -> None: + args = parse_args() + + runner_label_prefix = DEFAULT_LABEL_PREFIX + + # Check if the PR is opt-out + if args.pr_number: + labels = get_labels(args.github_repo, args.github_token, int(args.pr_number)) + if OPT_OUT_LABEL in labels: + log.info( + f"Opt-out runner determinator because #{args.pr_number} has {OPT_OUT_LABEL} label" + ) + set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix) + sys.exit() + + try: + rollout_state = 
get_rollout_state_from_issue( + args.github_token, args.github_issue_repo, args.github_issue + ) + + username = get_potential_pr_author( + args.github_token, + args.github_repo, + args.github_actor, + args.github_ref_type, + args.github_branch, + ) + + is_canary = args.github_repo == "pytorch/pytorch-canary" + + runner_label_prefix = get_runner_prefix( + rollout_state, + (args.github_issue_owner, username), + args.github_branch, + args.eligible_experiments, + args.opt_out_experiments, + is_canary, + ) + + except Exception as e: + log.error( + f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}" + ) + + set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix) + + + if __name__ == "__main__": + main() + + EOF + + cat runner_determinator.py +>>>>>>> upstream/release/2.11 - name: Install dependencies run: python3 -m pip install urllib3==1.26.18 PyGithub==2.3.0 diff --git a/.github/workflows/_vllm-benchmark.yml b/.github/workflows/_vllm-benchmark.yml index 7a69ccaa10d08..d90e67284490b 100644 --- a/.github/workflows/_vllm-benchmark.yml +++ b/.github/workflows/_vllm-benchmark.yml @@ -94,7 +94,11 @@ jobs: name: ${{ inputs.build_environment }} s3-bucket: gha-artifacts +<<<<<<< HEAD - uses: pytorch/test-infra/.github/actions/setup-uv@release/2.12 +======= + - uses: pytorch/test-infra/.github/actions/setup-uv@release/2.11 +>>>>>>> upstream/release/2.11 with: python-version: "3.12" activate-environment: "true" @@ -235,7 +239,11 @@ jobs: aws-region: us-east-1 - name: Upload the benchmark results to OSS benchmark database for the dashboard +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.11 +>>>>>>> upstream/release/2.11 with: benchmark-results-dir: vllm-project/vllm/benchmarks/results benchmark-name: 'PyTorch x vLLM benchmark' diff --git a/.github/workflows/_win-build.yml b/.github/workflows/_win-build.yml 
index d763e895596d9..da1506017b92b 100644 --- a/.github/workflows/_win-build.yml +++ b/.github/workflows/_win-build.yml @@ -89,7 +89,11 @@ jobs: git config --global core.fsmonitor false - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 with: github-secret: ${{ secrets.GITHUB_TOKEN }} instructions: | @@ -104,7 +108,11 @@ jobs: # [see note: pytorch repo ref] - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: true diff --git a/.github/workflows/_win-test.yml b/.github/workflows/_win-test.yml index b0d4b472debe4..ba8f43d969734 100644 --- a/.github/workflows/_win-test.yml +++ b/.github/workflows/_win-test.yml @@ -78,7 +78,11 @@ jobs: git config --global core.fsmonitor false - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 with: github-secret: ${{ secrets.GITHUB_TOKEN }} instructions: | @@ -94,7 +98,11 @@ jobs: # [see note: pytorch repo ref] - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: true submodules: false diff --git a/.github/workflows/_xpu-test.yml b/.github/workflows/_xpu-test.yml index 4caf910760f27..40af763cd4a65 100644 --- a/.github/workflows/_xpu-test.yml +++ b/.github/workflows/_xpu-test.yml @@ -86,14 +86,22 @@ jobs: steps: # [see note: pytorch repo ref] - name: Checkout PyTorch +<<<<<<< HEAD uses: 
pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 - name: Setup XPU uses: ./.github/actions/setup-xpu - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ${{ inputs.docker-image }} @@ -107,7 +115,11 @@ jobs: echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}" - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} @@ -342,7 +354,11 @@ jobs: aws-region: us-east-1 - name: Upload the benchmark results +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.11 +>>>>>>> upstream/release/2.11 with: benchmark-results-dir: test/test-reports dry-run: false diff --git a/.github/workflows/b200-distributed.yml b/.github/workflows/b200-distributed.yml index 301e4e966563a..982e5f8eec234 100644 --- a/.github/workflows/b200-distributed.yml +++ b/.github/workflows/b200-distributed.yml @@ -25,7 +25,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git 
a/.github/workflows/b200-symm-mem.yml b/.github/workflows/b200-symm-mem.yml index 284b8cef3af98..eaaec2235e6cf 100644 --- a/.github/workflows/b200-symm-mem.yml +++ b/.github/workflows/b200-symm-mem.yml @@ -25,7 +25,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/build-almalinux-images.yml b/.github/workflows/build-almalinux-images.yml index 2d11a97ab692c..ac363d25d4c0c 100644 --- a/.github/workflows/build-almalinux-images.yml +++ b/.github/workflows/build-almalinux-images.yml @@ -39,7 +39,11 @@ jobs: tag: ["cuda12.6", "cuda12.8", "cuda13.0", "cuda13.2", "rocm7.0", "rocm7.1", "rocm7.2", "cpu"] steps: - name: Build docker image +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: almalinux-builder custom-tag-prefix: ${{matrix.tag}} diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml new file mode 100644 index 0000000000000..bdc81c0fc4a3f --- /dev/null +++ b/.github/workflows/build-libtorch-images.yml @@ -0,0 +1,68 @@ +name: Build libtorch docker images + +on: + push: + branches: + - main + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate or nightly builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + paths: + - .ci/docker/** + - .github/workflows/build-libtorch-images.yml + - .github/actions/binary-docker-build/** + pull_request: + paths: 
+ - .ci/docker/** + - .github/workflows/build-libtorch-images.yml + - .github/actions/binary-docker-build/** + +env: + DOCKER_REGISTRY: "docker.io" + DOCKER_BUILDKIT: 1 + WITH_PUSH: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + get-label-type: + if: github.repository_owner == 'pytorch' + name: get-label-type + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + + build: + environment: ${{ (github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/release') || startsWith(github.ref, 'refs/tags/v')) && 'docker-build') || '' }} + needs: get-label-type + runs-on: ${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral + name: libtorch-cxx11-builder:${{ matrix.tag }} + strategy: + fail-fast: false + matrix: + include: [ + { tag: "cuda13.0" }, + { tag: "cuda12.9" }, + { tag: "cuda12.8" }, + { tag: "cuda12.6" }, + { tag: "rocm7.0" }, + { tag: "rocm7.1" }, + { tag: "rocm7.2" }, + { tag: "cpu" }, + ] + steps: + - name: Build docker image + uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.11 + with: + docker-image-name: libtorch-cxx11-builder + custom-tag-prefix: ${{ matrix.tag }} + docker-build-dir: libtorch + DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} + DOCKER_ID: ${{ secrets.DOCKER_ID }} diff --git a/.github/workflows/build-manywheel-images-s390x.yml b/.github/workflows/build-manywheel-images-s390x.yml index 
b15d0b4f433f0..27380e9db585e 100644 --- a/.github/workflows/build-manywheel-images-s390x.yml +++ b/.github/workflows/build-manywheel-images-s390x.yml @@ -25,7 +25,11 @@ jobs: runs-on: linux.s390x steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false no-sudo: true diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index 28e5739fba62e..acd469258245b 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -32,7 +32,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -65,7 +69,11 @@ jobs: name: ${{ matrix.name }}:${{ matrix.tag }} steps: - name: Build docker image +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/binary-docker-build@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ${{ matrix.name }} custom-tag-prefix: ${{ matrix.tag }} diff --git a/.github/workflows/build-triton-wheel.yml b/.github/workflows/build-triton-wheel.yml index 6f8aaca9d1f86..fb2601465fa7c 100644 --- a/.github/workflows/build-triton-wheel.yml +++ b/.github/workflows/build-triton-wheel.yml @@ -3,7 +3,11 @@ name: Build Triton wheels on: push: branches: +<<<<<<< HEAD - release/2.12 +======= + - release/2.11 +>>>>>>> upstream/release/2.11 tags: # NOTE: Binary build pipelines should only get triggered on 
release candidate builds # Release candidate tags look like: v1.11.0-rc1 @@ -36,7 +40,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -74,12 +82,21 @@ jobs: PLATFORM: 'manylinux_2_28_x86_64' steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 with: github-secret: ${{ secrets.GITHUB_TOKEN }} - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false @@ -87,7 +104,11 @@ jobs: uses: ./.github/actions/ecr-login - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ env.DOCKER_IMAGE }} @@ -176,7 +197,11 @@ jobs: path: ${{ runner.temp }}/artifacts/wheelhouse/* - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() build-wheel-win: @@ -209,7 +234,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/build-vllm-wheel.yml b/.github/workflows/build-vllm-wheel.yml index 458e4a101acfe..656c48598dade 100644 --- a/.github/workflows/build-vllm-wheel.yml +++ b/.github/workflows/build-vllm-wheel.yml @@ -65,12 +65,21 @@ jobs: BUILD_DEVICE: ${{ matrix.device }} steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 with: github-secret: ${{ secrets.GITHUB_TOKEN }} - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false @@ -164,7 +173,11 @@ jobs: path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() # Copied from build-triton-wheel workflow (mostly) diff --git a/.github/workflows/claude-code.yml b/.github/workflows/claude-code.yml index e0a565ddfd3f9..461021e9edd12 100644 --- a/.github/workflows/claude-code.yml +++ b/.github/workflows/claude-code.yml @@ -14,9 +14,37 @@ jobs: pull-requests: write issues: write id-token: write +<<<<<<< HEAD secrets: inherit with: additional_claude_args: '--allowedTools Skill' append_system_prompt: | When asked to review a PR, always use the /pr-review skill first. It contains PyTorch-specific review guidelines, output format, and critical checks. 
+======= + steps: + # Fork PR support enabled by using izaitsevfb/claude-code-action@forked-pr-fix + + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Configure AWS credentials via OIDC + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_claude_code + aws-region: us-east-1 + + - name: Run Claude Code + uses: izaitsevfb/claude-code-action@forked-pr-fix + with: + # We filter by github.actor at workflow level, there is no point of filtering here as well + allowed_bots: "*" + claude_args: "--model global.anthropic.claude-opus-4-5-20251101-v1:0" + settings: '{"alwaysThinkingEnabled": true}' + use_bedrock: "true" + + - name: Upload usage metrics + if: always() + uses: pytorch/test-infra/.github/actions/upload-claude-usage@release/2.11 +>>>>>>> upstream/release/2.11 diff --git a/.github/workflows/claude-issue-triage-run.yml b/.github/workflows/claude-issue-triage-run.yml index 7751584b14322..2bc865e3ce6bb 100644 --- a/.github/workflows/claude-issue-triage-run.yml +++ b/.github/workflows/claude-issue-triage-run.yml @@ -108,6 +108,7 @@ jobs: fi - name: Upload usage metrics +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/upload-claude-usage@release/2.12 - name: Upload execution output to S3 @@ -132,3 +133,6 @@ jobs: else echo "No execution output found at ${OUTPUT_FILE}" fi +======= + uses: pytorch/test-infra/.github/actions/upload-claude-usage@release/2.11 +>>>>>>> upstream/release/2.11 diff --git a/.github/workflows/close-nonexistent-disable-issues.yml b/.github/workflows/close-nonexistent-disable-issues.yml index fd3693178786c..3829bb9c91d81 100644 --- a/.github/workflows/close-nonexistent-disable-issues.yml +++ b/.github/workflows/close-nonexistent-disable-issues.yml @@ -13,7 +13,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: 
pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false fetch-depth: 1 diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 2058df9f15967..36f3cecc99eba 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -19,7 +19,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index bfe3489526445..f8302ae76dc91 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -33,7 +33,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -53,12 +57,21 @@ jobs: pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11, pytorch-linux-jammy-cuda13.0-cudnn9-py3.12-gcc11-vllm, pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11-inductor-benchmarks, +<<<<<<< HEAD pytorch-linux-jammy-py3.10-clang18, pytorch-linux-jammy-py3.11-clang18, pytorch-linux-jammy-py3.12-clang18, pytorch-linux-jammy-py3.13-clang18, pytorch-linux-jammy-py3.14-clang18, pytorch-linux-jammy-py3.14t-clang18, +======= + pytorch-linux-jammy-py3.10-clang15, + pytorch-linux-jammy-py3.11-clang15, + 
pytorch-linux-jammy-py3.12-clang15, + pytorch-linux-jammy-py3.13-clang15, + pytorch-linux-jammy-py3.14-clang15, + pytorch-linux-jammy-py3.14t-clang15, +>>>>>>> upstream/release/2.11 pytorch-linux-jammy-rocm-n-py3, pytorch-linux-noble-rocm-n-py3, pytorch-linux-noble-rocm-nightly-py3, @@ -97,6 +110,14 @@ jobs: sudo rm -rf "${GITHUB_WORKSPACE}" mkdir "${GITHUB_WORKSPACE}" +<<<<<<< HEAD +======= + # [see note: pytorch repo ref] + # deep clone (fetch-depth 0) required for git merge-base + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + +>>>>>>> upstream/release/2.11 - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 @@ -105,14 +126,22 @@ jobs: - name: Build docker image id: build-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ci-image:${{ matrix.docker-image-name }} always-rebuild: true push: true - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.build-docker-image.outputs.docker-image }} @@ -162,5 +191,9 @@ jobs: if: always() - name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() diff --git a/.github/workflows/docker-cache-rocm.yml b/.github/workflows/docker-cache-rocm.yml index 067a953790923..174f75cede892 100644 --- a/.github/workflows/docker-cache-rocm.yml +++ b/.github/workflows/docker-cache-rocm.yml @@ -71,7 +71,11 @@ jobs: echo "Outputs of download-docker-builds-artifacts job: ${JSON_STRINGIFIED}" - name: 
Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: no-sudo: true @@ -86,7 +90,11 @@ jobs: echo "ghcr_image=${ghcr_image}" >> "$GITHUB_OUTPUT" - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.ghcr-io-tag.outputs.ghcr_image }} diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml index 537eb7f4cf91d..a0f120fc1dccd 100644 --- a/.github/workflows/docker-release.yml +++ b/.github/workflows/docker-release.yml @@ -38,7 +38,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -53,7 +57,11 @@ jobs: matrix: ${{ steps.generate-matrix.outputs.matrix }} steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: fetch-depth: 1 submodules: true @@ -83,7 +91,11 @@ jobs: CUDNN_VERSION: ${{ matrix.cudnn_version }} steps: - name: Setup SSH (Click me for login details) +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -163,13 +175,21 @@ jobs: fi - 
name: Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() validate: needs: build if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v')) }} +<<<<<<< HEAD uses: pytorch/test-infra/.github/workflows/validate-docker-images.yml@release/2.12 +======= + uses: pytorch/test-infra/.github/workflows/validate-docker-images.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: channel: nightly ref: main diff --git a/.github/workflows/dynamo-unittest.yml b/.github/workflows/dynamo-unittest.yml index d822926765d1c..8c417c55222a7 100644 --- a/.github/workflows/dynamo-unittest.yml +++ b/.github/workflows/dynamo-unittest.yml @@ -22,7 +22,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml index a1d1e3371562f..9c5fa0da5451b 100644 --- a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml @@ -41,7 +41,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 
with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -136,7 +140,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -185,9 +193,154 @@ jobs: build_name: manywheel-py3_10-cuda-aarch64-12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_10-cuda-aarch64-12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor 
of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.10" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_10-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_10-cuda-aarch64-12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_10-cuda-aarch64-12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_10-cuda-aarch64-12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + 
contents: read + needs: manywheel-py3_10-cuda-aarch64-12_8-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda-aarch64-12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_10-cuda-aarch64-12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.10" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_10-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_10-cuda-aarch64-12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - 
manywheel-py3_10-cuda-aarch64-12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_10-cuda-aarch64-12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_10-cuda-aarch64-12_9-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda-aarch64-12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_10-cuda-aarch64-13_0-build: @@ -210,7 +363,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | 
nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -427,7 +584,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -476,9 
+637,154 @@ jobs: build_name: manywheel-py3_11-cuda-aarch64-12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_11-cuda-aarch64-12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.11" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_11-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_11-cuda-aarch64-12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_11-cuda-aarch64-12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid 
of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_11-cuda-aarch64-12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_11-cuda-aarch64-12_8-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda-aarch64-12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_11-cuda-aarch64-12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.11" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_11-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + 
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_11-cuda-aarch64-12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_11-cuda-aarch64-12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_11-cuda-aarch64-12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_11-cuda-aarch64-12_9-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.11" + build_name: 
manywheel-py3_11-cuda-aarch64-12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_11-cuda-aarch64-13_0-build: @@ -501,7 +807,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -718,7 +1028,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_12-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; 
platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -767,12 +1081,14 @@ jobs: build_name: manywheel-py3_12-cuda-aarch64-12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_12-cuda-aarch64-13_0-build: + manywheel-py3_12-cuda-aarch64-12_8-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml needs: get-label-type @@ -781,26 +1097,26 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0-aarch64" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.r7g.12xlarge.memory ALPINE_IMAGE: "arm64v8/alpine" - build_name: manywheel-py3_12-cuda-aarch64-13_0 + build_name: manywheel-py3_12-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_12-cuda-aarch64-13_0-test: # Testing + manywheel-py3_12-cuda-aarch64-12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - manywheel-py3_12-cuda-aarch64-13_0-build + - manywheel-py3_12-cuda-aarch64-12_8-build - get-label-type uses: ./.github/workflows/_binary-test-linux.yml with: @@ -808,45 +1124,42 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0-aarch64" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 DESIRED_PYTHON: "3.12" - build_name: manywheel-py3_12-cuda-aarch64-13_0 + build_name: manywheel-py3_12-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.2xlarge 
ALPINE_IMAGE: "arm64v8/alpine" secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_12-cuda-aarch64-13_0-upload: # Uploading + manywheel-py3_12-cuda-aarch64-12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: manywheel-py3_12-cuda-aarch64-13_0-build + needs: manywheel-py3_12-cuda-aarch64-12_8-build with: PYTORCH_ROOT: /pytorch PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0-aarch64" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 DESIRED_PYTHON: "3.12" - build_name: manywheel-py3_12-cuda-aarch64-13_0 + build_name: manywheel-py3_12-cuda-aarch64-12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_12-cuda-aarch64-13_2-build: + manywheel-py3_12-cuda-aarch64-12_9-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml needs: get-label-type @@ -855,26 +1168,26 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2-aarch64" + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda13.2 + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.r7g.12xlarge.memory ALPINE_IMAGE: "arm64v8/alpine" - build_name: manywheel-py3_12-cuda-aarch64-13_2 + 
build_name: manywheel-py3_12-cuda-aarch64-12_9 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.2.1; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_12-cuda-aarch64-13_2-test: # Testing + manywheel-py3_12-cuda-aarch64-12_9-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - manywheel-py3_12-cuda-aarch64-13_2-build + - manywheel-py3_12-cuda-aarch64-12_9-build - get-label-type uses: ./.github/workflows/_binary-test-linux.yml with: @@ -882,45 +1195,195 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2-aarch64" + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda13.2 + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 DESIRED_PYTHON: "3.12" - build_name: manywheel-py3_12-cuda-aarch64-13_2 + build_name: manywheel-py3_12-cuda-aarch64-12_9 build_environment: linux-aarch64-binary-manywheel runner_prefix: "${{ 
needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.2xlarge ALPINE_IMAGE: "arm64v8/alpine" secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_12-cuda-aarch64-13_2-upload: # Uploading + manywheel-py3_12-cuda-aarch64-12_9-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: manywheel-py3_12-cuda-aarch64-13_2-build + needs: manywheel-py3_12-cuda-aarch64-12_9-build with: PYTORCH_ROOT: /pytorch PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2-aarch64" + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda13.2 + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 DESIRED_PYTHON: "3.12" - build_name: manywheel-py3_12-cuda-aarch64-13_2 + build_name: manywheel-py3_12-cuda-aarch64-12_9 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml - manywheel-py3_13-cpu-aarch64-build: + manywheel-py3_12-cuda-aarch64-13_0-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + DESIRED_PYTHON: "3.12" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: 
manywheel-py3_12-cuda-aarch64-13_0 + build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_12-cuda-aarch64-13_0-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_12-cuda-aarch64-13_0-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda-aarch64-13_0 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" 
+ secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_12-cuda-aarch64-13_0-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_12-cuda-aarch64-13_0-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda-aarch64-13_0 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_12-cuda-aarch64-13_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.2 + DESIRED_PYTHON: "3.12" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_12-cuda-aarch64-13_2 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.2.1; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | 
nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_12-cuda-aarch64-13_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_12-cuda-aarch64-13_2-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.2 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda-aarch64-13_2 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_12-cuda-aarch64-13_2-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_12-cuda-aarch64-13_2-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.2 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda-aarch64-13_2 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + + 
manywheel-py3_13-cpu-aarch64-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml needs: get-label-type @@ -1009,7 +1472,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1058,9 +1525,154 @@ jobs: build_name: manywheel-py3_13-cuda-aarch64-12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-cuda-aarch64-12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of 
in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_13-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13-cuda-aarch64-12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13-cuda-aarch64-12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cuda-aarch64-12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + 
id-token: write + contents: read + needs: manywheel-py3_13-cuda-aarch64-12_8-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda-aarch64-12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-cuda-aarch64-12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_13-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13-cuda-aarch64-12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + 
needs: + - manywheel-py3_13-cuda-aarch64-12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cuda-aarch64-12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13-cuda-aarch64-12_9-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda-aarch64-12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_13-cuda-aarch64-13_0-build: @@ -1083,7 +1695,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | 
nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1228,15 +1844,167 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.r7g.12xlarge.memory ALPINE_IMAGE: "arm64v8/alpine" - build_name: manywheel-py3_13t-cpu-aarch64 + build_name: manywheel-py3_13t-cpu-aarch64 + build_environment: linux-aarch64-binary-manywheel + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13t-cpu-aarch64-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13t-cpu-aarch64-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu-aarch64 + DOCKER_IMAGE: manylinux2_28_aarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cpu-aarch64 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + 
manywheel-py3_13t-cpu-aarch64-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13t-cpu-aarch64-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu-aarch64 + DOCKER_IMAGE: manylinux2_28_aarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cpu-aarch64 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13t-cuda-aarch64-12_6-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + DESIRED_PYTHON: "3.13t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_13t-cuda-aarch64-12_6 + build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 
'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13t-cuda-aarch64-12_6-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13t-cuda-aarch64-12_6-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cuda-aarch64-12_6 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13t-cuda-aarch64-12_6-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13t-cuda-aarch64-12_6-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: 
manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cuda-aarch64-12_6 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13t-cuda-aarch64-12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_13t-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_13t-cpu-aarch64-test: # Testing + manywheel-py3_13t-cuda-aarch64-12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - manywheel-py3_13t-cpu-aarch64-build + - manywheel-py3_13t-cuda-aarch64-12_8-build - get-label-type uses: 
./.github/workflows/_binary-test-linux.yml with: @@ -1244,43 +2012,42 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu-aarch64 - DOCKER_IMAGE: manylinux2_28_aarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 DESIRED_PYTHON: "3.13t" - build_name: manywheel-py3_13t-cpu-aarch64 + build_name: manywheel-py3_13t-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.2xlarge ALPINE_IMAGE: "arm64v8/alpine" secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_13t-cpu-aarch64-upload: # Uploading + manywheel-py3_13t-cuda-aarch64-12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: manywheel-py3_13t-cpu-aarch64-test + needs: manywheel-py3_13t-cuda-aarch64-12_8-build with: PYTORCH_ROOT: /pytorch PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu-aarch64 - DOCKER_IMAGE: manylinux2_28_aarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cpu-aarch64 + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 DESIRED_PYTHON: "3.13t" - build_name: manywheel-py3_13t-cpu-aarch64 + build_name: manywheel-py3_13t-cuda-aarch64-12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - 
manywheel-py3_13t-cuda-aarch64-12_6-build: + manywheel-py3_13t-cuda-aarch64-12_9-build: if: ${{ github.repository_owner == 'pytorch' }} uses: ./.github/workflows/_binary-build-linux.yml needs: get-label-type @@ -1289,26 +2056,26 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6-aarch64" + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.r7g.12xlarge.memory ALPINE_IMAGE: "arm64v8/alpine" - build_name: manywheel-py3_13t-cuda-aarch64-12_6 + build_name: manywheel-py3_13t-cuda-aarch64-12_9 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_13t-cuda-aarch64-12_6-test: # Testing + 
manywheel-py3_13t-cuda-aarch64-12_9-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - manywheel-py3_13t-cuda-aarch64-12_6-build + - manywheel-py3_13t-cuda-aarch64-12_9-build - get-label-type uses: ./.github/workflows/_binary-test-linux.yml with: @@ -1316,42 +2083,40 @@ jobs: PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6-aarch64" + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 DESIRED_PYTHON: "3.13t" - build_name: manywheel-py3_13t-cuda-aarch64-12_6 + build_name: manywheel-py3_13t-cuda-aarch64-12_9 build_environment: linux-aarch64-binary-manywheel runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runs_on: linux.arm64.2xlarge ALPINE_IMAGE: "arm64v8/alpine" secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - manywheel-py3_13t-cuda-aarch64-12_6-upload: # Uploading + manywheel-py3_13t-cuda-aarch64-12_9-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: manywheel-py3_13t-cuda-aarch64-12_6-build + needs: manywheel-py3_13t-cuda-aarch64-12_9-build with: PYTORCH_ROOT: /pytorch PACKAGE_TYPE: manywheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6-aarch64" + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" GPU_ARCH_TYPE: cuda-aarch64 DOCKER_IMAGE: manylinuxaarch64-builder - DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 DESIRED_PYTHON: "3.13t" - build_name: manywheel-py3_13t-cuda-aarch64-12_6 + build_name: manywheel-py3_13t-cuda-aarch64-12_9 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ 
secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_13t-cuda-aarch64-13_0-build: @@ -1374,7 +2139,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13t-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1591,7 +2360,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; 
platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1640,9 +2413,154 @@ jobs: build_name: manywheel-py3_14-cuda-aarch64-12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14-cuda-aarch64-12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_14-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | 
nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14-cuda-aarch64-12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14-cuda-aarch64-12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14-cuda-aarch64-12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_14-cuda-aarch64-12_8-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda-aarch64-12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14-cuda-aarch64-12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: 
./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_14-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14-cuda-aarch64-12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14-cuda-aarch64-12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: 
"arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14-cuda-aarch64-12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_14-cuda-aarch64-12_9-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda-aarch64-12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_14-cuda-aarch64-13_0-build: @@ -1665,7 +2583,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' 
+>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1882,7 +2804,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1931,9 +2857,154 @@ jobs: build_name: manywheel-py3_14t-cuda-aarch64-12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14t-cuda-aarch64-12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: 
cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_14t-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14t-cuda-aarch64-12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14t-cuda-aarch64-12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda-aarch64-12_8 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14t-cuda-aarch64-12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: 
manywheel-py3_14t-cuda-aarch64-12_8-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda-aarch64-12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14t-cuda-aarch64-12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.r7g.12xlarge.memory + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_14t-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14t-cuda-aarch64-12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - 
manywheel-py3_14t-cuda-aarch64-12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda-aarch64-12_9 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14t-cuda-aarch64-12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_14t-cuda-aarch64-12_9-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9-aarch64" + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: manylinuxaarch64-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda-aarch64-12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_14t-cuda-aarch64-13_0-build: @@ -1956,7 +3027,11 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | 
nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/generated-linux-binary-libtorch-nightly.yml b/.github/workflows/generated-linux-binary-libtorch-nightly.yml new file mode 100644 index 0000000000000..b05b969362f13 --- /dev/null +++ b/.github/workflows/generated-linux-binary-libtorch-nightly.yml @@ -0,0 +1,626 @@ +# @generated DO NOT EDIT MANUALLY + +# Template is at: .github/templates/linux_binary_build_workflow.yml.j2 +# Generation script: .github/scripts/generate_ci_workflows.py +name: linux-binary-libtorch + + +on: + push: + # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build + branches: + - nightly + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + - 'ciflow/binaries/*' + - 'ciflow/binaries_libtorch/*' + workflow_dispatch: + +permissions: + id-token: write + +env: + # Needed for conda builds + ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" + AWS_DEFAULT_REGION: us-east-1 + BINARY_ENV_FILE: /tmp/env + BUILD_ENVIRONMENT: linux-binary-libtorch + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + 
PR_NUMBER: ${{ github.event.pull_request.number }} + PYTORCH_FINAL_PACKAGE_DIR: /artifacts + PYTORCH_ROOT: /pytorch + SHA1: ${{ github.event.pull_request.head.sha || github.sha }} + SKIP_ALL_TESTS: 0 +concurrency: + group: linux-binary-libtorch-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + get-label-type: + if: github.repository_owner == 'pytorch' + name: get-label-type + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + libtorch-cpu-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cpu + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: libtorch-cpu-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-cpu-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cpu-shared-with-deps-release-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + 
GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cpu + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cpu-shared-with-deps-release + build_environment: linux-binary-libtorch + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.4xlarge + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + libtorch-cpu-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cpu-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cpu + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cpu-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + libtorch-cuda12_6-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: libtorch-cuda12_6-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-cuda12_6-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + 
- libtorch-cuda12_6-shared-with-deps-release-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda12_6-shared-with-deps-release + build_environment: linux-binary-libtorch + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + libtorch-cuda12_6-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_6-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.6 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda12_6-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + libtorch-cuda12_8-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: 
cuda12.8 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: libtorch-cuda12_8-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-cuda12_8-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda12_8-shared-with-deps-release-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda12_8-shared-with-deps-release + build_environment: linux-binary-libtorch + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + libtorch-cuda12_8-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_8-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda12_8-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + libtorch-cuda12_9-shared-with-deps-release-build: + if: ${{ 
github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: libtorch-cuda12_9-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-cuda12_9-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda12_9-shared-with-deps-release-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda12_9-shared-with-deps-release + build_environment: linux-binary-libtorch + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + libtorch-cuda12_9-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_9-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: 
cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda12_9-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + libtorch-cuda13_0-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: libtorch-cuda13_0-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-cuda13_0-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda13_0-shared-with-deps-release-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda13_0-shared-with-deps-release + build_environment: linux-binary-libtorch + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ 
secrets.GITHUB_TOKEN }} + libtorch-cuda13_0-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda13_0-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cuda13.0 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cuda13_0-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + libtorch-rocm7_1-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm7.1 + GPU_ARCH_VERSION: "7.1" + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: rocm7.1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 420 + build_name: libtorch-rocm7_1-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-rocm7_1-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-rocm7_1-shared-with-deps-release-build + - get-label-type + runs-on: linux.rocm.gpu.mi250.1 + timeout-minutes: 240 + env: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + 
DESIRED_CUDA: rocm7.1 + GPU_ARCH_VERSION: "7.1" + GPU_ARCH_TYPE: rocm + SKIP_ALL_TESTS: 1 + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: rocm7.1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + permissions: + id-token: write + contents: read + steps: + - name: Setup ROCm + uses: ./.github/actions/setup-rocm + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-rocm7_1-shared-with-deps-release + path: "${{ runner.temp }}/artifacts/" + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" + - name: configure aws credentials + id: aws_creds + if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only + aws-region: us-east-1 + role-duration-seconds: 18000 + - name: Calculate docker image + id: calculate-docker-image + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 + with: + docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} + docker-image-name: libtorch-cxx11-builder + custom-tag-prefix: rocm7.1 + docker-build-dir: .ci/docker + working-directory: pytorch + - name: Pull Docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 + with: + docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} + - name: Test Pytorch binary + uses: ./pytorch/.github/actions/test-pytorch-binary + env: + DOCKER_IMAGE: ${{ 
steps.calculate-docker-image.outputs.docker-image }} + - name: Teardown ROCm + uses: ./.github/actions/teardown-rocm + libtorch-rocm7_1-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-rocm7_1-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm7.1 + GPU_ARCH_VERSION: "7.1" + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: rocm7.1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-rocm7_1-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + libtorch-rocm7_2-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm7.2 + GPU_ARCH_VERSION: "7.2" + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: rocm7.2 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + timeout-minutes: 420 + build_name: libtorch-rocm7_2-shared-with-deps-release + build_environment: linux-binary-libtorch + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + libtorch-rocm7_2-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-rocm7_2-shared-with-deps-release-build + - get-label-type + runs-on: linux.rocm.gpu.mi250.1 + timeout-minutes: 240 + env: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy 
variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm7.2 + GPU_ARCH_VERSION: "7.2" + GPU_ARCH_TYPE: rocm + SKIP_ALL_TESTS: 1 + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: rocm7.2 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + permissions: + id-token: write + contents: read + steps: + - name: Setup ROCm + uses: ./.github/actions/setup-rocm + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-rocm7_2-shared-with-deps-release + path: "${{ runner.temp }}/artifacts/" + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: ROCm set GPU_FLAG + run: | + echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" + - name: configure aws credentials + id: aws_creds + if: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only + aws-region: us-east-1 + role-duration-seconds: 18000 + - name: Calculate docker image + id: calculate-docker-image + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 + with: + docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} + docker-image-name: libtorch-cxx11-builder + custom-tag-prefix: rocm7.2 + docker-build-dir: .ci/docker + working-directory: pytorch + - name: Pull Docker image + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 + with: + docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} + - name: Test Pytorch binary + uses: 
./pytorch/.github/actions/test-pytorch-binary + env: + DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} + - name: Teardown ROCm + uses: ./.github/actions/teardown-rocm + libtorch-rocm7_2-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-rocm7_2-shared-with-deps-release-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: rocm7.2 + GPU_ARCH_VERSION: "7.2" + GPU_ARCH_TYPE: rocm + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: rocm7.2 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-rocm7_2-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index 73fcb91f9f74e..347dba1b73697 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -42,7 +42,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -132,7 +136,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -179,9 +187,146 @@ jobs: build_name: manywheel-py3_10-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_10-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.10" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_10-cuda12_8 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_10-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_10-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_10-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_10-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_10-cuda12_9-build: + if: ${{ 
github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.10" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_10-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_10-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_10-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_10-cuda12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' 
}} + permissions: + id-token: write + contents: read + needs: manywheel-py3_10-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.10" + build_name: manywheel-py3_10-cuda12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_10-cuda13_0-build: @@ -202,7 +347,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -385,6 +534,20 @@ jobs: with: name: manywheel-py3_10-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + 
- name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -398,14 +561,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -500,6 +671,20 @@ jobs: with: name: manywheel-py3_10-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -513,14 +698,22 @@ jobs: role-duration-seconds: 18000 - name: 
Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -597,6 +790,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_10-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -617,14 +822,22 @@ jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -741,7 +954,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -788,9 +1005,146 @@ jobs: build_name: manywheel-py3_11-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_11-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: 
/pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.11" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_11-cuda12_8 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_11-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_11-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_11-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_11-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + 
PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_11-cuda12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.11" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_11-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_11-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_11-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 
+ GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_11-cuda12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_11-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.11" + build_name: manywheel-py3_11-cuda12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_11-cuda13_0-build: @@ -811,7 +1165,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | 
cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -994,6 +1352,20 @@ jobs: with: name: manywheel-py3_11-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -1007,14 +1379,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -1109,6 +1489,20 @@ jobs: with: name: manywheel-py3_11-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout 
PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -1122,14 +1516,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -1206,6 +1608,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_11-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -1226,14 +1640,22 @@ jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -1350,7 +1772,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1397,9 +1823,146 @@ jobs: build_name: 
manywheel-py3_12-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_12-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.12" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_12-cuda12_8 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_12-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_12-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: 
"3.12" + build_name: manywheel-py3_12-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_12-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_12-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_12-cuda12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.12" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_12-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; 
platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_12-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_12-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_12-cuda12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_12-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.12" + build_name: manywheel-py3_12-cuda12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_12-cuda13_0-build: @@ -1420,7 +1983,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; 
platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1603,6 +2170,20 @@ jobs: with: name: manywheel-py3_12-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -1616,14 +2197,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 
docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -1718,6 +2307,20 @@ jobs: with: name: manywheel-py3_12-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -1731,14 +2334,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -1815,6 +2426,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: 
pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_12-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -1835,14 +2458,22 @@ jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -1959,7 +2590,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -2006,9 +2641,146 @@ jobs: build_name: manywheel-py3_13-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_13-cuda12_8 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + 
+ manywheel-py3_13-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-cuda12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13" + runner_prefix: "${{ 
needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_13-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cuda12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda12_9 + 
secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_13-cuda13_0-build: @@ -2029,7 +2801,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -2212,6 +2988,20 @@ jobs: with: name: manywheel-py3_13-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -2225,14 
+3015,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -2327,6 +3125,20 @@ jobs: with: name: manywheel-py3_13-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -2340,14 +3152,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder 
custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -2424,6 +3244,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_13-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -2444,14 +3276,22 @@ jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -2568,7 +3408,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD 
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -2615,9 +3459,146 @@ jobs: build_name: manywheel-py3_13t-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13t-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_13t-cuda12_8 + build_environment: linux-binary-manywheel + 
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13t-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13t-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13t-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13t-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + 
manywheel-py3_13t-cuda12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_13t-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_13t-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13t-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13t-cuda12_9-upload: # 
Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13t-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.13t" + build_name: manywheel-py3_13t-cuda12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_13t-cuda13_0-build: @@ -2638,7 +3619,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -2821,6 +3806,20 @@ jobs: with: name: 
manywheel-py3_13t-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -2834,14 +3833,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -2936,6 +3943,20 @@ jobs: with: name: manywheel-py3_13t-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video 
--group-add daemon" >> "${GITHUB_ENV}" @@ -2949,14 +3970,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -3033,6 +4062,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_13t-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -3053,14 +4094,22 @@ jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu 
docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -3177,7 +4226,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -3224,9 +4277,146 @@ jobs: build_name: manywheel-py3_14-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: 
./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_14-cuda12_8 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: 
read + needs: manywheel-py3_14-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14-cuda12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_14-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we 
eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14-cuda12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_14-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14" + build_name: manywheel-py3_14-cuda12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_14-cuda13_0-build: @@ -3247,7 +4437,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -3430,6 +4624,20 @@ jobs: with: name: manywheel-py3_14-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -3443,14 +4651,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -3545,6 
+4761,20 @@ jobs: with: name: manywheel-py3_14-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -3558,14 +4788,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -3642,6 +4880,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_14-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -3662,14 +4912,22 @@ 
jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -3786,7 +5044,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14t-cuda12_6 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.29.3; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.6.3; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' +>>>>>>> 
upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -3833,9 +5095,146 @@ jobs: build_name: manywheel-py3_14t-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14t-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_14t-cuda12_8 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.8.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14t-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14t-cuda12_8-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: 
"12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda12_8 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14t-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_14t-cuda12_8-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.8 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_14t-cuda12_9-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14t" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build_name: manywheel-py3_14t-cuda12_9 + build_environment: linux-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==12.9.1; platform_system == 'Linux' | cuda-bindings>=12.9.4,<13; platform_system == 'Linux' | nvidia-cudnn-cu12==9.17.1.4; platform_system == 'Linux' | 
nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.4.5; platform_system == 'Linux' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + manywheel-py3_14t-cuda12_9-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_14t-cuda12_9-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda12_9 + build_environment: linux-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.g4dn.4xlarge.nvidia.gpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_14t-cuda12_9-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_14t-cuda12_9-test + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu129 + GPU_ARCH_VERSION: "12.9" + GPU_ARCH_TYPE: cuda + DOCKER_IMAGE: manylinux2_28-builder + DOCKER_IMAGE_TAG_PREFIX: cuda12.9 + DESIRED_PYTHON: "3.14t" + build_name: manywheel-py3_14t-cuda12_9 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml manywheel-py3_14t-cuda13_0-build: @@ -3856,7 +5255,11 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14t-cuda13_0 build_environment: linux-binary-manywheel +<<<<<<< HEAD PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +======= + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cublas,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.19.0.56; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.28.9; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' +>>>>>>> upstream/release/2.11 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -4039,6 +5442,20 @@ jobs: with: name: manywheel-py3_14t-rocm7_1 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -4052,14 +5469,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 
'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.1 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary @@ -4154,6 +5579,20 @@ jobs: with: name: manywheel-py3_14t-rocm7_2 path: "${{ runner.temp }}/artifacts/" +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch +>>>>>>> upstream/release/2.11 - name: ROCm set GPU_FLAG run: | echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}" @@ -4167,14 +5606,22 @@ jobs: role-duration-seconds: 18000 - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: rocm7.2 docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch 
binary @@ -4251,6 +5698,18 @@ jobs: id-token: write contents: read steps: +<<<<<<< HEAD +======= + - name: Setup XPU + uses: pytorch/pytorch/.github/actions/setup-xpu@release/2.11 + - name: Login to ECR + uses: pytorch/pytorch/.github/actions/ecr-login@release/2.11 + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: manywheel-py3_14t-xpu + path: "${{ runner.temp }}/artifacts/" +>>>>>>> upstream/release/2.11 - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -4271,14 +5730,22 @@ jobs: path: "${{ runner.temp }}/artifacts/" - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-registry: ${{ startsWith(github.event.ref, 'refs/tags/ciflow/') && '308535385114.dkr.ecr.us-east-1.amazonaws.com' || 'docker.io' }} docker-image-name: manylinux2_28-builder custom-tag-prefix: xpu docker-build-dir: .ci/docker - name: Pull Docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Test Pytorch binary diff --git a/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml index 09405e9d2cc8b..95f5fe4371d2d 100644 --- a/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml @@ -42,7 +42,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: 
pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml b/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml new file mode 100644 index 0000000000000..ed793bbc8a1f7 --- /dev/null +++ b/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml @@ -0,0 +1,127 @@ +# @generated DO NOT EDIT MANUALLY + +# Template is at: .github/templates/macos_binary_build_workflow.yml.j2 +# Generation script: .github/scripts/generate_ci_workflows.py +name: macos-arm64-binary-libtorch-release + +on: +# TODO: Migrate to new ciflow trigger, reference https://github.com/pytorch/pytorch/pull/70321 + push: + # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build + branches: + - nightly + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + - 'ciflow/binaries/*' + - 'ciflow/binaries_libtorch/*' + workflow_dispatch: + +env: + ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" + AWS_DEFAULT_REGION: us-east-1 + BUILD_ENVIRONMENT: macos-arm64-binary-libtorch-release + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + SKIP_ALL_TESTS: 0 +concurrency: + group: macos-arm64-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + libtorch-cpu-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + runs-on: macos-14-xlarge + timeout-minutes: 240 + env: + 
PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + # shellcheck disable=SC2129 + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + # shellcheck disable=SC2129 + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + # shellcheck disable=SC2129 + echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.10.4" + freethreaded: false + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + run: | + set -eux -o pipefail + # shellcheck disable=SC1090 + source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" + mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" + + # Build + USE_PYTORCH_METAL_EXPORT=1 + USE_COREML_DELEGATE=1 + TORCH_PACKAGE_NAME="${TORCH_PACKAGE_NAME//-/_}" + export USE_PYTORCH_METAL_EXPORT + export 
USE_COREML_DELEGATE + export TORCH_PACKAGE_NAME + "${PYTORCH_ROOT}/.ci/wheel/install_libomp.sh" + "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cpu-shared-with-deps-release + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + libtorch-cpu-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cpu-shared-with-deps-release-build + with: + PYTORCH_ROOT: /pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DOCKER_IMAGE: libtorch-cxx11-builder + DOCKER_IMAGE_TAG_PREFIX: cpu + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + build_name: libtorch-cpu-shared-with-deps-release + use_s3: False + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml diff --git a/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml b/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml index 8edab3d40a604..4ce6a01086005 100644 --- a/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml +++ b/.github/workflows/generated-windows-arm64-binary-libtorch-debug-nightly.yml @@ -41,7 +41,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml 
b/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml new file mode 100644 index 0000000000000..49dab1a56f13f --- /dev/null +++ b/.github/workflows/generated-windows-arm64-binary-libtorch-release-nightly.yml @@ -0,0 +1,208 @@ +# @generated DO NOT EDIT MANUALLY + +# Template is at: .github/templates/windows_binary_build_workflow.yml.j2 +# Generation script: .github/scripts/generate_ci_workflows.py +name: windows-arm64-binary-libtorch-release + +on: + push: + # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build + branches: + - nightly + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + - 'ciflow/binaries/*' + - 'ciflow/binaries_libtorch/*' + workflow_dispatch: + +env: + # Needed for conda builds + ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" + AWS_DEFAULT_REGION: us-east-1 + BUILD_ENVIRONMENT: windows-arm64-binary-libtorch-release + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + SHA1: ${{ github.event.pull_request.head.sha || github.sha }} + SKIP_ALL_TESTS: 1 + OS: windows-arm64 + PYTORCH_ROOT: /pytorch + DOWNLOADS_DIR: c:\temp\downloads + DEPENDENCIES_DIR: c:\temp\dependencies + ENABLE_APL: 1 + ENABLE_OPENBLAS: 0 + MSVC_VERSION : 14.42 +concurrency: + group: windows-arm64-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + get-label-type: + if: github.repository_owner == 'pytorch' + name: get-label-type + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || 
github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + libtorch-cpu-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "windows-11-arm64-preview" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Populate binary env + shell: cmd + run: | + echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV% + echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV% + echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV% + - name: Bootstrap folders + shell: cmd + run: | + mkdir "%NIGHTLIES_PYTORCH_ROOT%" + mkdir "%PYTORCH_FINAL_PACKAGE_DIR%" + - name: Enable long paths + shell: cmd + run: | + git config --system --get core.longpaths || echo "core.longpaths is not set, setting it now" + git config --system core.longpaths true + - name: Git checkout PyTorch + uses: actions/checkout@v4 + with: + path: "pytorch" + submodules: recursive + - name: Bootstrap Python + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat" + - name: Bootstrap APL + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat" + - name: Bootstrap Rust + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat" + - name: Bootstrap sccache + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat" + - name: Bootstrap Libuv + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat" + - name: 
Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cpu-shared-with-deps-release + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + libtorch-cpu-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cpu-shared-with-deps-release-build + - get-label-type + runs-on: "windows-11-arm64-preview" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Populate binary env + shell: cmd + run: | + echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV% + echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV% + echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV% + - name: Enable long paths + shell: cmd + run: | + git config --system --get core.longpaths || echo "core.longpaths is not set, setting it now" + git config --system core.longpaths true + - name: Git checkout PyTorch + uses: actions/checkout@v4 + with: + path: "pytorch" + submodules: recursive + - name: Bootstrap APL + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat" + - name: Bootstrap Python + shell: cmd + run: | + "pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat" + - name: Bootstrap Rust + shell: cmd + run: | + 
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-cpu-shared-with-deps-release + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + libtorch-cpu-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cpu-shared-with-deps-release-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + build_name: libtorch-cpu-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml diff --git a/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml b/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml index e3825b76cdbcc..189dd58bff534 100644 --- a/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml +++ b/.github/workflows/generated-windows-arm64-binary-wheel-nightly.yml @@ -42,7 +42,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} 
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml index 6953a11b64677..f213dbc3da19b 100644 --- a/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml +++ b/.github/workflows/generated-windows-binary-libtorch-debug-nightly.yml @@ -35,7 +35,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -84,7 +88,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -185,7 +193,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -325,7 +337,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -427,7 +443,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -520,9 +540,260 @@ jobs: build_name: libtorch-cuda12_6-shared-with-deps-debug secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + libtorch-cuda12_8-shared-with-deps-debug-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: debug + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we 
need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. 
This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cuda12_8-shared-with-deps-debug + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + libtorch-cuda12_8-shared-with-deps-debug-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda12_8-shared-with-deps-debug-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # 
TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: debug + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-cuda12_8-shared-with-deps-debug + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + libtorch-cuda12_8-shared-with-deps-debug-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_8-shared-with-deps-debug-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + LIBTORCH_CONFIG: debug + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + build_name: libtorch-cuda12_8-shared-with-deps-debug + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml libtorch-cuda13_0-shared-with-deps-debug-build: if: ${{ 
github.repository_owner == 'pytorch' }} @@ -568,7 +839,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -670,7 +945,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml new file mode 100644 index 0000000000000..16075a8568e35 --- /dev/null +++ b/.github/workflows/generated-windows-binary-libtorch-release-nightly.yml @@ -0,0 +1,1032 @@ +# @generated DO NOT EDIT MANUALLY + +# Template is at: .github/templates/windows_binary_build_workflow.yml.j2 +# Generation script: .github/scripts/generate_ci_workflows.py +name: windows-binary-libtorch-release + +on: + push: + # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build + branches: + - nightly + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + - 'ciflow/binaries/*' + - 'ciflow/binaries_libtorch/*' + workflow_dispatch: + +env: + # Needed for conda builds + ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" + AWS_DEFAULT_REGION: us-east-1 + 
BUILD_ENVIRONMENT: windows-binary-libtorch-release + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + SHA1: ${{ github.event.pull_request.head.sha || github.sha }} + SKIP_ALL_TESTS: 1 + OS: windows +concurrency: + group: windows-binary-libtorch-release-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + get-label-type: + if: github.repository_owner == 'pytorch' + name: get-label-type + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + libtorch-cpu-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. 
This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cpu-shared-with-deps-release + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + libtorch-cpu-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cpu-shared-with-deps-release-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a 
legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-cpu-shared-with-deps-release + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + libtorch-cpu-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cpu-shared-with-deps-release-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + build_name: libtorch-cpu-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + libtorch-cuda12_6-shared-with-deps-release-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + 
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long 
paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cuda12_6-shared-with-deps-release + retention-days: 14 + if-no-files-found: error + path: "${{ 
env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + libtorch-cuda12_6-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda12_6-shared-with-deps-release-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-cuda12_6-shared-with-deps-release + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + libtorch-cuda12_6-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_6-shared-with-deps-release-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + build_name: libtorch-cuda12_6-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + libtorch-cuda12_8-shared-with-deps-release-build: + if: ${{ 
github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + 
github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cuda12_8-shared-with-deps-release + 
retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + libtorch-cuda12_8-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda12_8-shared-with-deps-release-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] 
Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. 
This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-cuda12_8-shared-with-deps-release + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + libtorch-cuda12_8-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda12_8-shared-with-deps-release-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + build_name: libtorch-cuda12_8-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml + libtorch-cuda13_0-shared-with-deps-release-build: + if: ${{ 
github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + 
github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: libtorch-cuda13_0-shared-with-deps-release + 
retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + libtorch-cuda13_0-shared-with-deps-release-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - libtorch-cuda13_0-shared-with-deps-release-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] 
Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. 
This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: libtorch-cuda13_0-shared-with-deps-release + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + libtorch-cuda13_0-shared-with-deps-release-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: libtorch-cuda13_0-shared-with-deps-release-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: libtorch + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + LIBTORCH_CONFIG: release + LIBTORCH_VARIANT: shared-with-deps + # This is a dummy value for libtorch to work correctly with our batch scripts + # without this value pip does not get installed for some reason + DESIRED_PYTHON: "3.10" + build_name: libtorch-cuda13_0-shared-with-deps-release + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + uses: ./.github/workflows/_binary-upload.yml diff --git a/.github/workflows/generated-windows-binary-wheel-nightly.yml 
b/.github/workflows/generated-windows-binary-wheel-nightly.yml index 1aeec381ec19b..757f6b877f61d 100644 --- a/.github/workflows/generated-windows-binary-wheel-nightly.yml +++ b/.github/workflows/generated-windows-binary-wheel-nightly.yml @@ -36,7 +36,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -82,7 +86,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -179,7 +187,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -311,7 +323,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ 
secrets.GITHUB_TOKEN }} @@ -409,7 +425,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -498,22 +518,24 @@ jobs: build_name: wheel-py3_10-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - wheel-py3_10-cuda13_0-build: + wheel-py3_10-cuda12_8-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -542,7 +564,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -575,52 +597,56 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean 
-fxd + working-directory: pytorch - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_10-cuda13_0 + name: wheel-py3_10-cuda12_8 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda13_0-test: # Testing + wheel-py3_10-cuda12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_10-cuda13_0-build + - wheel-py3_10-cuda12_8-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -640,7 +666,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ 
secrets.GITHUB_TOKEN }} @@ -673,11 +699,13 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the # runner.temp variable, which we need. @@ -690,50 +718,50 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_10-cuda13_0 + name: wheel-py3_10-cuda12_8 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Test PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda13_0-upload: # Uploading + wheel-py3_10-cuda12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_10-cuda13_0-test + needs: wheel-py3_10-cuda12_8-test with: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda 
DESIRED_PYTHON: "3.10" - build_name: wheel-py3_10-cuda13_0 + build_name: wheel-py3_10-cuda12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml - wheel-py3_10-cuda13_2-build: + wheel-py3_10-cuda13_0-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -743,8 +771,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -773,7 +801,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -822,7 +854,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_10-cuda13_2 + name: wheel-py3_10-cuda13_0 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -838,10 +870,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda13_2-test: # Testing + wheel-py3_10-cuda13_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_10-cuda13_2-build + - wheel-py3_10-cuda13_0-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -850,8 +882,8 @@ jobs: PACKAGE_TYPE: 
wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" @@ -871,7 +903,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -921,7 +957,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_10-cuda13_2 + name: wheel-py3_10-cuda13_0 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -942,29 +978,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-cuda13_2-upload: # Uploading + wheel-py3_10-cuda13_0-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_10-cuda13_2-test + needs: wheel-py3_10-cuda13_0-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.10" - build_name: wheel-py3_10-cuda13_2 + build_name: wheel-py3_10-cuda13_0 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_10-xpu-build: + wheel-py3_10-cuda13_2-build: if: ${{ 
github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -974,11 +1010,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -1053,7 +1089,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_10-xpu + name: wheel-py3_10-cuda13_2 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1069,20 +1105,21 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-xpu-test: # Testing + wheel-py3_10-cuda13_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_10-xpu-build + - wheel-py3_10-cuda13_2-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ 
needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.10" steps: @@ -1151,7 +1188,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_10-xpu + name: wheel-py3_10-cuda13_2 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -1172,28 +1209,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_10-xpu-upload: # Uploading + wheel-py3_10-cuda13_2-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_10-xpu-test + needs: wheel-py3_10-cuda13_2-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.10" - build_name: wheel-py3_10-xpu + build_name: wheel-py3_10-cuda13_2 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_11-cpu-build: + wheel-py3_10-xpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -1203,11 +1241,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu 
+ DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.11" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + DESIRED_PYTHON: "3.10" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -1233,7 +1271,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ 
-1282,7 +1324,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_11-cpu + name: wheel-py3_10-xpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1298,10 +1340,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cpu-test: # Testing + wheel-py3_10-xpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_11-cpu-build + - wheel-py3_10-xpu-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 @@ -1310,10 +1352,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.11" + DESIRED_PYTHON: "3.10" steps: - name: Display EC2 information shell: bash @@ -1330,7 +1372,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -1380,7 +1426,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_11-cpu + name: wheel-py3_10-xpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -1401,28 +1447,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cpu-upload: # Uploading + wheel-py3_10-xpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_11-cpu-test + needs: wheel-py3_10-xpu-test with: PYTORCH_ROOT: ${{ github.workspace }} 
PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DESIRED_PYTHON: "3.11" - build_name: wheel-py3_11-cpu + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + DESIRED_PYTHON: "3.10" + build_name: wheel-py3_10-xpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_11-cuda12_6-build: + wheel-py3_11-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -1432,11 +1478,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -1462,7 +1508,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -1511,7 +1561,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_11-cuda12_6 + name: wheel-py3_11-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1527,21 +1577,20 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cuda12_6-test: # Testing + wheel-py3_11-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_11-cuda12_6-build + - wheel-py3_11-cpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" steps: @@ -1560,7 +1609,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -1610,7 +1663,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_11-cuda12_6 + name: wheel-py3_11-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -1631,29 +1684,28 @@ jobs: if: always() run: | 
.github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cuda12_6-upload: # Uploading + wheel-py3_11-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_11-cuda12_6-test + needs: wheel-py3_11-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu DESIRED_PYTHON: "3.11" - build_name: wheel-py3_11-cuda12_6 + build_name: wheel-py3_11-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_11-cuda13_0-build: + wheel-py3_11-cuda12_6-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -1663,8 +1715,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" @@ -1693,7 +1745,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -1742,7 +1798,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_11-cuda13_0 + name: 
wheel-py3_11-cuda12_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -1758,10 +1814,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cuda13_0-test: # Testing + wheel-py3_11-cuda12_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_11-cuda13_0-build + - wheel-py3_11-cuda12_6-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -1770,8 +1826,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" @@ -1791,7 +1847,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -1841,7 +1901,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_11-cuda13_0 + name: wheel-py3_11-cuda12_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -1862,40 +1922,42 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cuda13_0-upload: # Uploading + wheel-py3_11-cuda12_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_11-cuda13_0-test + needs: wheel-py3_11-cuda12_6-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor 
of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.11" - build_name: wheel-py3_11-cuda13_0 + build_name: wheel-py3_11-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - wheel-py3_11-cuda13_2-build: + wheel-py3_11-cuda12_8-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" @@ -1924,7 +1986,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -1957,52 +2019,56 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary shell: bash run: | - 
"${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_11-cuda13_2 + name: wheel-py3_11-cuda12_8 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cuda13_2-test: # Testing + wheel-py3_11-cuda12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_11-cuda13_2-build + - wheel-py3_11-cuda12_8-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" @@ -2022,7 +2088,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -2055,11 +2121,13 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + 
working-directory: pytorch # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the # runner.temp variable, which we need. @@ -2072,50 +2140,50 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_11-cuda13_2 + name: wheel-py3_11-cuda12_8 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Test PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-cuda13_2-upload: # Uploading + wheel-py3_11-cuda12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_11-cuda13_2-test + needs: wheel-py3_11-cuda12_8-test with: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.11" - build_name: wheel-py3_11-cuda13_2 + build_name: wheel-py3_11-cuda12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - 
R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml - wheel-py3_11-xpu-build: + wheel-py3_11-cuda13_0-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -2125,11 +2193,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -2155,7 +2223,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ 
secrets.GITHUB_TOKEN }} @@ -2204,7 +2276,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_11-xpu + name: wheel-py3_11-cuda13_0 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2220,20 +2292,21 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-xpu-test: # Testing + wheel-py3_11-cuda13_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_11-xpu-build + - wheel-py3_11-cuda13_0-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.11" steps: @@ -2252,7 +2325,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -2302,7 +2379,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_11-xpu + name: wheel-py3_11-cuda13_0 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -2323,28 +2400,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_11-xpu-upload: # Uploading + wheel-py3_11-cuda13_0-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - 
needs: wheel-py3_11-xpu-test + needs: wheel-py3_11-cuda13_0-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.11" - build_name: wheel-py3_11-xpu + build_name: wheel-py3_11-cuda13_0 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_12-cpu-build: + wheel-py3_11-cuda13_2-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -2354,11 +2432,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.12" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + DESIRED_PYTHON: "3.11" steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -2433,7 +2511,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: 
always() with: - name: wheel-py3_12-cpu + name: wheel-py3_11-cuda13_2 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2449,22 +2527,23 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cpu-test: # Testing + wheel-py3_11-cuda13_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_12-cpu-build + - wheel-py3_11-cuda13_2-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.12" + DESIRED_PYTHON: "3.11" steps: - name: Display EC2 information shell: bash @@ -2531,7 +2610,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_12-cpu + name: wheel-py3_11-cuda13_2 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -2552,28 +2631,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cpu-upload: # Uploading + wheel-py3_11-cuda13_2-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_12-cpu-test + needs: wheel-py3_11-cuda13_2-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DESIRED_PYTHON: "3.12" - build_name: wheel-py3_12-cpu + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.11" + build_name: wheel-py3_11-cuda13_2 
secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_12-cuda12_6-build: + wheel-py3_11-xpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -2583,11 +2663,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.12" + DESIRED_PYTHON: "3.11" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -2613,7 +2693,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -2662,7 +2746,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_12-cuda12_6 + name: wheel-py3_11-xpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2678,23 +2762,22 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cuda12_6-test: # Testing + wheel-py3_11-xpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_12-cuda12_6-build + - wheel-py3_11-xpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.12" + DESIRED_PYTHON: "3.11" steps: - name: Display EC2 information shell: bash @@ -2711,7 +2794,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -2761,7 +2848,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_12-cuda12_6 + name: wheel-py3_11-xpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -2782,29 +2869,28 @@ jobs: if: always() run: | 
.github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cuda12_6-upload: # Uploading + wheel-py3_11-xpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_12-cuda12_6-test + needs: wheel-py3_11-xpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.12" - build_name: wheel-py3_12-cuda12_6 + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + DESIRED_PYTHON: "3.11" + build_name: wheel-py3_11-xpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_12-cuda13_0-build: + wheel-py3_12-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -2814,11 +2900,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.12" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are 
put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -2844,7 +2930,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -2893,7 +2983,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_12-cuda13_0 + name: wheel-py3_12-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -2909,21 +2999,20 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cuda13_0-test: # Testing + wheel-py3_12-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_12-cuda13_0-build + - wheel-py3_12-cpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.12" steps: @@ -2942,7 +3031,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true 
with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -2992,7 +3085,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_12-cuda13_0 + name: wheel-py3_12-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -3013,29 +3106,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cuda13_0-upload: # Uploading + wheel-py3_12-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_12-cuda13_0-test + needs: wheel-py3_12-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu DESIRED_PYTHON: "3.12" - build_name: wheel-py3_12-cuda13_0 + build_name: wheel-py3_12-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_12-cuda13_2-build: + wheel-py3_12-cuda12_6-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -3045,8 +3137,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.12" @@ -3075,7 +3167,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -3124,7 +3220,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_12-cuda13_2 + name: wheel-py3_12-cuda12_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3140,10 +3236,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cuda13_2-test: # Testing + wheel-py3_12-cuda12_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_12-cuda13_2-build + - wheel-py3_12-cuda12_6-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -3152,8 +3248,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.12" @@ -3173,7 +3269,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -3223,7 +3323,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_12-cuda13_2 + name: wheel-py3_12-cuda12_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -3244,43 +3344,45 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-cuda13_2-upload: # 
Uploading + wheel-py3_12-cuda12_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_12-cuda13_2-test + needs: wheel-py3_12-cuda12_6-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.12" - build_name: wheel-py3_12-cuda13_2 + build_name: wheel-py3_12-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - wheel-py3_12-xpu-build: + wheel-py3_12-cuda12_8-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.12" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | 
onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -3306,7 +3408,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -3339,52 +3441,57 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_12-xpu + name: wheel-py3_12-cuda12_8 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-xpu-test: # Testing + 
wheel-py3_12-cuda12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_12-xpu-build + - wheel-py3_12-cuda12_8-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.12" steps: @@ -3403,7 +3510,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -3436,11 +3543,13 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the # runner.temp variable, which we need. 
@@ -3453,49 +3562,50 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_12-xpu + name: wheel-py3_12-cuda12_8 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Test PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_12-xpu-upload: # Uploading + wheel-py3_12-cuda12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_12-xpu-test + needs: wheel-py3_12-cuda12_8-test with: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.12" - build_name: wheel-py3_12-xpu + build_name: wheel-py3_12-cuda12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13-cpu-build: + wheel-py3_12-cuda13_0-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ 
needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -3505,11 +3615,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + DESIRED_PYTHON: "3.12" steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -3535,7 +3645,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -3584,7 +3698,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13-cpu + name: wheel-py3_12-cuda13_0 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3600,22 +3714,23 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cpu-test: # Testing + wheel-py3_12-cuda13_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - 
wheel-py3_13-cpu-build + - wheel-py3_12-cuda13_0-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.12" steps: - name: Display EC2 information shell: bash @@ -3632,7 +3747,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -3682,7 +3801,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13-cpu + name: wheel-py3_12-cuda13_0 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -3703,28 +3822,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cpu-upload: # Uploading + wheel-py3_12-cuda13_0-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13-cpu-test + needs: wheel-py3_12-cuda13_0-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu - DESIRED_PYTHON: "3.13" - build_name: wheel-py3_13-cpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda 
+ DESIRED_PYTHON: "3.12" + build_name: wheel-py3_12-cuda13_0 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13-cuda12_6-build: + wheel-py3_12-cuda13_2-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -3734,11 +3854,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.12" steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -3813,7 +3933,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13-cuda12_6 + name: wheel-py3_12-cuda13_2 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -3829,10 +3949,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cuda12_6-test: # Testing + wheel-py3_12-cuda13_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13-cuda12_6-build + - wheel-py3_12-cuda13_2-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -3841,11 +3961,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + 
DESIRED_PYTHON: "3.12" steps: - name: Display EC2 information shell: bash @@ -3912,7 +4032,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13-cuda12_6 + name: wheel-py3_12-cuda13_2 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -3933,29 +4053,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cuda12_6-upload: # Uploading + wheel-py3_12-cuda13_2-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13-cuda12_6-test + needs: wheel-py3_12-cuda13_2-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.13" - build_name: wheel-py3_13-cuda12_6 + DESIRED_PYTHON: "3.12" + build_name: wheel-py3_12-cuda13_2 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13-cuda13_0-build: + wheel-py3_12-xpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -3965,11 +4085,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.12" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | 
intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -3995,7 +4115,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4044,7 +4168,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13-cuda13_0 + name: wheel-py3_12-xpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4060,12 +4184,853 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cuda13_0-test: # Testing + wheel-py3_12-xpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13-cuda13_0-build + - wheel-py3_12-xpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + 
PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.12" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_12-xpu + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_12-xpu-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_12-xpu-test + with: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + DESIRED_PYTHON: "3.12" + build_name: wheel-py3_12-xpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13-cpu-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # 
favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 
+>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13-cpu + retention-days: 
14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13-cpu-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13-cpu-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor 
daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_13-cpu + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_13-cpu-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_13-cpu-test + with: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.13" + build_name: wheel-py3_13-cpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13-cuda12_6-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of 
in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13-cuda12_6 + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: 
always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13-cuda12_6-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13-cuda12_6-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_13-cuda12_6 + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_13-cuda12_6-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_13-cuda12_6-test + with: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.13" + build_name: wheel-py3_13-cuda12_6 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13-cuda12_8-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: 
wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13-cuda12_8 + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh 
sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13-cuda12_8-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13-cuda12_8-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + path: pytorch + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + working-directory: pytorch + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_13-cuda12_8 + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + working-directory: pytorch + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + working-directory: pytorch + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_13-cuda12_8-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_13-cuda12_8-test + with: + PYTORCH_ROOT: ${{ github.workspace }}/pytorch + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.13" + build_name: wheel-py3_13-cuda12_8 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} +>>>>>>> upstream/release/2.11 + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13-cuda13_0-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} @@ -4076,7 +5041,824 @@ jobs: GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - 
DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.13" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13-cuda13_0 + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: 
always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13-cuda13_0-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13-cuda13_0-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_13-cuda13_0 + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_13-cuda13_0-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_13-cuda13_0-test + with: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.13" + build_name: wheel-py3_13-cuda13_0 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13-cuda13_2-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a 
legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13-cuda13_2 + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: 
always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13-cuda13_2-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13-cuda13_2-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_13-cuda13_2 + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_13-cuda13_2-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_13-cuda13_2-test + with: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.13" + build_name: wheel-py3_13-cuda13_2 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13-xpu-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy 
variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13-xpu + retention-days: 14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() 
+ run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13-xpu-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13-xpu-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13" + steps: + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. 
The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. 
+ - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - uses: actions/download-artifact@v4.1.7 + name: Download Build Artifacts + with: + name: wheel-py3_13-xpu + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Test PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + wheel-py3_13-xpu-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: wheel-py3_13-xpu-test + with: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu + DESIRED_PYTHON: "3.13" + build_name: wheel-py3_13-xpu + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} + R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} + R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} + uses: ./.github/workflows/_binary-upload.yml + wheel-py3_13t-cpu-build: + if: ${{ github.repository_owner == 'pytorch' }} + needs: get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + 
# favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13t" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' + steps: + # NOTE: These environment variables are put here so that they can be applied on every job equally + # They are also here because setting them at a workflow level doesn't give us access to the + # runner.temp variable, which we need. + - name: Populate binary env + shell: bash + run: | + echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" + echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" + echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" + - name: Display EC2 information + shell: bash + run: | + set -euo pipefail + function get_ec2_metadata() { + # Pulled from instance metadata endpoint for EC2 + # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html + category=$1 + curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}" + } + echo "ami-id: $(get_ec2_metadata ami-id)" + echo "instance-id: $(get_ec2_metadata instance-id)" + echo "instance-type: $(get_ec2_metadata instance-type)" + echo "system info $(uname -a)" + - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 
+>>>>>>> upstream/release/2.11 + continue-on-error: true + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + - name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon + shell: bash + run: | + git config --global core.longpaths true + git config --global core.symlinks true + + # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock + # the directory on Windows and prevent GHA from checking out as reported + # in https://github.com/actions/checkout/issues/1018 + git config --global core.fsmonitor false + # Needed for binary builds, see: https://github.com/pytorch/pytorch/issues/73339#issuecomment-1058981560 + - name: Enable long paths on Windows + shell: powershell + run: | + Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1 + # Since it's just a defensive command, the workflow should continue even the command fails. This step can be + # removed once Windows Defender is removed from the AMI + - name: Disables Windows Defender scheduled and real-time scanning for files in directories used by PyTorch + continue-on-error: true + shell: powershell + run: | + Add-MpPreference -ExclusionPath $(Get-Location).tostring(),$Env:TEMP -ErrorAction Ignore + # Let's both exclude the path and disable Windows Defender completely just to be sure + # that it doesn't interfere + Set-MpPreference -DisableRealtimeMonitoring $True -ErrorAction Ignore + - name: Checkout PyTorch + uses: actions/checkout@v4 + with: + submodules: recursive + show-progress: false + - name: Clean PyTorch checkout + run: | + # Remove any artifacts from the previous checkouts + git clean -fxd + - name: Populate binary env + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + - name: Build PyTorch binary + shell: bash + run: | + "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + - uses: actions/upload-artifact@v4.4.0 + if: always() + with: + name: wheel-py3_13t-cpu + retention-days: 
14 + if-no-files-found: error + path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" + - name: Wait until all sessions have drained + shell: powershell + if: always() + timeout-minutes: 120 + run: | + .github\scripts\wait_for_ssh_to_drain.ps1 + - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) + shell: powershell + if: always() + run: | + .github\scripts\kill_active_ssh_sessions.ps1 + + wheel-py3_13t-cpu-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - wheel-py3_13t-cpu-build + - get-label-type + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + timeout-minutes: 360 + env: + PYTORCH_ROOT: ${{ github.workspace }} + PACKAGE_TYPE: wheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + SKIP_ALL_TESTS: 1 + DESIRED_PYTHON: "3.13t" steps: - name: Display EC2 information shell: bash @@ -4093,7 +5875,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4143,7 +5929,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13-cuda13_0 + name: wheel-py3_13t-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -4164,29 +5950,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cuda13_0-upload: # Uploading + wheel-py3_13t-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13-cuda13_0-test + needs: wheel-py3_13t-cpu-test with: 
PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.13" - build_name: wheel-py3_13-cuda13_0 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.13t" + build_name: wheel-py3_13t-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13-cuda13_2-build: + wheel-py3_13t-cuda12_6-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -4196,11 +5981,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.13t" steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -4226,7 +6011,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4275,7 +6064,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13-cuda13_2 + name: wheel-py3_13t-cuda12_6 retention-days: 14 
if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4291,10 +6080,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cuda13_2-test: # Testing + wheel-py3_13t-cuda12_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13-cuda13_2-build + - wheel-py3_13t-cuda12_6-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -4303,11 +6092,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.13t" steps: - name: Display EC2 information shell: bash @@ -4324,7 +6113,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4374,7 +6167,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13-cuda13_2 + name: wheel-py3_13t-cuda12_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -4395,43 +6188,45 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-cuda13_2-upload: # Uploading + wheel-py3_13t-cuda12_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13-cuda13_2-test + needs: wheel-py3_13t-cuda12_6-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable 
that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.13" - build_name: wheel-py3_13-cuda13_2 + DESIRED_PYTHON: "3.13t" + build_name: wheel-py3_13t-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13-xpu-build: + wheel-py3_13t-cuda12_8-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 + DESIRED_PYTHON: "3.13t" steps: # NOTE: These environment variables are put here so 
that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -4457,7 +6252,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4490,54 +6285,59 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13-xpu + name: wheel-py3_13t-cuda12_8 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-xpu-test: # Testing + wheel-py3_13t-cuda12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13-xpu-build + - wheel-py3_13t-cuda12_8-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ 
needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13" + DESIRED_PYTHON: "3.13t" steps: - name: Display EC2 information shell: bash @@ -4554,7 +6354,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4587,11 +6387,13 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the # runner.temp variable, which we need. 
@@ -4604,49 +6406,50 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13-xpu + name: wheel-py3_13t-cuda12_8 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Test PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13-xpu-upload: # Uploading + wheel-py3_13t-cuda12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13-xpu-test + needs: wheel-py3_13t-cuda12_8-test with: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu - DESIRED_PYTHON: "3.13" - build_name: wheel-py3_13-xpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.13t" + build_name: wheel-py3_13t-cuda12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13t-cpu-build: + wheel-py3_13t-cuda13_0-build: if: ${{ github.repository_owner == 'pytorch' }} needs: 
get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -4656,11 +6459,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.13t" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -4686,7 +6489,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4735,7 +6542,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13t-cpu + name: wheel-py3_13t-cuda13_0 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4751,20 +6558,21 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cpu-test: # Testing + wheel-py3_13t-cuda13_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - 
wheel-py3_13t-cpu-build + - wheel-py3_13t-cuda13_0-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.13t" steps: @@ -4783,7 +6591,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -4833,7 +6645,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13t-cpu + name: wheel-py3_13t-cuda13_0 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -4854,28 +6666,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cpu-upload: # Uploading + wheel-py3_13t-cuda13_0-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13t-cpu-test + needs: wheel-py3_13t-cuda13_0-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.13t" - build_name: wheel-py3_13t-cpu + build_name: wheel-py3_13t-cuda13_0 secrets: github-token: 
${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13t-cuda12_6-build: + wheel-py3_13t-cuda13_2-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -4885,8 +6698,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.13t" @@ -4964,7 +6777,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13t-cuda12_6 + name: wheel-py3_13t-cuda13_2 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -4980,10 +6793,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cuda12_6-test: # Testing + wheel-py3_13t-cuda13_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13t-cuda12_6-build + - wheel-py3_13t-cuda13_2-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -4992,8 +6805,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.13t" @@ -5063,7 +6876,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13t-cuda12_6 + name: wheel-py3_13t-cuda13_2 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -5084,29 +6897,29 @@ jobs: if: always() run: | 
.github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cuda12_6-upload: # Uploading + wheel-py3_13t-cuda13_2-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13t-cuda12_6-test + needs: wheel-py3_13t-cuda13_2-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.13t" - build_name: wheel-py3_13t-cuda12_6 + build_name: wheel-py3_13t-cuda13_2 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13t-cuda13_0-build: + wheel-py3_13t-xpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -5116,11 +6929,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.13t" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | 
onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -5146,7 +6959,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5195,7 +7012,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13t-cuda13_0 + name: wheel-py3_13t-xpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -5211,21 +7028,20 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cuda13_0-test: # Testing + wheel-py3_13t-xpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13t-cuda13_0-build + - wheel-py3_13t-xpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.13t" steps: @@ -5244,7 +7060,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH 
(Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5294,7 +7114,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13t-cuda13_0 + name: wheel-py3_13t-xpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -5315,29 +7135,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cuda13_0-upload: # Uploading + wheel-py3_13t-xpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13t-cuda13_0-test + needs: wheel-py3_13t-xpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu DESIRED_PYTHON: "3.13t" - build_name: wheel-py3_13t-cuda13_0 + build_name: wheel-py3_13t-xpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13t-cuda13_2-build: + wheel-py3_14-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -5347,11 +7166,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: 
"3.13t" + DESIRED_PYTHON: "3.14" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -5377,7 +7196,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5426,7 +7249,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13t-cuda13_2 + name: wheel-py3_14-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -5442,23 +7265,22 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cuda13_2-test: # Testing + wheel-py3_14-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13t-cuda13_2-build + - wheel-py3_14-cpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to 
get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13t" + DESIRED_PYTHON: "3.14" steps: - name: Display EC2 information shell: bash @@ -5475,7 +7297,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5525,7 +7351,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13t-cuda13_2 + name: wheel-py3_14-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -5546,29 +7372,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-cuda13_2-upload: # Uploading + wheel-py3_14-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13t-cuda13_2-test + needs: wheel-py3_14-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" - GPU_ARCH_TYPE: cuda - DESIRED_PYTHON: "3.13t" - build_name: wheel-py3_13t-cuda13_2 + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.14" + build_name: wheel-py3_14-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_13t-xpu-build: + 
wheel-py3_14-cuda12_6-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -5578,11 +7403,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13t" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 + DESIRED_PYTHON: "3.14" steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -5608,7 +7433,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5657,7 +7486,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_13t-xpu + name: 
wheel-py3_14-cuda12_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -5673,22 +7502,23 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-xpu-test: # Testing + wheel-py3_14-cuda12_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_13t-xpu-build + - wheel-py3_14-cuda12_6-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.13t" + DESIRED_PYTHON: "3.14" steps: - name: Display EC2 information shell: bash @@ -5705,7 +7535,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5755,7 +7589,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_13t-xpu + name: wheel-py3_14-cuda12_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -5776,42 +7610,45 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_13t-xpu-upload: # Uploading + wheel-py3_14-cuda12_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_13t-xpu-test + needs: wheel-py3_14-cuda12_6-test 
with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu - DESIRED_PYTHON: "3.13t" - build_name: wheel-py3_13t-xpu + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda + DESIRED_PYTHON: "3.14" + build_name: wheel-py3_14-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14-cpu-build: + wheel-py3_14-cuda12_8-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -5837,7 +7674,7 @@ jobs: 
echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5870,52 +7707,57 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14-cpu + name: wheel-py3_14-cuda12_8 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cpu-test: # Testing + wheel-py3_14-cuda12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14-cpu-build + - wheel-py3_14-cuda12_8-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch 
PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" steps: @@ -5934,7 +7776,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -5967,11 +7809,13 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the # runner.temp variable, which we need. 
@@ -5984,49 +7828,50 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14-cpu + name: wheel-py3_14-cuda12_8 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Test PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cpu-upload: # Uploading + wheel-py3_14-cuda12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_14-cpu-test + needs: wheel-py3_14-cuda12_8-test with: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.14" - build_name: wheel-py3_14-cpu + build_name: wheel-py3_14-cuda12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14-cuda12_6-build: + wheel-py3_14-cuda13_0-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ 
needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -6036,8 +7881,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" @@ -6066,7 +7911,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -6115,7 +7964,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14-cuda12_6 + name: wheel-py3_14-cuda13_0 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -6131,10 +7980,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cuda12_6-test: # Testing + wheel-py3_14-cuda13_0-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14-cuda12_6-build + - wheel-py3_14-cuda13_0-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -6143,8 +7992,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" @@ -6164,7 +8013,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: 
pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -6214,7 +8067,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14-cuda12_6 + name: wheel-py3_14-cuda13_0 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -6235,29 +8088,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cuda12_6-upload: # Uploading + wheel-py3_14-cuda13_0-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_14-cuda12_6-test + needs: wheel-py3_14-cuda13_0-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu130 + GPU_ARCH_VERSION: "13.0" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.14" - build_name: wheel-py3_14-cuda12_6 + build_name: wheel-py3_14-cuda13_0 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14-cuda13_0-build: + wheel-py3_14-cuda13_2-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -6267,8 +8120,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" @@ -6346,7 +8199,7 
@@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14-cuda13_0 + name: wheel-py3_14-cuda13_2 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -6362,10 +8215,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cuda13_0-test: # Testing + wheel-py3_14-cuda13_2-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14-cuda13_0-build + - wheel-py3_14-cuda13_2-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 @@ -6374,8 +8227,8 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" @@ -6445,7 +8298,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14-cuda13_0 + name: wheel-py3_14-cuda13_2 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -6466,29 +8319,29 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cuda13_0-upload: # Uploading + wheel-py3_14-cuda13_2-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_14-cuda13_0-test + needs: wheel-py3_14-cuda13_2-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu130 - GPU_ARCH_VERSION: "13.0" + DESIRED_CUDA: cu132 + GPU_ARCH_VERSION: "13.2" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.14" - build_name: wheel-py3_14-cuda13_0 + build_name: wheel-py3_14-cuda13_2 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} 
R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14-cuda13_2-build: + wheel-py3_14-xpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -6498,11 +8351,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -6528,7 +8381,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: 
github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -6577,7 +8434,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14-cuda13_2 + name: wheel-py3_14-xpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -6593,21 +8450,20 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cuda13_2-test: # Testing + wheel-py3_14-xpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14-cuda13_2-build + - wheel-py3_14-xpu-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" + runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14" steps: @@ -6626,7 +8482,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -6676,7 +8536,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14-cuda13_2 + name: wheel-py3_14-xpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -6697,29 +8557,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-cuda13_2-upload: # Uploading + wheel-py3_14-xpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: 
write contents: read - needs: wheel-py3_14-cuda13_2-test + needs: wheel-py3_14-xpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu132 - GPU_ARCH_VERSION: "13.2" - GPU_ARCH_TYPE: cuda + DESIRED_CUDA: xpu + GPU_ARCH_TYPE: xpu DESIRED_PYTHON: "3.14" - build_name: wheel-py3_14-cuda13_2 + build_name: wheel-py3_14-xpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14-xpu-build: + wheel-py3_14t-cpu-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -6729,11 +8588,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.14" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: intel-cmplr-lib-rt==2025.3.2 | intel-cmplr-lib-ur==2025.3.2 | intel-cmplr-lic-rt==2025.3.2 | intel-sycl-rt==2025.3.2 | oneccl-devel==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | oneccl==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | impi-rt==2021.17.2; platform_system == 'Linux' and platform_machine == 'x86_64' | onemkl-license==2025.3.1 | onemkl-sycl-blas==2025.3.1 | onemkl-sycl-dft==2025.3.1 | onemkl-sycl-lapack==2025.3.1 | onemkl-sycl-rng==2025.3.1 | onemkl-sycl-sparse==2025.3.1 | dpcpp-cpp-rt==2025.3.2 | intel-opencl-rt==2025.3.2 | mkl==2025.3.1 | intel-openmp==2025.3.2 | tbb==2022.3.1 | tcmlib==1.4.1 | umf==1.0.3 | intel-pti==0.16.0 + DESIRED_PYTHON: "3.14t" + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -6759,7 +8618,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -6808,7 +8671,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14-xpu + name: wheel-py3_14t-cpu retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -6824,10 +8687,10 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-xpu-test: # Testing + wheel-py3_14t-cpu-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14-xpu-build + - wheel-py3_14t-cpu-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" timeout-minutes: 360 @@ -6836,10 +8699,10 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu SKIP_ALL_TESTS: 1 - DESIRED_PYTHON: "3.14" + 
DESIRED_PYTHON: "3.14t" steps: - name: Display EC2 information shell: bash @@ -6856,7 +8719,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -6906,7 +8773,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14-xpu + name: wheel-py3_14t-cpu path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -6927,28 +8794,28 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14-xpu-upload: # Uploading + wheel-py3_14t-cpu-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_14-xpu-test + needs: wheel-py3_14t-cpu-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: xpu - GPU_ARCH_TYPE: xpu - DESIRED_PYTHON: "3.14" - build_name: wheel-py3_14-xpu + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu + DESIRED_PYTHON: "3.14t" + build_name: wheel-py3_14t-cpu secrets: github-token: ${{ secrets.GITHUB_TOKEN }} R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14t-cpu-build: + wheel-py3_14t-cuda12_6-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" @@ -6958,11 +8825,11 @@ jobs: PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we 
eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14t" - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: cuda-toolkit[nvrtc,cudart,cupti,cufft,curand,cusolver,cusparse,cufile,nvjitlink,nvtx]==13.0.2; platform_system == 'Linux' | nvidia-cublas>=13.1.0.3,<=13.1.1.3; platform_system == 'Linux' | cuda-bindings>=13.0.3,<14; platform_system == 'Linux' | nvidia-cudnn-cu13==9.20.0.48; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.1; platform_system == 'Linux' | nvidia-nccl-cu13==2.29.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.4.5; platform_system == 'Linux' steps: # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the @@ -6988,7 +8855,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -7037,7 +8908,7 @@ jobs: - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14t-cpu + name: wheel-py3_14t-cuda12_6 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" @@ -7053,20 +8924,21 @@ jobs: run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14t-cpu-test: # Testing + wheel-py3_14t-cuda12_6-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14t-cpu-build + - wheel-py3_14t-cuda12_6-build - get-label-type - runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge" + runs-on: "${{ 
needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14t" steps: @@ -7085,7 +8957,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -7135,7 +9011,7 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14t-cpu + name: wheel-py3_14t-cuda12_6 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash @@ -7156,39 +9032,42 @@ jobs: if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14t-cpu-upload: # Uploading + wheel-py3_14t-cuda12_6-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_14t-cpu-test + needs: wheel-py3_14t-cuda12_6-test with: PYTORCH_ROOT: ${{ github.workspace }} PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cpu - GPU_ARCH_TYPE: cpu + DESIRED_CUDA: cu126 + GPU_ARCH_VERSION: "12.6" + GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.14t" - build_name: wheel-py3_14t-cpu + build_name: wheel-py3_14t-cuda12_6 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} +<<<<<<< HEAD R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} R2_SECRET_ACCESS_KEY: ${{ 
secrets.R2_SECRET_ACCESS_KEY }} +======= uses: ./.github/workflows/_binary-upload.yml - wheel-py3_14t-cuda12_6-build: + wheel-py3_14t-cuda12_8-build: if: ${{ github.repository_owner == 'pytorch' }} needs: get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.12xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14t" @@ -7217,7 +9096,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -7250,52 +9129,56 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_build.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" - uses: actions/upload-artifact@v4.4.0 if: always() with: - name: wheel-py3_14t-cuda12_6 + name: wheel-py3_14t-cuda12_8 retention-days: 14 if-no-files-found: error path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: 
| .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14t-cuda12_6-test: # Testing + wheel-py3_14t-cuda12_8-test: # Testing if: ${{ github.repository_owner == 'pytorch' }} needs: - - wheel-py3_14t-cuda12_6-build + - wheel-py3_14t-cuda12_8-build - get-label-type runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" timeout-minutes: 360 env: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda SKIP_ALL_TESTS: 1 DESIRED_PYTHON: "3.14t" @@ -7315,7 +9198,7 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" - uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -7348,11 +9231,13 @@ jobs: uses: actions/checkout@v4 with: submodules: recursive + path: pytorch show-progress: false - name: Clean PyTorch checkout run: | # Remove any artifacts from the previous checkouts git clean -fxd + working-directory: pytorch # NOTE: These environment variables are put here so that they can be applied on every job equally # They are also here because setting them at a workflow level doesn't give us access to the # runner.temp variable, which we need. 
@@ -7365,48 +9250,48 @@ jobs: - uses: actions/download-artifact@v4.1.7 name: Download Build Artifacts with: - name: wheel-py3_14t-cuda12_6 + name: wheel-py3_14t-cuda12_8 path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}" - name: Populate binary env shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_populate_env.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Test PyTorch binary shell: bash run: | - "${PYTORCH_ROOT}/.ci/pytorch/binary_windows_test.sh" + "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh" - name: Wait until all sessions have drained shell: powershell + working-directory: pytorch if: always() timeout-minutes: 120 run: | .github\scripts\wait_for_ssh_to_drain.ps1 - name: Kill active ssh sessions if still around (Useful if workflow was cancelled) shell: powershell + working-directory: pytorch if: always() run: | .github\scripts\kill_active_ssh_sessions.ps1 - wheel-py3_14t-cuda12_6-upload: # Uploading + wheel-py3_14t-cuda12_8-upload: # Uploading if: ${{ github.repository_owner == 'pytorch' }} permissions: id-token: write contents: read - needs: wheel-py3_14t-cuda12_6-test + needs: wheel-py3_14t-cuda12_8-test with: - PYTORCH_ROOT: ${{ github.workspace }} + PYTORCH_ROOT: ${{ github.workspace }}/pytorch PACKAGE_TYPE: wheel # TODO: This is a legacy variable that we eventually want to get rid of in # favor of GPU_ARCH_VERSION - DESIRED_CUDA: cu126 - GPU_ARCH_VERSION: "12.6" + DESIRED_CUDA: cu128 + GPU_ARCH_VERSION: "12.8" GPU_ARCH_TYPE: cuda DESIRED_PYTHON: "3.14t" - build_name: wheel-py3_14t-cuda12_6 + build_name: wheel-py3_14t-cuda12_8 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }} - R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} - R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} +>>>>>>> upstream/release/2.11 uses: ./.github/workflows/_binary-upload.yml wheel-py3_14t-cuda13_0-build: if: ${{ github.repository_owner == 'pytorch' }} @@ -7448,7 +9333,11 @@ jobs: echo 
"instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -7546,7 +9435,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -7910,7 +9803,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} @@ -8007,7 +9904,11 @@ jobs: echo "instance-type: $(get_ec2_metadata instance-type)" echo "system info $(uname -a)" - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.11 +>>>>>>> upstream/release/2.11 continue-on-error: true with: github-secret: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/h100-cutlass-backend.yml b/.github/workflows/h100-cutlass-backend.yml index c3778fcc9c8ef..05dc330b86d06 100644 --- a/.github/workflows/h100-cutlass-backend.yml +++ b/.github/workflows/h100-cutlass-backend.yml @@ -28,7 +28,11 @@ jobs: get-label-type: 
if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/h100-distributed.yml b/.github/workflows/h100-distributed.yml index b004a4b38a481..d97e90ca165bc 100644 --- a/.github/workflows/h100-distributed.yml +++ b/.github/workflows/h100-distributed.yml @@ -25,7 +25,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/h100-symm-mem.yml b/.github/workflows/h100-symm-mem.yml index f00f1d0583ff2..6d77090dab4de 100644 --- a/.github/workflows/h100-symm-mem.yml +++ b/.github/workflows/h100-symm-mem.yml @@ -25,7 +25,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/inductor-micro-benchmark.yml b/.github/workflows/inductor-micro-benchmark.yml index 0bc84e9a26b39..c2d660a48a541 100644 --- a/.github/workflows/inductor-micro-benchmark.yml +++ 
b/.github/workflows/inductor-micro-benchmark.yml @@ -21,7 +21,11 @@ permissions: jobs: get-default-label-prefix: name: get-default-label-prefix +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-nightly.yml b/.github/workflows/inductor-nightly.yml index dd950d4b1b099..51a3d98664e9a 100644 --- a/.github/workflows/inductor-nightly.yml +++ b/.github/workflows/inductor-nightly.yml @@ -24,7 +24,11 @@ permissions: jobs: get-default-label-prefix: name: get-default-label-prefix +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-pallas.yml b/.github/workflows/inductor-pallas.yml index 88e43044b5af0..b33e9958e80c2 100644 --- a/.github/workflows/inductor-pallas.yml +++ b/.github/workflows/inductor-pallas.yml @@ -21,7 +21,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-compare.yml 
b/.github/workflows/inductor-perf-compare.yml index 93f3d30fea56e..236ea42542541 100644 --- a/.github/workflows/inductor-perf-compare.yml +++ b/.github/workflows/inductor-perf-compare.yml @@ -19,7 +19,11 @@ jobs: get-default-label-prefix: if: github.repository_owner == 'pytorch' name: get-default-label-prefix +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/inductor-perf-test-b200.yml b/.github/workflows/inductor-perf-test-b200.yml index 029ad1fe8a019..115407a689544 100644 --- a/.github/workflows/inductor-perf-test-b200.yml +++ b/.github/workflows/inductor-perf-test-b200.yml @@ -69,7 +69,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-aarch64.yml b/.github/workflows/inductor-perf-test-nightly-aarch64.yml index 1e1524495a402..049a526bed5c3 100644 --- a/.github/workflows/inductor-perf-test-nightly-aarch64.yml +++ b/.github/workflows/inductor-perf-test-nightly-aarch64.yml @@ -56,7 +56,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ 
(github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-h100.yml b/.github/workflows/inductor-perf-test-nightly-h100.yml index 5b5f89bee0073..135bcbeea0968 100644 --- a/.github/workflows/inductor-perf-test-nightly-h100.yml +++ b/.github/workflows/inductor-perf-test-nightly-h100.yml @@ -79,7 +79,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-rocm-mi300.yml b/.github/workflows/inductor-perf-test-nightly-rocm-mi300.yml index f904bd7ab7b7f..fd2b6c839b79e 100644 --- a/.github/workflows/inductor-perf-test-nightly-rocm-mi300.yml +++ b/.github/workflows/inductor-perf-test-nightly-rocm-mi300.yml @@ -68,7 +68,11 @@ permissions: read-all jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-rocm-mi355.yml b/.github/workflows/inductor-perf-test-nightly-rocm-mi355.yml index 7e06a39b4522b..b5919bb7b8f69 100644 --- a/.github/workflows/inductor-perf-test-nightly-rocm-mi355.yml +++ 
b/.github/workflows/inductor-perf-test-nightly-rocm-mi355.yml @@ -68,7 +68,11 @@ permissions: read-all jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index 7f78e1706923e..40d4c13a6162a 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -66,7 +66,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-x86.yml b/.github/workflows/inductor-perf-test-nightly-x86.yml index ef1a7224c9098..c62b317e64723 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86.yml @@ -66,7 +66,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' 
}} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly-xpu.yml b/.github/workflows/inductor-perf-test-nightly-xpu.yml index 404c6db4fc381..c044ad89f621d 100644 --- a/.github/workflows/inductor-perf-test-nightly-xpu.yml +++ b/.github/workflows/inductor-perf-test-nightly-xpu.yml @@ -68,7 +68,11 @@ permissions: read-all jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml index ddcdc85e20b2f..093f78b2da4cb 100644 --- a/.github/workflows/inductor-perf-test-nightly.yml +++ b/.github/workflows/inductor-perf-test-nightly.yml @@ -69,7 +69,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index 64e79ee3ab941..953c432558679 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -23,7 +23,11 @@ permissions: jobs: get-default-label-prefix: name: get-default-label-prefix +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: 
pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} @@ -60,9 +64,12 @@ jobs: { config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, +<<<<<<< HEAD { config: "inductor_huggingface_unbacked_parity", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "inductor_huggingface_unbacked_parity", shard: 1, num_shards: 1, runner: "linux.aws.a100" }, { config: "inductor_huggingface_unbacked_parity", shard: 1, num_shards: 1, runner: "linux.aws.h100" }, +======= +>>>>>>> upstream/release/2.11 { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, diff --git a/.github/workflows/inductor-rocm-mi200.yml b/.github/workflows/inductor-rocm-mi200.yml index b874352a55701..c4798f931c8fe 100644 --- a/.github/workflows/inductor-rocm-mi200.yml +++ b/.github/workflows/inductor-rocm-mi200.yml @@ -22,7 +22,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ 
github.triggering_actor }} diff --git a/.github/workflows/inductor-rocm-mi300.yml b/.github/workflows/inductor-rocm-mi300.yml index 44801016478da..459c1d3cadc5b 100644 --- a/.github/workflows/inductor-rocm-mi300.yml +++ b/.github/workflows/inductor-rocm-mi300.yml @@ -27,7 +27,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-rocm-mi355.yml b/.github/workflows/inductor-rocm-mi355.yml index 0380206235c43..5701fb64677a9 100644 --- a/.github/workflows/inductor-rocm-mi355.yml +++ b/.github/workflows/inductor-rocm-mi355.yml @@ -26,7 +26,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor-unittest.yml b/.github/workflows/inductor-unittest.yml index a2eb23c7b68ce..118bd994f8d98 100644 --- a/.github/workflows/inductor-unittest.yml +++ b/.github/workflows/inductor-unittest.yml @@ -23,7 +23,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 
'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 66441b8fbba02..a27c9dc06b0f8 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -36,7 +36,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} @@ -74,7 +78,10 @@ jobs: build-environment: ${{ needs.inductor-build.outputs.build-environment }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} +<<<<<<< HEAD enable-torch-trace: "1" +======= +>>>>>>> upstream/release/2.11 secrets: inherit inductor-cpu-build: diff --git a/.github/workflows/lint-autoformat.yml b/.github/workflows/lint-autoformat.yml new file mode 100644 index 0000000000000..66acb3eab1f89 --- /dev/null +++ b/.github/workflows/lint-autoformat.yml @@ -0,0 +1,42 @@ +name: Apply lint suggestions + +on: + pull_request: + types: [opened, synchronize, reopened, labeled, unlabeled] + +jobs: + lintrunner-autoformat: + permissions: + contents: read + pull-requests: write + runs-on: lf.linux.2xlarge + if: ${{ github.repository_owner == 'pytorch' && contains(github.event.pull_request.labels.*.name, 'autoformat') }} + steps: + - name: Checkout pytorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + with: + submodules: true + fetch-depth: 0 + - name: Run lintrunner (nonretryable) + continue-on-error: true + run: | + set -ex + python3 -m venv /tmp/venv + source /tmp/venv/bin/activate + export 
ADDITIONAL_LINTRUNNER_ARGS="format --all-files" + bash .github/scripts/lintrunner.sh + - name: Check for changes + id: git-check + continue-on-error: true + run: | + git diff --exit-code || echo "changes=true" >> "$GITHUB_OUTPUT" + - name: Suggest changes + if: steps.git-check.outputs.changes == 'true' + continue-on-error: true + uses: parkerbxyz/suggest-changes@a2ec1653b0c4cc8287d682f0066dba4a173cc7f3 # v1.0.8 + with: + comment: "Please commit the suggested changes from pytorch's linter." + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true diff --git a/.github/workflows/lint-bc.yml b/.github/workflows/lint-bc.yml index bdba4fb224f26..99f8967eba843 100644 --- a/.github/workflows/lint-bc.yml +++ b/.github/workflows/lint-bc.yml @@ -20,7 +20,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Run BC Lint Action +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/bc-lint@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/bc-lint@release/2.11 +>>>>>>> upstream/release/2.11 with: repo: ${{ github.event.pull_request.head.repo.full_name }} base_sha: ${{ github.event.pull_request.base.sha }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f1c1bede5004e..61646e20d37eb 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,7 +22,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -36,7 +40,11 @@ jobs: all_files: ${{ contains(github.event.pull_request.labels.*.name, 'lint-all-files') || 
contains(github.event.pull_request.labels.*.name, 'Reverted') || github.event_name == 'push' }} lintrunner-clang: +<<<<<<< HEAD uses: ./.github/workflows/_lint.yml +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 # Needed to prevent deduping on HUD name: lintrunner-clang-${{ needs.get-changed-files.outputs.changed-files == '*' && 'all' || 'partial' }} needs: [get-label-type, get-changed-files] @@ -72,7 +80,11 @@ jobs: # fails to find types when it should # NOTE: We should be able to disable this and consolidate with Pyrefly lintrunner-pyrefly: +<<<<<<< HEAD uses: ./.github/workflows/_lint.yml +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 name: lintrunner-pyrefly-${{ needs.get-changed-files.outputs.changed-files == '*' && 'all' || 'partial' }} needs: [get-label-type, get-changed-files] # Only run if there are changed files relevant to pyrefly @@ -91,7 +103,11 @@ jobs: ADDITIONAL_LINTRUNNER_ARGS="--take PYREFLY --all-files" .github/scripts/lintrunner.sh lintrunner-noclang: +<<<<<<< HEAD uses: ./.github/workflows/_lint.yml +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 name: lintrunner-noclang-${{ needs.get-changed-files.outputs.changed-files == '*' && 'all' || 'partial' }} needs: [get-label-type, get-changed-files] with: @@ -107,7 +123,11 @@ jobs: fi quick-checks: +<<<<<<< HEAD if: github.repository_owner == 'pytorch' +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 needs: get-label-type uses: ./.github/workflows/_lint.yml with: @@ -144,7 +164,11 @@ jobs: if: ${{ github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'skip-pr-sanity-checks') && github.repository_owner == 'pytorch' }} steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: 
pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false fetch-depth: -1 @@ -157,7 +181,11 @@ jobs: bash .github/scripts/pr-sanity-check.sh workflow-checks: +<<<<<<< HEAD if: github.repository_owner == 'pytorch' +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 needs: get-label-type uses: ./.github/workflows/_lint.yml with: @@ -165,7 +193,12 @@ jobs: docker-image: ghcr.io/pytorch/test-infra:cpu-x86_64-810d48d script: | # Regenerate workflows +<<<<<<< HEAD RELEASE_VERSION_TAG=2.12 .github/scripts/generate_ci_workflows.py +======= + export RELEASE_VERSION_TAG=2.11 + .github/scripts/generate_ci_workflows.py +>>>>>>> upstream/release/2.11 RC=0 # Assert that regenerating the workflows didn't change them @@ -188,7 +221,11 @@ jobs: exit $RC toc: +<<<<<<< HEAD if: github.repository_owner == 'pytorch' +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 needs: get-label-type uses: ./.github/workflows/_lint.yml with: @@ -222,6 +259,10 @@ jobs: test-tools: name: Test tools if: ${{ github.repository == 'pytorch/pytorch' }} +<<<<<<< HEAD +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 needs: get-label-type uses: ./.github/workflows/_lint.yml with: @@ -239,7 +280,11 @@ jobs: runs-on: linux.24_04.4x steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false fetch-depth: 1 @@ -276,7 +321,11 @@ jobs: # [see note: pytorch repo ref] # deep clone (fetch-depth 0) required, to allow us to use git log - name: Checkout PyTorch +<<<<<<< HEAD uses: 
pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false fetch-depth: 1 diff --git a/.github/workflows/linux-aarch64.yml b/.github/workflows/linux-aarch64.yml new file mode 100644 index 0000000000000..3636c4e626c27 --- /dev/null +++ b/.github/workflows/linux-aarch64.yml @@ -0,0 +1,62 @@ +name: linux-aarch64 + +on: + push: + branches: + - main + - release/* + tags: + - ciflow/linux-aarch64/* + - ciflow/trunk/* + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' && github.run_id }}-${{ github.event_name == 'schedule' }} + cancel-in-progress: true + +jobs: + + get-label-type: + if: github.repository_owner == 'pytorch' + name: get-label-type + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + + linux-jammy-aarch64-py3_10-build: + name: linux-jammy-aarch64-py3.10 + uses: ./.github/workflows/_linux-build.yml + needs: get-label-type + with: + runner_prefix: ${{ needs.get-label-type.outputs.label-type }} + build-environment: linux-jammy-aarch64-py3.10 + docker-image-name: ci-image:pytorch-linux-jammy-aarch64-py3.10-gcc13 + runner: linux.arm64.m7g.4xlarge + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" }, + { config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" }, + { config: "default", shard: 3, num_shards: 3, runner: 
"${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" }, + { config: "openreg", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m7g.4xlarge" }, + { config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m8g.4xlarge" }, + { config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m8g.4xlarge" }, + { config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m8g.4xlarge" }, + { config: "openreg", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.m8g.4xlarge" }, + ]} + secrets: inherit + + linux-jammy-aarch64-py3_10-test: + name: linux-jammy-aarch64-py3.10 + uses: ./.github/workflows/_linux-test.yml + needs: linux-jammy-aarch64-py3_10-build + permissions: + id-token: write + contents: read + with: + build-environment: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.build-environment }} + docker-image: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-jammy-aarch64-py3_10-build.outputs.test-matrix }} + secrets: inherit diff --git a/.github/workflows/llm_td_retrieval.yml b/.github/workflows/llm_td_retrieval.yml index 534cf7aae2807..c6db278c75cb5 100644 --- a/.github/workflows/llm_td_retrieval.yml +++ b/.github/workflows/llm_td_retrieval.yml @@ -12,7 +12,11 @@ jobs: name: get-label-type # Don't run on forked repos if: github.repository_owner == 'pytorch' +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -109,5 +113,9 @@ jobs: AWS_REGION: "" - name: 
Teardown Linux +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() diff --git a/.github/workflows/nightly-s3-uploads.yml b/.github/workflows/nightly-s3-uploads.yml index 5ceb50923be56..0757078885bb2 100644 --- a/.github/workflows/nightly-s3-uploads.yml +++ b/.github/workflows/nightly-s3-uploads.yml @@ -23,7 +23,11 @@ jobs: environment: upload-stats steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: fetch-depth: 1 submodules: false diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cdb4931097fa6..40d72aae4207d 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -26,7 +26,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} @@ -103,7 +107,11 @@ jobs: if: github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') steps: - name: "${{ matrix.repo-owner }}/${{ matrix.repo-name }} update-commit-hash" +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.11 +>>>>>>> upstream/release/2.11 with: repo-owner: ${{ matrix.repo-owner }} repo-name: ${{ matrix.repo-name }} diff --git a/.github/workflows/nitpicker.yml 
b/.github/workflows/nitpicker.yml index cb41646745892..487ffec5752e3 100644 --- a/.github/workflows/nitpicker.yml +++ b/.github/workflows/nitpicker.yml @@ -19,8 +19,13 @@ jobs: if: ${{ github.event.pull_request.number != 26921 && github.repository_owner == 'pytorch' }} steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 - uses: ethanis/nitpicker@c102a39683a80c7db9065f8eab7de8b58871f946 # v1 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + - uses: ethanis/nitpicker@v1 +>>>>>>> upstream/release/2.11 with: nitpicks: '.github/nitpicks.yml' token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/operator_microbenchmark.yml b/.github/workflows/operator_microbenchmark.yml index 1a7c2d8294c29..5cae4d6a00ec0 100644 --- a/.github/workflows/operator_microbenchmark.yml +++ b/.github/workflows/operator_microbenchmark.yml @@ -22,7 +22,11 @@ permissions: jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/periodic-rocm-mi200.yml b/.github/workflows/periodic-rocm-mi200.yml index e7d4bee98d4fa..8f3477112c1e6 100644 --- a/.github/workflows/periodic-rocm-mi200.yml +++ b/.github/workflows/periodic-rocm-mi200.yml @@ -32,7 +32,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && 
github.repository_owner == 'pytorch' with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/periodic-rocm-mi300.yml b/.github/workflows/periodic-rocm-mi300.yml index 459528ef6442f..bdcf6968bfb17 100644 --- a/.github/workflows/periodic-rocm-mi300.yml +++ b/.github/workflows/periodic-rocm-mi300.yml @@ -32,7 +32,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/periodic-rocm-mi355.yml b/.github/workflows/periodic-rocm-mi355.yml index 190041a7a6c3e..ae426ea6fc75f 100644 --- a/.github/workflows/periodic-rocm-mi355.yml +++ b/.github/workflows/periodic-rocm-mi355.yml @@ -33,7 +33,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index b44a3bff003dd..456fc71f7116c 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -38,7 +38,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: (github.event_name != 'schedule' || github.repository == 
'pytorch/pytorch') && github.repository_owner == 'pytorch' with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 70b0218d79f4b..6a11be3f36aa5 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -56,7 +56,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} @@ -504,9 +508,43 @@ jobs: python-version: "3.10" secrets: inherit +<<<<<<< HEAD # ╠══════════════════════════════════════════════════════════════════════╣ # ║ linux-jammy-xpu-n-py3.10 (build) ║ # ╠══════════════════════════════════════════════════════════════════════╣ +======= + linux-jammy-cuda13_0-py3_10-gcc11-inductor-build: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cuda13.0-py3.10-gcc11-sm75 ') }} + name: cuda13.0-py3.10-gcc11-sm75 + uses: ./.github/workflows/_linux-build.yml + needs: + - get-label-type + - job-filter + with: + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build-environment: linux-jammy-cuda13.0-py3.10-gcc11-sm75 + docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11-inductor-benchmarks + cuda-arch-list: '7.5' + test-matrix: | + { include: [ + { config: "pr_time_benchmarks", shard: 1, num_shards: 1, runner: "linux.g4dn.metal.nvidia.gpu" }, + ]} + secrets: inherit + + linux-jammy-cuda13_0-py3_10-gcc11-inductor-test: + if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' cuda13.0-py3.10-gcc11-sm75 ') }} + name: cuda13.0-py3.10-gcc11-sm75 + uses: ./.github/workflows/_linux-test.yml + needs: 
+ - linux-jammy-cuda13_0-py3_10-gcc11-inductor-build + - job-filter + with: + build-environment: linux-jammy-cuda13.0-py3.10-gcc11-sm75 + docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-inductor-build.outputs.test-matrix }} + tests-to-include: ${{ github.event.inputs.tests-to-include || '' }} + secrets: inherit +>>>>>>> upstream/release/2.11 linux-jammy-xpu-n-py3_10-build: if: ${{ needs.job-filter.outputs.jobs == '' || contains(needs.job-filter.outputs.jobs, ' linux-jammy-xpu-n-py3.10 ') }} diff --git a/.github/workflows/quantization-periodic.yml b/.github/workflows/quantization-periodic.yml index 3302ebc351c28..e3cca98d3f941 100644 --- a/.github/workflows/quantization-periodic.yml +++ b/.github/workflows/quantization-periodic.yml @@ -21,7 +21,11 @@ permissions: jobs: get-default-label-prefix: name: get-default-label-prefix +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/rocm-mi200.yml b/.github/workflows/rocm-mi200.yml index 426089e8774ef..c5ceed1f34a80 100644 --- a/.github/workflows/rocm-mi200.yml +++ b/.github/workflows/rocm-mi200.yml @@ -28,7 +28,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff 
--git a/.github/workflows/rocm-mi300.yml b/.github/workflows/rocm-mi300.yml index 2bc8e68aaf9c1..f90c1b8ba44a2 100644 --- a/.github/workflows/rocm-mi300.yml +++ b/.github/workflows/rocm-mi300.yml @@ -27,7 +27,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/rocm-mi355.yml b/.github/workflows/rocm-mi355.yml index 452d29b378834..ae9e560a0042b 100644 --- a/.github/workflows/rocm-mi355.yml +++ b/.github/workflows/rocm-mi355.yml @@ -25,7 +25,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/rocm-navi31.yml b/.github/workflows/rocm-navi31.yml index 2c78e25a1785a..c7f0d4543ea53 100644 --- a/.github/workflows/rocm-navi31.yml +++ b/.github/workflows/rocm-navi31.yml @@ -28,7 +28,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git 
a/.github/workflows/rocm-nightly.yml b/.github/workflows/rocm-nightly.yml index 5dc1ba4d3a410..0c6dd41e1fdd1 100644 --- a/.github/workflows/rocm-nightly.yml +++ b/.github/workflows/rocm-nightly.yml @@ -28,7 +28,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/slow-rocm-mi200.yml b/.github/workflows/slow-rocm-mi200.yml index fe17280b5b2e8..8093dd6f59560 100644 --- a/.github/workflows/slow-rocm-mi200.yml +++ b/.github/workflows/slow-rocm-mi200.yml @@ -36,7 +36,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/slow.yml b/.github/workflows/slow.yml index d5cb9d02c75fa..98334f5df07ff 100644 --- a/.github/workflows/slow.yml +++ b/.github/workflows/slow.yml @@ -36,7 +36,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git 
a/.github/workflows/target-determination-indexer.yml b/.github/workflows/target-determination-indexer.yml index 01ab07ece6a67..3cbe5323cc204 100644 --- a/.github/workflows/target-determination-indexer.yml +++ b/.github/workflows/target-determination-indexer.yml @@ -13,7 +13,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -38,7 +42,11 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11 working-directory: pytorch @@ -53,13 +61,21 @@ jobs: echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}" - name: Pull docker image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG id: install-nvidia-driver +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.11 +>>>>>>> upstream/release/2.11 - name: Clone CodeLlama uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -152,7 +168,11 @@ jobs: "s3://target-determinator-assets/indexes/latest/${ZIP_NAME}" - name: Teardown Linux 
+<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.11 +>>>>>>> upstream/release/2.11 if: always() concurrency: diff --git a/.github/workflows/target_determination.yml b/.github/workflows/target_determination.yml index ded830557d4e9..7fc4ea7da76d9 100644 --- a/.github/workflows/target_determination.yml +++ b/.github/workflows/target_determination.yml @@ -9,7 +9,11 @@ jobs: name: get-label-type # Don't run on forked repos if: github.repository_owner == 'pytorch' +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} @@ -22,8 +26,17 @@ jobs: runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" needs: get-label-type steps: +<<<<<<< HEAD - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 +======= + # [pytorch repo ref] + # Use a pytorch/pytorch reference instead of a reference to the local + # checkout because when we run this action we don't *have* a local + # checkout. In other cases you should prefer a local checkout. 
+ - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false diff --git a/.github/workflows/test-b200.yml b/.github/workflows/test-b200.yml index 1a1dba622da74..5f1570ab75013 100644 --- a/.github/workflows/test-b200.yml +++ b/.github/workflows/test-b200.yml @@ -42,7 +42,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/test-check-binary.yml b/.github/workflows/test-check-binary.yml index d60f816d95b08..00977f93afe47 100644 --- a/.github/workflows/test-check-binary.yml +++ b/.github/workflows/test-check-binary.yml @@ -15,7 +15,11 @@ jobs: check_binary_linux_cpu: if: github.repository_owner == 'pytorch' name: Test check_binary.sh for Linux CPU +<<<<<<< HEAD uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.12 +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: docker-image: python:3.11 docker-build-dir: "skip-docker-build" @@ -30,7 +34,11 @@ jobs: check_binary_linux_cuda: if: github.repository_owner == 'pytorch' name: Test check_binary.sh for Linux CUDA +<<<<<<< HEAD uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.12 +======= + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: runner: linux.g4dn.4xlarge.nvidia.gpu docker-image: python:3.11 diff --git a/.github/workflows/test-h100.yml b/.github/workflows/test-h100.yml index 5964585f1a839..ac1a70d208387 100644 --- 
a/.github/workflows/test-h100.yml +++ b/.github/workflows/test-h100.yml @@ -29,7 +29,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/tools-unit-tests.yml b/.github/workflows/tools-unit-tests.yml index cb58f534eb0da..60aad933eba6a 100644 --- a/.github/workflows/tools-unit-tests.yml +++ b/.github/workflows/tools-unit-tests.yml @@ -25,7 +25,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout pytorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: true fetch-depth: 0 @@ -52,7 +56,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout pytorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: true fetch-depth: 0 diff --git a/.github/workflows/torchbench.yml b/.github/workflows/torchbench.yml index 8195a87720a1e..cfde20de5a3c5 100644 --- a/.github/workflows/torchbench.yml +++ b/.github/workflows/torchbench.yml @@ -19,7 +19,11 @@ jobs: get-default-label-prefix: if: github.repository_owner == 'pytorch' name: get-default-label-prefix +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ 
github.event.pull_request.user.login || github.event.issue.user.login }} diff --git a/.github/workflows/trunk-rocm-sandbox.yml b/.github/workflows/trunk-rocm-sandbox.yml index 9e80cd282769d..bef522fcf78c1 100644 --- a/.github/workflows/trunk-rocm-sandbox.yml +++ b/.github/workflows/trunk-rocm-sandbox.yml @@ -31,7 +31,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index d7e1d0b1cab24..9a202097a6d11 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -53,7 +53,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/unstable.yml b/.github/workflows/unstable.yml index e346eb1d23386..17a826f3ff3bc 100644 --- a/.github/workflows/unstable.yml +++ b/.github/workflows/unstable.yml @@ -46,7 +46,11 @@ jobs: get-label-type: name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} with: 
triggering_actor: ${{ github.triggering_actor }} diff --git a/.github/workflows/update-viablestrict.yml b/.github/workflows/update-viablestrict.yml index 84b6a073ffd8f..e8423cc2bf1af 100644 --- a/.github/workflows/update-viablestrict.yml +++ b/.github/workflows/update-viablestrict.yml @@ -18,7 +18,11 @@ jobs: environment: ${{ (github.event_name == 'schedule') && 'mergebot' || '' }} steps: - name: Update viable/strict +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/update-viablestrict@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/update-viablestrict@release/2.11 +>>>>>>> upstream/release/2.11 id: update_viablestrict with: repository: pytorch/pytorch diff --git a/.github/workflows/update_pytorch_labels.yml b/.github/workflows/update_pytorch_labels.yml index 066d331ddbbac..1c09f8f8f57eb 100644 --- a/.github/workflows/update_pytorch_labels.yml +++ b/.github/workflows/update_pytorch_labels.yml @@ -17,7 +17,11 @@ jobs: contents: read steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: fetch-depth: 1 submodules: false diff --git a/.github/workflows/upload-test-stats-while-running.yml b/.github/workflows/upload-test-stats-while-running.yml index 43811a3bf4bf2..9d158c09e7e32 100644 --- a/.github/workflows/upload-test-stats-while-running.yml +++ b/.github/workflows/upload-test-stats-while-running.yml @@ -15,6 +15,15 @@ jobs: name: Upload test stats while running runs-on: linux.2xlarge steps: +<<<<<<< HEAD +======= + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 + with: + fetch-depth: 1 + submodules: false + +>>>>>>> upstream/release/2.11 - name: Setup Linux uses: pytorch/pytorch/.github/actions/setup-linux@release/2.12 with: diff --git a/.github/workflows/upload-test-stats.yml b/.github/workflows/upload-test-stats.yml index 
21e49a2b0db17..e766c6c8d01bf 100644 --- a/.github/workflows/upload-test-stats.yml +++ b/.github/workflows/upload-test-stats.yml @@ -66,7 +66,11 @@ jobs: run: echo "${TRIGGERING_WORKFLOW}" - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 - name: Configure aws credentials uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 diff --git a/.github/workflows/upload-torch-dynamo-perf-stats.yml b/.github/workflows/upload-torch-dynamo-perf-stats.yml index 2d0ee2684d0de..d6056562f4bfb 100644 --- a/.github/workflows/upload-torch-dynamo-perf-stats.yml +++ b/.github/workflows/upload-torch-dynamo-perf-stats.yml @@ -32,7 +32,11 @@ jobs: name: Upload dynamo performance stats for ${{ github.event.workflow_run.id }}, attempt ${{ github.event.workflow_run.run_attempt }} steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: submodules: false fetch-depth: 1 diff --git a/.github/workflows/upload_test_stats_intermediate.yml b/.github/workflows/upload_test_stats_intermediate.yml index 2247bba923384..aca8d37abe6f3 100644 --- a/.github/workflows/upload_test_stats_intermediate.yml +++ b/.github/workflows/upload_test_stats_intermediate.yml @@ -17,7 +17,11 @@ jobs: environment: upload-stats steps: - name: Checkout PyTorch +<<<<<<< HEAD uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.12 +======= + uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.11 +>>>>>>> upstream/release/2.11 with: fetch-depth: 1 submodules: false diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index 74f576ae1dadb..3ad3a6dd370f3 100644 --- 
a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -50,7 +50,11 @@ jobs: torch_cuda_arch_list: '8.0 8.9 9.0 10.0 12.0' build_environment: linux-jammy-cuda13.0-py3.12-gcc11 steps: +<<<<<<< HEAD - uses: pytorch/test-infra/.github/actions/setup-uv@release/2.12 +======= + - uses: pytorch/test-infra/.github/actions/setup-uv@release/2.11 +>>>>>>> upstream/release/2.11 with: python-version: "3.12" activate-environment: "true" @@ -88,7 +92,11 @@ jobs: - name: Calculate docker image id: calculate-docker-image +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.11 +>>>>>>> upstream/release/2.11 with: working-directory: pytorch/pytorch docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3.12-gcc11-vllm diff --git a/.github/workflows/weekly.yml b/.github/workflows/weekly.yml index c5a3311857327..c3d044da25833 100644 --- a/.github/workflows/weekly.yml +++ b/.github/workflows/weekly.yml @@ -22,7 +22,11 @@ jobs: fetch-depth: 0 - name: update-xla-commit-hash continue-on-error: true +<<<<<<< HEAD uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.12 +======= + uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.11 +>>>>>>> upstream/release/2.11 with: repo-name: xla branch: master diff --git a/.github/workflows/xpu.yml b/.github/workflows/xpu.yml index 32853bb956425..fe480e5461b46 100644 --- a/.github/workflows/xpu.yml +++ b/.github/workflows/xpu.yml @@ -24,7 +24,11 @@ jobs: get-label-type: if: github.repository_owner == 'pytorch' name: get-label-type +<<<<<<< HEAD uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.12 +======= + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.11 +>>>>>>> upstream/release/2.11 with: triggering_actor: ${{ github.triggering_actor }} issue_owner: ${{ github.event.pull_request.user.login || 
github.event.issue.user.login }} diff --git a/aten/src/ATen/native/cuda/Sorting.cu b/aten/src/ATen/native/cuda/Sorting.cu index dcf535aa0f016..de4fd0309b9c4 100644 --- a/aten/src/ATen/native/cuda/Sorting.cu +++ b/aten/src/ATen/native/cuda/Sorting.cu @@ -29,15 +29,26 @@ __global__ void gatherKthValue( index_t inputWithinSliceStride, cuda::detail::TensorInfo kthValue, cuda::detail::TensorInfo indices) { +<<<<<<< HEAD // smem is used by radixSelect for radix bin counts. Type must be index_t to // handle sliceSize > INT_MAX. #ifndef USE_ROCM __shared__ index_t smem[C10_WARP_SIZE]; // one per each warp, up to warp limit +======= + // Indices are limited to integer fp precision, so counts can fit in + // int32, regardless of index_t +#ifndef USE_ROCM + __shared__ int smem[C10_WARP_SIZE]; // one per each warp, up to warp limit +>>>>>>> upstream/release/2.11 #else // Maximum shared memory size for radix select (used in countRadixAggregateCounts): NUM_BUFFERS * MAX_WARPS * RADIX_SIZE. // HIP workgroups have at most 1024 threads. Warp size is at least 32 (can be 64 on some // architectures), so we use 32 for safety: 2 buffers * (1024/32) warps * 4 radix bins = 256. +<<<<<<< HEAD __shared__ index_t smem[256]; +======= + __shared__ int smem[256]; +>>>>>>> upstream/release/2.11 #endif index_t slice = getLinearBlockId(); @@ -113,15 +124,26 @@ __global__ void gatherMedian( index_t numInputSlices, index_t inputWithinSliceStride, bool ignore_nan) { +<<<<<<< HEAD // smem is used by radixSelect for radix bin counts. Type must be index_t to // handle sliceSize > INT_MAX. #ifndef USE_ROCM __shared__ index_t smem[C10_WARP_SIZE]; // one per each warp, up to warp limit +======= + // Shared memory for the subroutine RadixSelect. Note that RadixSelect converts the + // floating point type to int with the same relative ordering. 
+#ifndef USE_ROCM + __shared__ int smem[C10_WARP_SIZE]; // one per each warp, up to warp limit +>>>>>>> upstream/release/2.11 #else // Maximum shared memory size for radix select (used in countRadixAggregateCounts): NUM_BUFFERS * MAX_WARPS * RADIX_SIZE. // HIP workgroups have at most 1024 threads. Warp size is at least 32 (can be 64 on some // architectures), so we use 32 for safety: 2 buffers * (1024/32) warps * 4 radix bins = 256. +<<<<<<< HEAD __shared__ index_t smem[256]; +======= + __shared__ int smem[256]; +>>>>>>> upstream/release/2.11 #endif index_t slice = getLinearBlockId(); diff --git a/aten/src/ATen/native/cuda/SortingRadixSelect.cuh b/aten/src/ATen/native/cuda/SortingRadixSelect.cuh index 67c15e0dd3385..7523aeba6c281 100644 --- a/aten/src/ATen/native/cuda/SortingRadixSelect.cuh +++ b/aten/src/ATen/native/cuda/SortingRadixSelect.cuh @@ -474,6 +474,7 @@ __device__ __forceinline__ void countRadixAggregateCounts( // Maximum number of warps per workgroup. HIP workgroups have at most 1024 threads. // Warp size is at least 32 (can be 64 on some architectures), so we use 32 for safety. // This sizes shared memory buffers to accommodate all possible warps: 1024/32 = 32. +<<<<<<< HEAD constexpr uint MAX_WARPS = 1024/C10_WARP_SIZE_LOWER_BOUND; const int buffer_offset = buffer_index * MAX_WARPS * RadixSize; // offset of the buffer in smem. const uint WARP_BITS = __builtin_ctz(C10_WARP_SIZE); @@ -481,6 +482,15 @@ __device__ __forceinline__ void countRadixAggregateCounts( const uint num_warps = blockDim.x >> WARP_BITS; // Actual number of warps in this block const uint warp_id = threadIdx.x >> WARP_BITS; // = threadIdx.x / C10_WARP_SIZE const int lane_id = at::cuda::getLaneId(); // = threadIdx.x % C10_WARP_SIZE +======= + constexpr uint MAX_WARPS = 1024/32; + const int buffer_offset = buffer_index * MAX_WARPS * RadixSize; // offset of the buffer in smem. 
+ const uint WARP_BITS = __builtin_ctz(warpSize); + + const uint num_warps = blockDim.x >> WARP_BITS; // Actual number of warps in this block + const uint warp_id = threadIdx.x >> WARP_BITS; // = threadIdx.x / warpSize + const int lane_id = at::cuda::getLaneId(); // = threadIdx.x % warpSize +>>>>>>> upstream/release/2.11 // Stage 1: Each warp's lane 0 stores its counts in smem. // Layout after Stage 1: [warp0: all radix bins], [warp1: all radix bins], ... @@ -573,7 +583,11 @@ __device__ void countRadixUsingMaskDataSmem( // current warp. if (dataSmemSize > 0) { // if shared memory is filled, use dataSmem as the input data. +<<<<<<< HEAD countRadixLoop( +======= + countRadixLoop( +>>>>>>> upstream/release/2.11 counts, desired, desiredMask, @@ -581,7 +595,11 @@ __device__ void countRadixUsingMaskDataSmem( dataSmemSize, [&](index_t i) -> scalar_t { return dataSmem[i]; }); } else { // if shared memory is not filled, fall back to global memory. +<<<<<<< HEAD countRadixLoop( +======= + countRadixLoop( +>>>>>>> upstream/release/2.11 counts, desired, desiredMask, diff --git a/aten/src/ATen/native/cuda/TensorTopK.cu b/aten/src/ATen/native/cuda/TensorTopK.cu index 2d21944e92ceb..eb1e600be2a4c 100644 --- a/aten/src/ATen/native/cuda/TensorTopK.cu +++ b/aten/src/ATen/native/cuda/TensorTopK.cu @@ -262,15 +262,25 @@ __global__ void gatherTopK(at::cuda::detail::TensorInfo inpu IndexType indicesWithinSliceStride, T* kthValues) { +<<<<<<< HEAD // smem and counts must use IndexType to safely handle sliceSize > INT_MAX. // In radix selection, counts tracks elements matching a radix pattern, // which can exceed INT_MAX when billions of elements fall into one bin. +======= + // Indices are limited to integer fp precision, so counts can fit in + // int32, regardless of IndexType +>>>>>>> upstream/release/2.11 // Maximum shared memory size for radix select (used in countRadixAggregateCounts): NUM_BUFFERS * MAX_WARPS * RADIX_SIZE. // HIP workgroups have at most 1024 threads. 
Warp size is at least 32 (can be 64 on some // architectures), so we use 32 for safety: 2 buffers * (1024/32) warps * 4 radix bins = 256. +<<<<<<< HEAD __shared__ IndexType smem[256]; __shared__ IndexType writeIndexStart; // index to track where to write results. This is shared by all threads in the block. Increases atomically. +======= + __shared__ int smem[256]; + __shared__ int writeIndexStart; // index to track where to write results. This is shared by all threads in the block. Increases atomically. +>>>>>>> upstream/release/2.11 IndexType slice = getLinearBlockId(); if (slice >= numInputSlices) { diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv index 2712e2d1a5012..568c9b8997e81 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv @@ -50,7 +50,7 @@ LayoutLMForMaskedLM,pass,0 -M2M100ForConditionalGeneration,pass,0 +M2M100ForConditionalGeneration,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv index 2712e2d1a5012..568c9b8997e81 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv @@ -50,7 +50,7 @@ LayoutLMForMaskedLM,pass,0 -M2M100ForConditionalGeneration,pass,0 +M2M100ForConditionalGeneration,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv index 1333ff866469e..48bcca9f4e1ab 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv +++ 
b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv @@ -50,7 +50,7 @@ LayoutLMForMaskedLM,pass,0 -M2M100ForConditionalGeneration,pass,0 +M2M100ForConditionalGeneration,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv index 452bf8707fbfb..e2869a15a1263 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv @@ -50,7 +50,7 @@ LayoutLMForMaskedLM,pass,5 -M2M100ForConditionalGeneration,pass,4 +M2M100ForConditionalGeneration,pass,11 diff --git a/benchmarks/dynamo/timm_models.py b/benchmarks/dynamo/timm_models.py index 09e7647537a4a..d5aab015869c4 100755 --- a/benchmarks/dynamo/timm_models.py +++ b/benchmarks/dynamo/timm_models.py @@ -56,6 +56,57 @@ def pip_install(package): TIMM_MODELS[model_name] = int(batch_size) +<<<<<<< HEAD +======= +# TODO - Figure out the reason of cold start memory spike + +BATCH_SIZE_DIVISORS = { + "beit_base_patch16_224": 2, + "deit_base_distilled_patch16_224": 2, + "gluon_xception65": 2, + "mobilevit_s": 2, + "swin_base_patch4_window7_224": 2, +} + +REQUIRE_HIGHER_TOLERANCE = { + "inception_v3", + "mobilenetv3_large_100", +} + +REQUIRE_HIGHER_TOLERANCE_FP16_XPU = { + "botnet26t_256", +} + +REQUIRE_HIGHER_TOLERANCE_AMP = {} + +REQUIRE_EVEN_HIGHER_TOLERANCE = { + "deit_base_distilled_patch16_224", + "vit_base_patch16_siglip_256", +} + +# These models need higher tolerance in MaxAutotune mode +REQUIRE_EVEN_HIGHER_TOLERANCE_MAX_AUTOTUNE = {} + +REQUIRE_HIGHER_TOLERANCE_FOR_FREEZING = { + "adv_inception_v3", +} + +SCALED_COMPUTE_LOSS = { + "mobilevit_s", +} + +FORCE_AMP_FOR_FP16_BF16_MODELS = {} + +SKIP_ACCURACY_CHECK_AS_EAGER_NON_DETERMINISTIC_MODELS = {} + +REQUIRE_LARGER_MULTIPLIER_FOR_SMALLER_TENSOR = { + "inception_v3", + "mobilenetv3_large_100", + "vit_base_patch14_dinov2.lvd142m", +} + + 
+>>>>>>> upstream/release/2.11 def refresh_model_names(): import glob diff --git a/c10/cuda/CUDAAllocatorConfig.h b/c10/cuda/CUDAAllocatorConfig.h index 928384a874394..3266938d866a9 100644 --- a/c10/cuda/CUDAAllocatorConfig.h +++ b/c10/cuda/CUDAAllocatorConfig.h @@ -34,8 +34,12 @@ class C10_CUDA_API CUDAAllocatorConfig { static bool expandable_segments() { bool enabled = c10::CachingAllocator::AcceleratorAllocatorConfig:: use_expandable_segments(); +<<<<<<< HEAD #if !defined(PYTORCH_C10_DRIVER_API_SUPPORTED) && \ (!defined(USE_ROCM) || (ROCM_VERSION < 70000)) +======= +#if !defined(PYTORCH_C10_DRIVER_API_SUPPORTED) && !defined(USE_ROCM) +>>>>>>> upstream/release/2.11 if (enabled) { TORCH_WARN_ONCE("expandable_segments not supported on this platform") } diff --git a/c10/cuda/CUDACachingAllocator.cpp b/c10/cuda/CUDACachingAllocator.cpp index 774c79102b8d6..25a5c384c49dd 100644 --- a/c10/cuda/CUDACachingAllocator.cpp +++ b/c10/cuda/CUDACachingAllocator.cpp @@ -689,6 +689,7 @@ struct ExpandableSegment { C10_CUDA_CHECK(hipMemImportFromShareableHandle( &handle, myfd_handle, hipMemHandleTypePosixFileDescriptor)); #else +<<<<<<< HEAD C10_CUDA_DRIVER_CHECK_MSG( DriverAPI::get()->cuMemImportFromShareableHandle_( &handle, @@ -698,6 +699,13 @@ struct ExpandableSegment { " fabric_info: {", get_nvml_fabric_info(device), "}"); +======= + C10_CUDA_DRIVER_CHECK(DriverAPI::get()->cuMemImportFromShareableHandle_( + &handle, + // NOLINTNEXTLINE(performance-no-int-to-ptr) + (void*)(uintptr_t)myfd, + CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR)); +>>>>>>> upstream/release/2.11 #endif LOG(INFO) << "use posix fd to import expandable segments."; close(static_cast(myfd)); @@ -786,12 +794,18 @@ struct ExpandableSegment { #endif desc[0].location.type = CU_MEM_LOCATION_TYPE_DEVICE; // NOLINTNEXTLINE(bugprone-signed-char-misuse) +<<<<<<< HEAD desc[0].location.id = static_cast(device); desc[0].flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; +======= + desc.location.id = static_cast(device); + 
desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; +>>>>>>> upstream/release/2.11 #ifdef USE_ROCM C10_CUDA_CHECK(hipMemSetAccess( ptr() + begin * segment_size_, (end - begin) * segment_size_, +<<<<<<< HEAD &desc[0], num_desc)); #else @@ -800,6 +814,13 @@ struct ExpandableSegment { (end - begin) * segment_size_, &desc[0], num_desc)); +======= + &desc, + 1)); +#else + C10_CUDA_DRIVER_CHECK(DriverAPI::get()->cuMemSetAccess_( + ptr_ + begin * segment_size_, (end - begin) * segment_size_, &desc, 1)); +>>>>>>> upstream/release/2.11 #endif } diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 70c5d3de4cf23..c6a5bcb80652d 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1107,6 +1107,13 @@ if(USE_ROCM) ) endif() + # ROCM-SMI needed to support symmetric memory + if(USE_DISTRIBUTED AND UNIX) + list(APPEND Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS + rocm_smi64 + ) + endif() + # ---[ Kernel asserts # Kernel asserts is disabled for ROCm by default. # It can be turned on by turning on the env USE_ROCM_KERNEL_ASSERT to the build system. 
diff --git a/related_commits b/related_commits index 20d917b902254..78a75e17aa1c5 100644 --- a/related_commits +++ b/related_commits @@ -1,3 +1,16 @@ +<<<<<<< HEAD ubuntu|pytorch|apex|release/1.12.0|4fe55b966de2458e4591bed2b0c0f990ffcca683|https://github.com/ROCm/apex ubuntu|pytorch|torchvision|release/0.27|78839c2b06c83c6cfb5c4da692ffb331bbd4c4cc|https://github.com/pytorch/vision ubuntu|pytorch|torchaudio|main|c0cbdb95674556cdff7266f2d44bb855f634cfde|https://github.com/pytorch/audio +======= +ubuntu|pytorch|apex|release/1.11.0|4fe55b966de2458e4591bed2b0c0f990ffcca683|https://github.com/ROCm/apex +centos|pytorch|apex|release/1.11.0|4fe55b966de2458e4591bed2b0c0f990ffcca683|https://github.com/ROCm/apex +ubuntu|pytorch|torchvision|release/0.26|336d36e8db990a905498c73933e35231876e28bc|https://github.com/pytorch/vision +centos|pytorch|torchvision|release/0.26|336d36e8db990a905498c73933e35231876e28bc|https://github.com/pytorch/vision +ubuntu|pytorch|torchdata|release/0.11|377e64c1be69a9be6649d14c9e3664070323e464|https://github.com/pytorch/data +centos|pytorch|torchdata|release/0.11|377e64c1be69a9be6649d14c9e3664070323e464|https://github.com/pytorch/data +ubuntu|pytorch|torchaudio|release/2.11|34c52a67e8941bbd8e6adaca0eb0b9eabec11d78|https://github.com/pytorch/audio +centos|pytorch|torchaudio|release/2.11|34c52a67e8941bbd8e6adaca0eb0b9eabec11d78|https://github.com/pytorch/audio +ubuntu|pytorch|ao|release/0.17.0|afb2844be99514f0d5ff42badd9c3ed0d1811d73|https://github.com/pytorch/ao +centos|pytorch|ao|release/0.17.0|afb2844be99514f0d5ff42badd9c3ed0d1811d73|https://github.com/pytorch/ao +>>>>>>> upstream/release/2.11 diff --git a/requirements-build.txt b/requirements-build.txt index 32367b67bf356..28f43b08394cc 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1,6 +1,11 @@ # Build System requirements +<<<<<<< HEAD # Build requirements pinned to match rocm/pytorch release/2.11: # 
https://github.com/ROCm/pytorch/blob/83524a4c7d748e5de89a7a93cd72dab4bd1fa42d/requirements-build.txt +======= +# setuptools and cmake pinned to match rocm/pytorch release/2.10: +# https://github.com/ROCm/pytorch/blob/0b21eac93ff682d862b257770fff5f9fc069b30a/requirements-build.txt +>>>>>>> upstream/release/2.11 setuptools==79.0.1 cmake==4.0.0 ninja==1.11.1.4 diff --git a/requirements.txt b/requirements.txt index d6552edad483b..3d36cac75447e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,13 @@ hypothesis==6.56.4 jinja2==3.1.6 lintrunner==0.12.11 ; platform_machine != "s390x" networkx==2.8.8 +<<<<<<< HEAD +======= +ninja==1.11.1.4 +numpy==2.0.2 ; python_version == "3.9" +numpy==2.1.2 ; python_version > "3.9" and python_version < "3.14" +numpy==2.4.3 ; python_version >= "3.14" +>>>>>>> upstream/release/2.11 optree==0.13.0 ; python_version < "3.14" optree==0.17.0 ; python_version >= "3.14" psutil==7.2.2 diff --git a/test/cpp_extensions/test_libtorch_agnostic.py b/test/cpp_extensions/test_libtorch_agnostic.py index ebe8dd2536266..30d1a586ec42f 100644 --- a/test/cpp_extensions/test_libtorch_agnostic.py +++ b/test/cpp_extensions/test_libtorch_agnostic.py @@ -1877,6 +1877,60 @@ def test_my_from_blob_with_cuda_deleter_no_leak(self, device): curr_mem = torch.cuda.memory_allocated(device) self.assertEqual(curr_mem, init_mem) +<<<<<<< HEAD +======= + @skipIfTorchVersionLessThan(2, 11) + @skipIfTorchDynamo("no data pointer defined for FakeTensor, FunctionalTensor") + def test_my_from_blob_with_lambda_deleter(self, device): + """Test for from_blob with capturing-lambda deleter (2.11 feature).""" + import libtorch_agn_2_11 as libtorch_agnostic + + from_blob_fn = libtorch_agnostic.ops.my_from_blob_with_lambda_deleter + get_count = libtorch_agnostic.ops.get_lambda_deleter_call_count + reset_count = libtorch_agnostic.ops.reset_lambda_deleter_call_count + + is_cuda = torch.device(device).type == "cuda" + if is_cuda: + init_mem = torch.cuda.memory_allocated(device) + 
+ def inner(): + reset_count() + self.assertEqual(get_count(), 0) + + # We need an original tensor to create the tensor with from_blob. + original = torch.rand(2, 3, device=device, dtype=torch.float32) + blob_tensor = from_blob_fn( + original.data_ptr(), + original.size(), + original.stride(), + device, + torch.float32, + ) + + self.assertEqual(blob_tensor, original) + self.assertEqual(blob_tensor.data_ptr(), original.data_ptr()) + + self.assertEqual(get_count(), 0) + + del blob_tensor + gc.collect() + + # Ensure the deleter was called. The original tensor still exists + # and can be used. + self.assertEqual(get_count(), 1) + original += 1 + # original goes out of scope here and its cuda memory should be + # freed. + + inner() + + if is_cuda: + # original tensor is out of scope, all the memory should be freed + torch.cuda.synchronize(device) + curr_mem = torch.cuda.memory_allocated(device) + self.assertEqual(curr_mem, init_mem) + +>>>>>>> upstream/release/2.11 @onlyCUDA @skipIfTorchVersionLessThan(2, 11) def test_my_from_blob_with_cuda_lambda_deleter_no_leak(self, device): @@ -1901,6 +1955,7 @@ def test_my_from_blob_with_cuda_lambda_deleter_no_leak(self, device): curr_mem = torch.cuda.memory_allocated(device) self.assertEqual(curr_mem, init_mem) +<<<<<<< HEAD @skipIfTorchVersionLessThan(2, 12) @onlyCPU def test_tagged_op(self, device): @@ -1911,6 +1966,8 @@ def test_tagged_op(self, device): self.assertIn(torch.Tag.pt2_compliant_tag, op.tags) self.assertIn(torch.Tag.core, op.tags) +======= +>>>>>>> upstream/release/2.11 @onlyCPU def test_my_layout(self, device): """Test layout() method for various tensor layouts.""" diff --git a/test/distributed/tensor/test_tensor_ops.py b/test/distributed/tensor/test_tensor_ops.py index f5ab8e6844e93..d19ad9b410378 100644 --- a/test/distributed/tensor/test_tensor_ops.py +++ b/test/distributed/tensor/test_tensor_ops.py @@ -19,6 +19,7 @@ from torch.distributed.tensor._sharding_prop import ShardingPropagator from 
torch.distributed.tensor.debug import CommDebugMode from torch.testing._internal.common_distributed import skip_if_lt_x_gpu +<<<<<<< HEAD from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, MI200_ARCH, @@ -27,6 +28,9 @@ serialTest, skipIfRocmArch, ) +======= +from torch.testing._internal.common_utils import MI200_ARCH, run_tests, skipIfRocmArch +>>>>>>> upstream/release/2.11 from torch.testing._internal.distributed._tensor.common_dtensor import ( create_local_tensor_test_class, DTensorContinuousTestBase, @@ -627,7 +631,11 @@ def test_gather(self): self.assertEqual(output_dt.full_tensor(), global_output) @skipIfRocmArch(MI200_ARCH) +<<<<<<< HEAD @serialTest() # heavy combinatorial _test_op calls, serialize to avoid OOM +======= + @with_comms +>>>>>>> upstream/release/2.11 def test_index(self): meshes = [ self.build_device_mesh(), # 1D mesh diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py index fc4bb687e7d2c..d5b59a6fc5ed2 100644 --- a/test/distributed/test_dynamo_distributed.py +++ b/test/distributed/test_dynamo_distributed.py @@ -373,7 +373,12 @@ def run_hf_bert_ddp(self, model, inputs, backend): class TestFakeDistributedSingleProc(torch._dynamo.test_case.TestCase): +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @patch.object(config, "optimize_ddp", True) @patch.object(torch._inductor.config, "fallback_random", True) @@ -386,7 +391,12 @@ def test_hf_bert_ddp_inductor(self): model = FakeDDP(model) run_hf_bert_ddp(self, model, inputs, "inductor") +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @patch.object(config, "optimize_ddp", True) def 
test_hf_bert_ddp_aot_eager(self): model, inputs = get_hf_bert(0) @@ -891,7 +901,12 @@ def _test_hf_bert_ddp_inductor(self, static_graph): model = DDP(model, static_graph=static_graph) run_hf_bert_ddp(self, model, inputs, "inductor") +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @skip_if_lt_x_gpu(2) @import_transformers_or_skip() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -900,7 +915,12 @@ def _test_hf_bert_ddp_inductor(self, static_graph): def test_hf_bert_ddp_inductor(self): self._test_hf_bert_ddp_inductor(static_graph=False) +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @skip_if_lt_x_gpu(2) @import_transformers_or_skip() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -915,14 +935,24 @@ def _test_hf_bert_aot_eager(self, static_graph): model = DDP(model, static_graph=static_graph) run_hf_bert_ddp(self, model, inputs, "aot_eager") +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @skip_if_lt_x_gpu(2) @import_transformers_or_skip() @config.patch(optimize_ddp=True, enable_compiler_collectives=True) def test_hf_bert_ddp_aot_eager(self): self._test_hf_bert_aot_eager(static_graph=False) +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @skip_if_lt_x_gpu(2) @import_transformers_or_skip() @config.patch(optimize_ddp=True, enable_compiler_collectives=True) @@ -1125,7 +1155,12 @@ def test_fsdp_activation_checkpointing(self): find_first_node(cnt.graphs[0], 
tag_activation_checkpoint) is not None ) +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @import_transformers_or_skip() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") # TODO(whc) Investigate why cudagraphs breaks inductor+fsdp for hf_bert @@ -1171,7 +1206,12 @@ def apply_fsdp(model, wrap_policy): ) self.assertTrue(same(correct_results, opt_results)) +<<<<<<< HEAD @_expectedFailureIf_transformers_ge_5_2 +======= + @unittest.expectedFailure + # https://github.com/huggingface/transformers/issues/44188 +>>>>>>> upstream/release/2.11 @import_transformers_or_skip() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") # TODO(whc) Investigate why cudagraphs breaks inductor+fsdp for hf_bert diff --git a/test/inductor/test_ck_backend.py b/test/inductor/test_ck_backend.py index 65da067671a0e..0b6789c636603 100644 --- a/test/inductor/test_ck_backend.py +++ b/test/inductor/test_ck_backend.py @@ -16,8 +16,13 @@ from torch.testing._internal.common_cuda import tf32_off from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, + MI350_ARCH, parametrize, +<<<<<<< HEAD skipIfRocm, +======= + skipIfRocmArch, +>>>>>>> upstream/release/2.11 ) from torch.testing._internal.inductor_utils import ( _quantize_rowwise, @@ -240,6 +245,8 @@ def mm(a, b): Y_eager = a @ b torch.testing.assert_close(Y_compiled, Y_eager, equal_nan=True) + # regression in ROCm 7.2, Mismatched elements, significantly + @skipIfRocmArch(MI350_ARCH) @unittest.skipIf(not torch.version.hip, "ROCM only") @unittest.mock.patch.dict(os.environ, _test_env) @parametrize("max_autotune_gemm_backends", ("CK", "ATen,Triton,CK")) diff --git a/test/inductor/test_mix_order_reduction.py b/test/inductor/test_mix_order_reduction.py index 460dfe4199cfe..ac77112ec5ea9 100644 --- a/test/inductor/test_mix_order_reduction.py +++ 
b/test/inductor/test_mix_order_reduction.py @@ -772,6 +772,7 @@ def f(x): compile_metrics = torch._dynamo.utils._compilation_metrics self.assertEqual(len(compile_metrics), 1, "Don't recompile") +<<<<<<< HEAD @skipIfXpu(msg="https://github.com/intel/intel-xpu-backend-for-triton/issues/6398") def test_additive_rnumel(self): """ @@ -831,6 +832,8 @@ def model(x): torch.testing.assert_close(ref, act, atol=1e-3, rtol=1e-3) +======= +>>>>>>> upstream/release/2.11 @largeTensorTest("36GB", device=GPU_TYPE, inductor=True) def test_out_of_shared_memory(self): """ @@ -952,6 +955,7 @@ def causal_mask(_b, _h, q, kv): loss.backward() self.assertTrue(metrics.codegen_mix_order_reduction > 1) +<<<<<<< HEAD @inductor_config.patch("triton.mix_order_reduction", True) @inductor_config.patch("triton.mix_order_reduction_non_strict_mode", True) def test_dimension_refactoring_mismatch(self): @@ -1065,6 +1069,8 @@ def fwd_bwd(model, x, dy): "Mix order reduction should be triggered", ) +======= +>>>>>>> upstream/release/2.11 @inductor_config.patch( "triton.mix_order_reduction", not inductor_config.triton.mix_order_reduction diff --git a/test/inductor/test_mps_basic.py b/test/inductor/test_mps_basic.py index 5d1d68d391a66..a377628723b81 100644 --- a/test/inductor/test_mps_basic.py +++ b/test/inductor/test_mps_basic.py @@ -207,6 +207,7 @@ def fn(q, k, v): self.common(fn, (q, k, v), atol=1e-4, rtol=1e-4, check_lowp=False) +<<<<<<< HEAD def test_nested_masked_cat(self): # Regression test for YOLOv3 compilation failure on MPS. 
# See https://github.com/pytorch/pytorch/actions/runs/23477894502 @@ -245,6 +246,8 @@ def fn(p1, p2, grid1, grid2, anchor_wh1, anchor_wh2): ), ) +======= +>>>>>>> upstream/release/2.11 class MPSBasicTestsAOTI(TestCase): def check_model(self, m, inp, dynamic_shapes=None): diff --git a/test/inductor/test_pattern_matcher.py b/test/inductor/test_pattern_matcher.py index 8ed4ce990d35a..3dc41b3bbbb9a 100644 --- a/test/inductor/test_pattern_matcher.py +++ b/test/inductor/test_pattern_matcher.py @@ -1240,12 +1240,15 @@ def fn2(inp, a, b): FileCheck().check_not("extern_kernels.addmm(").run(code[0]) @parametrize("dtype", [torch.bfloat16, torch.float16]) +<<<<<<< HEAD @inductor_config.patch( { "fx_graph_remote_cache": False, "keep_addmm_fused_for_half_dtypes": True, } ) +======= +>>>>>>> upstream/release/2.11 def test_unfuse_bias_addmm_half_dtypes(self, dtype): args = [ torch.randn(20, device=GPU_TYPE, dtype=dtype), @@ -1262,6 +1265,7 @@ def fn(inp, a, b): _, (code) = run_and_get_code(fn, args[0], args[1], args[2]) FileCheck().check("extern_kernels.addmm(").run(code[0]) +<<<<<<< HEAD @parametrize("dtype", [torch.bfloat16, torch.float16]) @inductor_config.patch( { @@ -1283,6 +1287,8 @@ def fn(inp, a, b): _, (code) = run_and_get_code(fn, args[0], args[1], args[2]) FileCheck().check_not("extern_kernels.addmm(").run(code[0]) +======= +>>>>>>> upstream/release/2.11 def test_addmm_alpha_beta_with_pointwise(self): # Test that addmm with alpha/beta != 1 is unfused correctly with pointwise ops # See https://github.com/pytorch/pytorch/issues/167313 diff --git a/test/inductor/test_torchinductor_dynamic_shapes.py b/test/inductor/test_torchinductor_dynamic_shapes.py index 1144c1f9c4553..351401da777c5 100644 --- a/test/inductor/test_torchinductor_dynamic_shapes.py +++ b/test/inductor/test_torchinductor_dynamic_shapes.py @@ -30,7 +30,11 @@ MI350_ARCH, parametrize, serialTest, +<<<<<<< HEAD skipIfRocmArch, +======= + skipIfRocm, +>>>>>>> upstream/release/2.11 TEST_CUDA_MEM_LEAK_CHECK, 
TEST_WITH_ASAN, ) @@ -643,6 +647,7 @@ def f(x, w): torch.compile(fullgraph=True)(f)(x, w).sum().backward() self.assertEqual(orig_w, w.grad) + @skipIfRocm # regression in ROCm 7.2, XBLOCK should remain 64 (got 256) @torch._dynamo.config.patch( capture_scalar_outputs=True, capture_dynamic_output_shape_ops=True ) diff --git a/test/test_cuda.py b/test/test_cuda.py index b33f21e6dfce2..d7eb04f7e2a76 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -518,6 +518,9 @@ def test_out_of_memory_retry(self): IS_JETSON, "oom reporting has issues on jetson igx due to partial nvml support" ) def test_set_per_process_memory_fraction(self): + if torch.version.hip and ('gfx1101' in torch.cuda.get_device_properties(0).gcnArchName): + torch.cuda.empty_cache() + torch.cuda.reset_peak_memory_stats() orig = torch.cuda.get_per_process_memory_fraction(0) torch.cuda.reset_peak_memory_stats(0) try: @@ -6776,6 +6779,10 @@ def test_graph_capture_reclaim_4_streams(self): "graph_capture_record_stream_reuse:False" ) +<<<<<<< HEAD +======= + +>>>>>>> upstream/release/2.11 @unittest.skipIf( not TEST_CUDA_GRAPH, "CUDA >= 11.0 or ROCM >= 5.3 required for graphs" ) diff --git a/test/test_mps.py b/test/test_mps.py index 9cdcd4b484b3c..d8c9070437f72 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -9953,6 +9953,7 @@ def test_sdpa_full_mask(self, dtype): out_mps = F.scaled_dot_product_attention(q.to('mps'), k.to('mps'), v.to('mps'), attn_mask=mask.to('mps')) self._compare_tensors(out_mps.cpu(), out_cpu) +<<<<<<< HEAD @parametrize("dtype", [torch.float32, torch.bfloat16]) def test_sdpa_math_mps_bool_mask_1pass(self, dtype): torch.manual_seed(0) @@ -9969,6 +9970,8 @@ def test_sdpa_math_mps_bool_mask_1pass(self, dtype): ) self._compare_tensors(out_mps.cpu(), out_cpu) +======= +>>>>>>> upstream/release/2.11 @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float]) def test_sdpa_2pass(self, dtype): # Regression test for https://github.com/pytorch/pytorch/issues/174861 diff --git 
a/test/test_transformers.py b/test/test_transformers.py index ced9b0133e11d..0dea2a6293de5 100644 --- a/test/test_transformers.py +++ b/test/test_transformers.py @@ -52,7 +52,11 @@ PLATFORM_SUPPORTS_MEM_EFF_ATTENTION, PLATFORM_SUPPORTS_FUSED_ATTENTION, PLATFORM_SUPPORTS_CUDNN_ATTENTION, +<<<<<<< HEAD PLATFORM_SUPPORTS_CK_SDPA, +======= + tf32_off, +>>>>>>> upstream/release/2.11 tf32_on_and_off, tf32_enabled, ) @@ -3801,8 +3805,9 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, if dropout_p == 0.0: with sdpa_kernel(backends=[SDPBackend.MATH]): # High Precision Math Reference - out_ref = F.scaled_dot_product_attention(query_ref, key_ref, value_ref, - dropout_p=dropout_p, is_causal=is_causal, scale=scale) + with tf32_off(): + out_ref = F.scaled_dot_product_attention(query_ref, key_ref, value_ref, + dropout_p=dropout_p, is_causal=is_causal, scale=scale) # Low Precision Math Reference out_lp_ref = F.scaled_dot_product_attention(query, key, value, dropout_p=dropout_p, is_causal=is_causal, scale=scale) @@ -3813,8 +3818,9 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, torch.manual_seed(seed) dropout_mask = _get_mem_eff_drop_mask(batch_size, n_heads, seq_len_q, seq_len_k, dropout_p, seed, 0, device=device) # High Precision Math Reference - out_ref = torch.ops.aten._scaled_dot_product_attention_math( - query_ref, key_ref, value_ref, dropout_p=dropout_p, is_causal=is_causal, scale=scale, dropout_mask=dropout_mask)[0] + with tf32_off(): + out_ref = torch.ops.aten._scaled_dot_product_attention_math( + query_ref, key_ref, value_ref, dropout_p=dropout_p, is_causal=is_causal, scale=scale, dropout_mask=dropout_mask)[0] # Low Precision Math Reference out_lp_ref = torch.ops.aten._scaled_dot_product_attention_math( query, key, value, dropout_p=dropout_p, is_causal=is_causal, scale=scale, @@ -3824,7 +3830,8 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, grads = 
torch.autograd.grad(out, (query, key, value), upstream_grad) grads_ref_lp = torch.autograd.grad(out_lp_ref, (query, key, value), upstream_grad) - grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref), upstream_grad) + with tf32_off(): + grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref), upstream_grad) fudge_factors = { 'out': 3.0 , @@ -3921,8 +3928,9 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, if dropout_p == 0.0: with sdpa_kernel(backends=[SDPBackend.MATH]): # High Precision Math Reference - out_ref = F.scaled_dot_product_attention(query_ref, key_ref, value_ref, attn_mask_ref, - dropout_p=dropout_p, is_causal=is_causal, scale=scale) + with tf32_off(): + out_ref = F.scaled_dot_product_attention(query_ref, key_ref, value_ref, attn_mask_ref, + dropout_p=dropout_p, is_causal=is_causal, scale=scale) # Low Precision Math Reference out_lp_ref = F.scaled_dot_product_attention(query, key, value, attn_mask, dropout_p=dropout_p, is_causal=is_causal, scale=scale) @@ -3934,9 +3942,10 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, dropout_mask = _get_mem_eff_drop_mask(batch_size, n_heads, seq_len_q, seq_len_k, dropout_p, seed, 0, device=device) # High Precision Math Reference - out_ref = torch.ops.aten._scaled_dot_product_attention_math( - query_ref, key_ref, value_ref, attn_mask_ref, dropout_p=dropout_p, is_causal=is_causal, - scale=scale, dropout_mask=dropout_mask)[0] + with tf32_off(): + out_ref = torch.ops.aten._scaled_dot_product_attention_math( + query_ref, key_ref, value_ref, attn_mask_ref, dropout_p=dropout_p, is_causal=is_causal, + scale=scale, dropout_mask=dropout_mask)[0] # Low Precision Math Reference out_lp_ref = torch.ops.aten._scaled_dot_product_attention_math( query, key, value, attn_mask, @@ -3947,7 +3956,8 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, grads = torch.autograd.grad(out, (query, key, value, 
attn_mask), upstream_grad) grads_ref_lp = torch.autograd.grad(out_lp_ref, (query, key, value, attn_mask), upstream_grad) - grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref, attn_mask_ref), upstream_grad) + with tf32_off(): + grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref, attn_mask_ref), upstream_grad) fudge_factors = { "out": 4, @@ -4059,8 +4069,9 @@ def test_flash_attention_vs_math_ref_grads(self, device, batch_size: int, seq_le query, key, value, dropout_p=dropout_p, is_causal=is_causal, scale=scale, enable_gqa=enable_gqa) with sdpa_kernel(backends=[SDPBackend.MATH]): # High Precision Math Reference - out_ref = F.scaled_dot_product_attention( - query_ref, key_ref, value_ref, is_causal=is_causal, scale=scale, enable_gqa=enable_gqa) + with tf32_off(): + out_ref = F.scaled_dot_product_attention( + query_ref, key_ref, value_ref, is_causal=is_causal, scale=scale, enable_gqa=enable_gqa) # Low Precision Math Reference out_lp_ref = F.scaled_dot_product_attention( query, key, value, is_causal=is_causal, scale=scale, enable_gqa=enable_gqa) @@ -4098,9 +4109,10 @@ def test_flash_attention_vs_math_ref_grads(self, device, batch_size: int, seq_le dropout_mask = (softmax_mask <= int((1.0 - dropout_p) * 255.0)).to(torch.float32) # High Precision Math Reference - out_ref = torch.ops.aten._scaled_dot_product_attention_math( - query_ref, key_ref, value_ref, dropout_p=dropout_p, is_causal=is_causal, - scale=scale, dropout_mask=dropout_mask, enable_gqa=enable_gqa)[0] + with tf32_off(): + out_ref = torch.ops.aten._scaled_dot_product_attention_math( + query_ref, key_ref, value_ref, dropout_p=dropout_p, is_causal=is_causal, + scale=scale, dropout_mask=dropout_mask, enable_gqa=enable_gqa)[0] # Low Precision Math Reference out_lp_ref = torch.ops.aten._scaled_dot_product_attention_math( query, key, value, dropout_mask=dropout_mask, dropout_p=dropout_p, @@ -4115,7 +4127,8 @@ def test_flash_attention_vs_math_ref_grads(self, device, batch_size: 
int, seq_le grads = torch.autograd.grad(out, (query, key, value), upstream_grad) grads_ref_lp = torch.autograd.grad(out_lp_ref, (query, key, value), upstream_grad) - grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref), upstream_grad) + with tf32_off(): + grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref), upstream_grad) fudge_factors = { 'out': 4, @@ -4278,8 +4291,9 @@ def get_dropout_mask(output, fused_kernel, batch_size, n_heads, q_len, kv_len, d with sdpa_kernel(backends=[SDPBackend.MATH]): if dropout_p == 0.0: # High Precision Math Reference - out_ref = F.scaled_dot_product_attention(query_ref, key_ref, value_ref, - dropout_p=dropout_p, is_causal=is_causal) + with tf32_off(): + out_ref = F.scaled_dot_product_attention(query_ref, key_ref, value_ref, + dropout_p=dropout_p, is_causal=is_causal) # Low Precision Math Reference out_lp_ref = F.scaled_dot_product_attention(query, key, value, dropout_p=dropout_p, is_causal=is_causal) @@ -4289,9 +4303,10 @@ def get_dropout_mask(output, fused_kernel, batch_size, n_heads, q_len, kv_len, d dropout_mask = get_dropout_mask(output_tuple, fused_kernel, batch_size, n_heads, seq_len_q, seq_len_k, dropout_p, device) # High Precision Math Reference - out_ref = torch.ops.aten._scaled_dot_product_attention_math( - query_ref, key_ref, value_ref, dropout_p=dropout_p, is_causal=is_causal, - dropout_mask=dropout_mask)[0] + with tf32_off(): + out_ref = torch.ops.aten._scaled_dot_product_attention_math( + query_ref, key_ref, value_ref, dropout_p=dropout_p, is_causal=is_causal, + dropout_mask=dropout_mask)[0] # Low Precision Math Reference out_lp_ref = torch.ops.aten._scaled_dot_product_attention_math( query, key, value, dropout_p=dropout_p, is_causal=is_causal, @@ -4303,7 +4318,8 @@ def get_dropout_mask(output, fused_kernel, batch_size, n_heads, q_len, kv_len, d g1.replay() if fused_kernel != SDPBackend.CUDNN_ATTENTION or dropout_p == 0.0: grads_ref_lp = torch.autograd.grad(out_lp_ref, (query, 
key, value), upstream_grad) - grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref), upstream_grad) + with tf32_off(): + grads_ref = torch.autograd.grad(out_ref, (query_ref, key_ref, value_ref), upstream_grad) fudge_factors = { 'out': 3.0, @@ -4527,8 +4543,9 @@ def rand_nt(sequence_list, num_heads, head_dim): out = F.scaled_dot_product_attention(query, key, value, dropout_p=dropout_p, is_causal=is_causal, scale=scale) with sdpa_kernel(backends=[SDPBackend.MATH]): # High Precision Math Reference - out_ref = F.scaled_dot_product_attention( - query_ref, key_ref, value_ref, is_causal=is_causal, scale=scale) + with tf32_off(): + out_ref = F.scaled_dot_product_attention( + query_ref, key_ref, value_ref, is_causal=is_causal, scale=scale) # Low Precision Math Reference out_lp_ref = F.scaled_dot_product_attention( query_ref_lp, key_ref_lp, value_ref_lp, is_causal=is_causal, scale=scale) @@ -4563,9 +4580,10 @@ def rand_nt(sequence_list, num_heads, head_dim): nt_stack.append(torch.cat(batch_stack)) nested_dropout_mask = torch.nested.nested_tensor(nt_stack) # High Precision Math Reference - out_ref = torch.ops.aten._scaled_dot_product_attention_math( - query_ref, key_ref, value_ref, dropout_p=dropout_p, - is_causal=is_causal, scale=scale, dropout_mask=nested_dropout_mask)[0] + with tf32_off(): + out_ref = torch.ops.aten._scaled_dot_product_attention_math( + query_ref, key_ref, value_ref, dropout_p=dropout_p, + is_causal=is_causal, scale=scale, dropout_mask=nested_dropout_mask)[0] # Low Precision Math Reference out_lp_ref = torch.ops.aten._scaled_dot_product_attention_math( query_ref_lp, key_ref_lp, value_ref_lp, dropout_p=dropout_p, is_causal=is_causal, scale=scale, @@ -4574,7 +4592,8 @@ def rand_nt(sequence_list, num_heads, head_dim): upstream_grad = out.detach().clone().contiguous() out.backward(upstream_grad) - out_ref.backward(upstream_grad.to(out_ref.dtype)) + with tf32_off(): + out_ref.backward(upstream_grad.to(out_ref.dtype)) 
out_lp_ref.backward(upstream_grad.to(out_lp_ref.dtype)) dropout_fudge_factor = 1.0 if dropout_p == 0.0 else 2.0 diff --git a/tools/stats/import_test_stats.py b/tools/stats/import_test_stats.py index 3c3dfcdfd08f8..7af0b0114ae96 100644 --- a/tools/stats/import_test_stats.py +++ b/tools/stats/import_test_stats.py @@ -112,7 +112,11 @@ def process_disabled_test(the_response: dict[str, Any]) -> dict[str, Any]: return disabled_test_from_issues try: +<<<<<<< HEAD url = "https://ossci-metrics.s3.amazonaws.com/disabled-tests-condensed.json?versionId=XRBoKk5TT4f6n48PbQ9OntiMMQveSs3J" +======= + url = "https://ossci-metrics.s3.amazonaws.com/disabled-tests-condensed.json?versionId=cnSTGFIe2xdODOeLj3qZMwi4tgoH6y67" +>>>>>>> upstream/release/2.11 return fetch_and_cache(dirpath, filename, url, process_disabled_test) except Exception: print("Couldn't download test skip set, leaving all tests enabled...") diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index f68171baef7c0..3ad06fb1fedf7 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -1965,6 +1965,13 @@ class triton: # Don't allow multi-stages by default to avoid out of shared memory mix_order_reduction_allow_multi_stages = ( os.environ.get("TORCHINDUCTOR_MIX_ORDER_REDUCTION_ALLOW_MULTI_STAGES") == "1" +<<<<<<< HEAD +======= + ) + + enable_tlx_templates: bool = ( + os.environ.get("TORCHINDUCTOR_ENABLE_TLX_TEMPLATES", "0") == "1" +>>>>>>> upstream/release/2.11 ) # Map for storing the amount of kernel runs with dumped input tensors diff --git a/torch/_inductor/fx_passes/post_grad.py b/torch/_inductor/fx_passes/post_grad.py index 02ec541caf7a9..bcc03e33e4108 100644 --- a/torch/_inductor/fx_passes/post_grad.py +++ b/torch/_inductor/fx_passes/post_grad.py @@ -1567,10 +1567,16 @@ def should_prefer_unfused_addmm(match): extra_check=should_prefer_unfused_addmm, ) def unfuse_bias_add_to_pointwise(match: Match, mat1, mat2, *, inp, alpha, beta): +<<<<<<< HEAD if 
config.keep_addmm_fused_for_half_dtypes and inp.meta["val"].dtype in ( torch.bfloat16, torch.float16, ): +======= + # Unfusing addmm introduces an extra bf16/fp16 truncation at the mm output + # that compounds through deep models and causes accuracy failures. + if inp.meta["val"].dtype in (torch.bfloat16, torch.float16): +>>>>>>> upstream/release/2.11 return def repl(inp, x1, x2, alpha, beta): diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py index 1b6dae341aae6..f474ccd474496 100644 --- a/torch/_inductor/runtime/triton_heuristics.py +++ b/torch/_inductor/runtime/triton_heuristics.py @@ -3177,6 +3177,7 @@ def pointwise( ] # Additional configs appended for ROCm builds if torch.version.hip: +<<<<<<< HEAD configs.extend( [ triton_config_with_settings( @@ -3195,6 +3196,33 @@ def pointwise( ), ] ) +======= + if inductor_meta.get("max_autotune_pointwise"): + configs.extend( + [ + triton_config_with_settings( + size_hints, TRITON_MAX_BLOCK["X"], waves_per_eu=2 + ), + triton_config_with_settings( + size_hints, + 4096, # wrt: better than the max_block for some kernel + ), + triton_config_with_settings( + size_hints, + 2048, + num_warps=8, + num_stages=2, + waves_per_eu=1, # 20% improvement + ), + triton_config_with_settings( + size_hints, + 512, + num_warps=4, + num_stages=4, # 30% improvement + ), + ] + ) +>>>>>>> upstream/release/2.11 if inductor_meta.get("atomic_add_found"): configs.extend( [ @@ -3564,6 +3592,7 @@ def outer_config_opt(): ] if torch.version.hip: +<<<<<<< HEAD hip_configs = [ make_config(1024, 8, num_warps=4, num_stages=1, waves_per_eu=2), make_config(512, 8, num_warps=4, num_stages=1, waves_per_eu=1), @@ -3582,6 +3611,13 @@ def outer_config_opt(): c for c in result_configs if c.kwargs.get("XBLOCK", 0) * max_persistent_rblock <= 4096 +======= + result_configs.extend( + [ + make_config(1024, 8, num_warps=4, num_stages=1, waves_per_eu=2), + make_config(512, 8, num_warps=4, num_stages=1, waves_per_eu=1), 
+ make_config(32, 128, num_warps=1, num_stages=1), # 30% improvement +>>>>>>> upstream/release/2.11 ] return result_configs @@ -4014,6 +4050,17 @@ def _persistent_reduction_configs( if conf not in configs: configs.append(conf) + # Additional custom configs in support of customer workloads + configs.append( + triton_config_reduction( + size_hints, + 1, + rnumel, + num_stages=3, + num_warps=2, + ) # 18% improvement + ) + for c in configs: # we don't need Rn_BLOCK for persistent reduction for prefix in size_hints: diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index 251a2e64bc6fa..2a8a81cde334e 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -1738,9 +1738,14 @@ def get_stride_and_maybe_freeze_layout(self, node) -> list[int]: if isinstance(layout, ir.FlexibleLayout) and not isinstance( node, ir.ReinterpretView ): +<<<<<<< HEAD if not use_aten_gemm_kernels() or self.always_freeze_layout: # No ExternKernel fallback available, or always_freeze_layout is set # (e.g., for FlexAttention templates), freeze immediately +======= + if not use_aten_gemm_kernels(): + # No ExternKernel fallback available, freeze immediately +>>>>>>> upstream/release/2.11 node.data.freeze_layout() else: # Compute what strides WOULD be if frozen, without actually freezing diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index 641c8568fb298..56924613303f3 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -1524,6 +1524,9 @@ def __init__(self) -> None: ), ROCmGemmConfig(256, 128, 32, self.default_num_stages, 8, group_m=16), ROCmGemmConfig(256, 128, 64, self.default_num_stages, 8, group_m=4), + ROCmGemmConfig(256, 128, 64, self.default_num_stages, 8, group_m=16, matrix_instr_nonkdim=0), + ROCmGemmConfig(256, 128, 64, self.default_num_stages, 8, group_m=8, matrix_instr_nonkdim=0), + 
ROCmGemmConfig(128, 128, 64, self.default_num_stages, 8, group_m=4, matrix_instr_nonkdim=0), ROCmGemmConfig(256, 256, 64, self.default_num_stages, 8, group_m=4), ] diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 9b1665c67d0b3..f9802160c6164 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -19233,6 +19233,17 @@ def sample_inputs_abs(op_info, device, dtype, requires_grad, op_kwargs=None, **k 'TestSchemaCheckModeOpInfo', 'test_schema_correctness', dtypes=(torch.complex64, torch.complex128)), +<<<<<<< HEAD +======= + DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_out', + device_type='mps', dtypes=[torch.float32]), + DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_variant_consistency_eager', + device_type='mps', dtypes=[torch.float32]), + DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit', + device_type='mps', dtypes=[torch.float32]), + # The operator 'aten::take' is not currently implemented for the MPS device + DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_out_warning', device_type='mps'), +>>>>>>> upstream/release/2.11 )), OpInfo('svd_lowrank', op=lambda *args, **kwargs: wrapper_set_seed( diff --git a/torch/testing/_internal/opinfo/definitions/linalg.py b/torch/testing/_internal/opinfo/definitions/linalg.py index b96a66af8db95..381bcddd547db 100644 --- a/torch/testing/_internal/opinfo/definitions/linalg.py +++ b/torch/testing/_internal/opinfo/definitions/linalg.py @@ -2630,6 +2630,16 @@ def make_input(): dtypes=[torch.float32], active_if=TEST_WITH_ROCM, ), +<<<<<<< HEAD +======= + # MPS: AssertionError: The values for attribute 'shape' do not match: torch.Size([0, 0]) != torch.Size([0, 1]). 
+ DecorateInfo( + unittest.expectedFailure, + "TestCommon", + "test_out_warning", + device_type="mps", + ), +>>>>>>> upstream/release/2.11 ), ), OpInfo( diff --git a/version.txt b/version.txt index d8b698973a491..b28bec75bff6d 100644 --- a/version.txt +++ b/version.txt @@ -1 +1,5 @@ +<<<<<<< HEAD 2.12.0 +======= +2.11.0 +>>>>>>> upstream/release/2.11