diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 96c16e31ac4..f6e39a63b92 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -659af3c353e49b35c191cdd2dba3b3c79d0e6822 +release/2.11 \ No newline at end of file diff --git a/.ci/docker/common/install_pytorch.sh b/.ci/docker/common/install_pytorch.sh index 9809b6a8e3c..548a24f885d 100755 --- a/.ci/docker/common/install_pytorch.sh +++ b/.ci/docker/common/install_pytorch.sh @@ -32,9 +32,9 @@ install_pytorch_and_domains() { pip_install "$(echo dist/*.whl)" # Grab the pinned audio and vision commits from PyTorch - TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt) + TORCHAUDIO_VERSION=release/2.11 export TORCHAUDIO_VERSION - TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt) + TORCHVISION_VERSION=release/0.26 export TORCHVISION_VERSION install_domains diff --git a/.ci/scripts/test_model_e2e.sh b/.ci/scripts/test_model_e2e.sh index cb7785036d3..7014b3caef6 100755 --- a/.ci/scripts/test_model_e2e.sh +++ b/.ci/scripts/test_model_e2e.sh @@ -248,7 +248,7 @@ if [ "$AUDIO_URL" != "" ]; then elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then conda install -y -c conda-forge "ffmpeg<8" pip install datasets soundfile - pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu + pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])" fi diff --git a/.ci/scripts/test_wheel_package_qnn.sh b/.ci/scripts/test_wheel_package_qnn.sh index 861541b3083..1c66426f0af 100644 --- a/.ci/scripts/test_wheel_package_qnn.sh +++ 
b/.ci/scripts/test_wheel_package_qnn.sh @@ -158,17 +158,17 @@ print(module_vars["TORCH_VERSION"]) PY ) - NIGHTLY_VERSION=$( - "$PYBIN" - <<'PY' -import runpy -module_vars = runpy.run_path("torch_pin.py") -print(module_vars["NIGHTLY_VERSION"]) -PY -) - echo "=== [$LABEL] Install torch==${TORCH_VERSION}.${NIGHTLY_VERSION} ===" - - # Install torchao based on the pinned PyTorch version - "$PIPBIN" install torch=="${TORCH_VERSION}.${NIGHTLY_VERSION}" --index-url "https://download.pytorch.org/whl/nightly/cpu" +# NIGHTLY_VERSION=$( +# "$PYBIN" - <<'PY' +# import runpy +# module_vars = runpy.run_path("torch_pin.py") +# print(module_vars["NIGHTLY_VERSION"]) +# PY +# ) + echo "=== [$LABEL] Install torch==${TORCH_VERSION} ===" + + # Install torch based on the pinned PyTorch version, preferring the PyTorch test index + "$PIPBIN" install torch=="${TORCH_VERSION}" --extra-index-url "https://download.pytorch.org/whl/test" "$PIPBIN" install wheel # Install torchao based on the pinned commit from third-party/ao submodule diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index 18038e36831..86e54b478ef 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -53,7 +53,7 @@ dedupe_macos_loader_path_rpaths() { pushd .. torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib popd - + if [[ -z "${torch_lib_dir}" || ! 
-d "${torch_lib_dir}" ]]; then return fi @@ -141,9 +141,9 @@ install_pytorch_and_domains() { dedupe_macos_loader_path_rpaths # Grab the pinned audio and vision commits from PyTorch - TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt) + TORCHAUDIO_VERSION=release/2.11 export TORCHAUDIO_VERSION - TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt) + TORCHVISION_VERSION=release/0.26 export TORCHVISION_VERSION install_domains diff --git a/examples/models/moshi/mimi/install_requirements.sh b/examples/models/moshi/mimi/install_requirements.sh index de179dc8c92..9fc12f64bc9 100755 --- a/examples/models/moshi/mimi/install_requirements.sh +++ b/examples/models/moshi/mimi/install_requirements.sh @@ -8,7 +8,7 @@ set -x sudo apt install ffmpeg -y -pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu +pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu pip install moshi==0.2.11 pip install bitsandbytes soundfile einops # Run llama2/install requirements for torchao deps diff --git a/examples/models/parakeet/export_parakeet_tdt.py b/examples/models/parakeet/export_parakeet_tdt.py index f3ed0d2b070..8dd9accd866 100644 --- a/examples/models/parakeet/export_parakeet_tdt.py +++ b/examples/models/parakeet/export_parakeet_tdt.py @@ -508,13 +508,11 @@ def _create_metal_partitioners(programs): # Run decompositions for non-preprocessor programs updated_programs = {} + decomp_table = torch.export.default_decompositions() + decomp_table[torch.ops.aten.linear.default] = _linear_bias_decomposition for key, ep in programs.items(): - # print(f"Running decompositions for {key}") - # print(ep.graph_module) if key != "preprocessor": - updated_programs[key] = ep.run_decompositions( - {torch.ops.aten.linear.default: _linear_bias_decomposition} - ) + updated_programs[key] = ep.run_decompositions(decomp_table) else: updated_programs[key] = ep diff --git 
a/examples/models/voxtral_realtime/export_voxtral_rt.py b/examples/models/voxtral_realtime/export_voxtral_rt.py index d3fc9323806..951f1f606d5 100644 --- a/examples/models/voxtral_realtime/export_voxtral_rt.py +++ b/examples/models/voxtral_realtime/export_voxtral_rt.py @@ -394,10 +394,10 @@ def lower_to_executorch(programs, metadata, backend="xnnpack"): # Run decompositions for Metal backend updated_programs = {} + decomp_table = torch.export.default_decompositions() + decomp_table[torch.ops.aten.linear.default] = _linear_bias_decomposition for key, ep in programs.items(): - updated_programs[key] = ep.run_decompositions( - {torch.ops.aten.linear.default: _linear_bias_decomposition} - ) + updated_programs[key] = ep.run_decompositions(decomp_table) programs = updated_programs partitioner = {} diff --git a/exir/sym_util.py b/exir/sym_util.py index 64f4b64a32a..10b8fbdc5dc 100644 --- a/exir/sym_util.py +++ b/exir/sym_util.py @@ -25,7 +25,11 @@ def eval_expr(symint: Union[int, torch.SymInt]) -> Optional[int]: shape_env = node.shape_env expr = node.expr try: - output = shape_env.size_hint(expr) + if hasattr(shape_env, "guarding_hint_or_throw"): + output = shape_env.guarding_hint_or_throw(expr) + else: + # size_hint is deprecated, delete this code path. + output = shape_env.size_hint(expr) except torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode: return None return int(output) diff --git a/install_requirements.py b/install_requirements.py index 56ba40ff113..b30068cbdb8 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -12,11 +12,9 @@ from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible -from torch_pin import NIGHTLY_VERSION, TORCH_VERSION - # The pip repository that hosts nightly torch packages. # This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled. 
-TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly" +TORCH_URL_BASE = "https://download.pytorch.org/whl/test" # Since ExecuTorch often uses main-branch features of pytorch, only the nightly # pip versions will have the required features. @@ -44,18 +42,14 @@ def install_requirements(use_pytorch_nightly): sys.exit(1) # Determine the appropriate PyTorch URL based on CUDA delegate status - torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE) + torch_url = determine_torch_url(TORCH_URL_BASE) # pip packages needed by exir. TORCH_PACKAGE = [ # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them - ( - f"torch=={TORCH_VERSION}.{NIGHTLY_VERSION}" - if use_pytorch_nightly - else "torch" - ), + ("torch==2.11.0" if use_pytorch_nightly else "torch"), ] # Install the requirements for core ExecuTorch package. @@ -114,20 +108,12 @@ def install_requirements(use_pytorch_nightly): def install_optional_example_requirements(use_pytorch_nightly): # Determine the appropriate PyTorch URL based on CUDA delegate status - torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE) + torch_url = determine_torch_url(TORCH_URL_BASE) print("Installing torch domain libraries") DOMAIN_LIBRARIES = [ - ( - f"torchvision==0.26.0.{NIGHTLY_VERSION}" - if use_pytorch_nightly - else "torchvision" - ), - ( - f"torchaudio==2.11.0.{NIGHTLY_VERSION}" - if use_pytorch_nightly - else "torchaudio" - ), + ("torchvision==0.26.0" if use_pytorch_nightly else "torchvision"), + ("torchaudio==2.11.0" if use_pytorch_nightly else "torchaudio"), ] # Then install domain libraries subprocess.run( diff --git a/runtime/core/portable_type/c10/c10/util/complex_math.h b/runtime/core/portable_type/c10/c10/util/complex_math.h index 2b591026c94..d369df50592 100644 --- a/runtime/core/portable_type/c10/c10/util/complex_math.h +++ 
b/runtime/core/portable_type/c10/c10/util/complex_math.h @@ -86,6 +86,41 @@ C10_HOST_DEVICE inline c10::complex<T> pow( #endif } +// Regression in ROCm 7.2. See https://github.com/ROCm/rocm-libraries/pull/3836. +// Specialized version for complex<float> on AMD GPUs to use FMA-based +// multiplication +#if defined(__HIPCC__) +namespace detail { +// FMA-aware complex multiplication for float precision on AMD GPUs. +// This prevents SLP vectorizer from breaking FMA formation, which causes +// numerical precision loss in complex arithmetic. +// The issue occurs when vectorizer packs scalar multiplies before backend +// can form FMA instructions, resulting in double rounding instead of single. +C10_HOST_DEVICE inline thrust::complex<float> complex_mul_fma( + thrust::complex<float> a, + thrust::complex<float> b) { + // Complex multiplication: (a.r + a.i*i) * (b.r + b.i*i) + // = (a.r*b.r - a.i*b.i) + (a.r*b.i + a.i*b.r)*i + // Using __builtin_fmaf ensures FMA at source level: + // real: a.r*b.r + (-(a.i*b.i)) = FMA(a.r, b.r, -(a.i*b.i)) + // imag: a.i*b.r + a.r*b.i = FMA(a.r, b.i, a.i*b.r) + float real_part = __builtin_fmaf(a.real(), b.real(), -(a.imag() * b.imag())); + float imag_part = __builtin_fmaf(a.real(), b.imag(), a.imag() * b.real()); + return thrust::complex<float>(real_part, imag_part); +} +} // namespace detail + +template <> +C10_HOST_DEVICE inline c10::complex<float> pow( + const c10::complex<float>& x, + const c10::complex<float>& y) { + auto log_x = thrust::log(static_cast<thrust::complex<float>>(x)); + auto y_log_x = + detail::complex_mul_fma(static_cast<thrust::complex<float>>(y), log_x); + return static_cast<c10::complex<float>>(thrust::exp(y_log_x)); +} +#endif + template <typename T> C10_HOST_DEVICE inline c10::complex<T> pow( const c10::complex<T>& x, diff --git a/torch_pin.py b/torch_pin.py index 2dd1ac62f51..3575d9a376d 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ TORCH_VERSION = "2.11.0" -NIGHTLY_VERSION = "dev20260215" +# NIGHTLY_VERSION = "dev20260318" Temporarily pinning to stable release candidate. Revert https://github.com/pytorch/executorch/pull/18287