Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
659af3c353e49b35c191cdd2dba3b3c79d0e6822
release/2.11
4 changes: 2 additions & 2 deletions .ci/docker/common/install_pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ install_pytorch_and_domains() {
pip_install "$(echo dist/*.whl)"

# Grab the pinned audio and vision commits from PyTorch
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
TORCHAUDIO_VERSION=release/2.11
export TORCHAUDIO_VERSION
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
TORCHVISION_VERSION=release/0.26
export TORCHVISION_VERSION
Comment on lines +35 to 38

install_domains
Expand Down
2 changes: 1 addition & 1 deletion .ci/scripts/test_model_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ if [ "$AUDIO_URL" != "" ]; then
elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
conda install -y -c conda-forge "ffmpeg<8"
pip install datasets soundfile
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
fi

Expand Down
22 changes: 11 additions & 11 deletions .ci/scripts/test_wheel_package_qnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -158,17 +158,17 @@ print(module_vars["TORCH_VERSION"])
PY
)

NIGHTLY_VERSION=$(
"$PYBIN" - <<'PY'
import runpy
module_vars = runpy.run_path("torch_pin.py")
print(module_vars["NIGHTLY_VERSION"])
PY
)
echo "=== [$LABEL] Install torch==${TORCH_VERSION}.${NIGHTLY_VERSION} ==="

# Install torchao based on the pinned PyTorch version
"$PIPBIN" install torch=="${TORCH_VERSION}.${NIGHTLY_VERSION}" --index-url "https://download.pytorch.org/whl/nightly/cpu"
# NIGHTLY_VERSION=$(
# "$PYBIN" - <<'PY'
# import runpy
# module_vars = runpy.run_path("torch_pin.py")
# print(module_vars["NIGHTLY_VERSION"])
# PY
# )
echo "=== [$LABEL] Install torch==${TORCH_VERSION} ==="

# Install torch at the pinned PyTorch version; the PyTorch test index is added as an extra source (pip's --extra-index-url does not give it priority over the default index)
"$PIPBIN" install torch=="${TORCH_VERSION}" --extra-index-url "https://download.pytorch.org/whl/test"
"$PIPBIN" install wheel

# Install torchao based on the pinned commit from third-party/ao submodule
Expand Down
6 changes: 3 additions & 3 deletions .ci/scripts/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ dedupe_macos_loader_path_rpaths() {
pushd ..
torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib
popd

if [[ -z "${torch_lib_dir}" || ! -d "${torch_lib_dir}" ]]; then
return
fi
Expand Down Expand Up @@ -141,9 +141,9 @@ install_pytorch_and_domains() {

dedupe_macos_loader_path_rpaths
# Grab the pinned audio and vision commits from PyTorch
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
TORCHAUDIO_VERSION=release/2.11
export TORCHAUDIO_VERSION
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
TORCHVISION_VERSION=release/0.26
export TORCHVISION_VERSION
Comment on lines +144 to 147

install_domains
Expand Down
2 changes: 1 addition & 1 deletion examples/models/moshi/mimi/install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
set -x

sudo apt install ffmpeg -y
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu
pip install moshi==0.2.11
pip install bitsandbytes soundfile einops
# Run llama2/install requirements for torchao deps
Expand Down
8 changes: 3 additions & 5 deletions examples/models/parakeet/export_parakeet_tdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,13 +508,11 @@ def _create_metal_partitioners(programs):

# Run decompositions for non-preprocessor programs
updated_programs = {}
decomp_table = torch.export.default_decompositions()
decomp_table[torch.ops.aten.linear.default] = _linear_bias_decomposition
for key, ep in programs.items():
# print(f"Running decompositions for {key}")
# print(ep.graph_module)
if key != "preprocessor":
updated_programs[key] = ep.run_decompositions(
{torch.ops.aten.linear.default: _linear_bias_decomposition}
)
updated_programs[key] = ep.run_decompositions(decomp_table)
else:
updated_programs[key] = ep

Expand Down
6 changes: 3 additions & 3 deletions examples/models/voxtral_realtime/export_voxtral_rt.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,10 +394,10 @@ def lower_to_executorch(programs, metadata, backend="xnnpack"):

# Run decompositions for Metal backend
updated_programs = {}
decomp_table = torch.export.default_decompositions()
decomp_table[torch.ops.aten.linear.default] = _linear_bias_decomposition
for key, ep in programs.items():
updated_programs[key] = ep.run_decompositions(
{torch.ops.aten.linear.default: _linear_bias_decomposition}
)
updated_programs[key] = ep.run_decompositions(decomp_table)
programs = updated_programs

partitioner = {}
Expand Down
6 changes: 5 additions & 1 deletion exir/sym_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ def eval_expr(symint: Union[int, torch.SymInt]) -> Optional[int]:
shape_env = node.shape_env
expr = node.expr
try:
output = shape_env.size_hint(expr)
if hasattr(shape_env, "guarding_hint_or_throw"):
output = shape_env.guarding_hint_or_throw(expr)
else:
# TODO: size_hint is deprecated; delete this fallback once shape_env always provides guarding_hint_or_throw.
output = shape_env.size_hint(expr)
except torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode:
return None
return int(output)
Expand Down
26 changes: 6 additions & 20 deletions install_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@

from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible

from torch_pin import NIGHTLY_VERSION, TORCH_VERSION

# Base URL of the pip repository that hosts the torch packages to install.
# The final URL is chosen dynamically, based on CUDA availability and on whether the CUDA backend is enabled.
TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly"
TORCH_URL_BASE = "https://download.pytorch.org/whl/test"

# Since ExecuTorch often uses main-branch features of pytorch, only the nightly
# pip versions will have the required features.
Comment on lines 15 to 20
Expand Down Expand Up @@ -44,18 +42,14 @@ def install_requirements(use_pytorch_nightly):
sys.exit(1)

# Determine the appropriate PyTorch URL based on CUDA delegate status
torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE)
torch_url = determine_torch_url(TORCH_URL_BASE)

# pip packages needed by exir.
TORCH_PACKAGE = [
# Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note
# that we don't need to set any version number there because they have already
# been installed on CI before this step, so pip won't reinstall them
(
f"torch=={TORCH_VERSION}.{NIGHTLY_VERSION}"
if use_pytorch_nightly
else "torch"
),
("torch==2.11.0" if use_pytorch_nightly else "torch"),
]

# Install the requirements for core ExecuTorch package.
Expand Down Expand Up @@ -114,20 +108,12 @@ def install_requirements(use_pytorch_nightly):

def install_optional_example_requirements(use_pytorch_nightly):
# Determine the appropriate PyTorch URL based on CUDA delegate status
torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE)
torch_url = determine_torch_url(TORCH_URL_BASE)

print("Installing torch domain libraries")
DOMAIN_LIBRARIES = [
(
f"torchvision==0.26.0.{NIGHTLY_VERSION}"
if use_pytorch_nightly
else "torchvision"
),
(
f"torchaudio==2.11.0.{NIGHTLY_VERSION}"
if use_pytorch_nightly
else "torchaudio"
),
("torchvision==0.26.0" if use_pytorch_nightly else "torchvision"),
("torchaudio==2.11.0" if use_pytorch_nightly else "torchaudio"),
]
Comment on lines 48 to 117
# Then install domain libraries
subprocess.run(
Expand Down
35 changes: 35 additions & 0 deletions runtime/core/portable_type/c10/c10/util/complex_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,41 @@ C10_HOST_DEVICE inline c10::complex<T> pow(
#endif
}

// Regression in ROCm 7.2. See https://github.com/ROCm/rocm-libraries/pull/3836.
// Specialized version for complex<float> on AMD GPUs to use FMA-based
// multiplication
#if defined(__HIPCC__)
namespace detail {
// FMA-aware complex multiplication for float precision on AMD GPUs.
// This prevents the SLP vectorizer from breaking FMA formation, which causes
// numerical precision loss in complex arithmetic.
// The issue occurs when the vectorizer packs scalar multiplies before the
// backend can form FMA instructions, resulting in double rounding instead of
// single rounding.
//
// Parameters:
//   a, b - the two complex<float> factors, taken by value.
// Returns:
//   The product a * b as a thrust::complex<float>. Each component is written
//   as one ordinary float multiply whose result is the addend of a single
//   __builtin_fmaf call.
C10_HOST_DEVICE inline thrust::complex<float> complex_mul_fma(
thrust::complex<float> a,
thrust::complex<float> b) {
// Complex multiplication: (a.r + a.i*i) * (b.r + b.i*i)
// = (a.r*b.r - a.i*b.i) + (a.r*b.i + a.i*b.r)*i
// Using __builtin_fmaf ensures FMA at source level:
// real: a.r*b.r + (-(a.i*b.i)) = FMA(a.r, b.r, -(a.i*b.i))
// imag: a.i*b.r + a.r*b.i = FMA(a.r, b.i, a.i*b.r)
// NOTE(review): the operand order below is deliberate; do not reorder or
// refactor these two expressions without re-checking the generated code.
float real_part = __builtin_fmaf(a.real(), b.real(), -(a.imag() * b.imag()));
float imag_part = __builtin_fmaf(a.real(), b.imag(), a.imag() * b.real());
return thrust::complex<float>(real_part, imag_part);
}
} // namespace detail

// complex<float> specialization of pow (compiled only under __HIPCC__):
// evaluates exp(y * log(x)) with thrust, where the y * log(x) product is
// computed by detail::complex_mul_fma.
template <>
C10_HOST_DEVICE inline c10::complex<float> pow(
    const c10::complex<float>& x,
    const c10::complex<float>& y) {
  using thrust_cfloat = thrust::complex<float>;
  // Natural log of the base, in thrust's complex representation.
  const auto log_of_base = thrust::log(static_cast<thrust_cfloat>(x));
  // exponent * log(base), with the multiply routed through the FMA helper.
  const auto scaled_log =
      detail::complex_mul_fma(static_cast<thrust_cfloat>(y), log_of_base);
  return static_cast<c10::complex<float>>(thrust::exp(scaled_log));
}
#endif

template <typename T>
C10_HOST_DEVICE inline c10::complex<T> pow(
const c10::complex<T>& x,
Expand Down
2 changes: 1 addition & 1 deletion torch_pin.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
TORCH_VERSION = "2.11.0"
NIGHTLY_VERSION = "dev20260215"
# NIGHTLY_VERSION = "dev20260318" Temporarily pinning to stable release candidate. Revert https://github.com/pytorch/executorch/pull/18287
Loading