Skip to content

Commit 64d637b

Browse files
committed
Update torch pin nightly to 20260312
1 parent 1e17e28 commit 64d637b

File tree

7 files changed

+44
-6
lines changed

7 files changed

+44
-6
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
659af3c353e49b35c191cdd2dba3b3c79d0e6822
1+
08b6f48d871affbc7abe9277020aed882fdf110a

.ci/scripts/test_model_e2e.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ if [ "$AUDIO_URL" != "" ]; then
248248
elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
249249
conda install -y -c conda-forge "ffmpeg<8"
250250
pip install datasets soundfile
251-
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
251+
pip install torchcodec==0.11.0.dev20260312 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
252252
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
253253
fi
254254

examples/models/moshi/mimi/install_requirements.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
set -x
99

1010
sudo apt install ffmpeg -y
11-
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
11+
pip install torchcodec==0.11.0.dev20260312 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
1212
pip install moshi==0.2.11
1313
pip install bitsandbytes soundfile einops
1414
# Run llama2/install requirements for torchao deps

exir/sym_util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ def eval_expr(symint: Union[int, torch.SymInt]) -> Optional[int]:
2525
shape_env = node.shape_env
2626
expr = node.expr
2727
try:
28-
output = shape_env.size_hint(expr)
28+
if hasattr(shape_env, "guarding_hint_or_throw"):
29+
output = shape_env.guarding_hint_or_throw(expr)
30+
else:
31+
output = shape_env.size_hint(expr)
2932
except torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode:
3033
return None
3134
return int(output)

runtime/core/portable_type/c10/c10/util/complex_math.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,41 @@ C10_HOST_DEVICE inline c10::complex<T> pow(
8686
#endif
8787
}
8888

89+
// Regression in ROCm 7.2. See https://github.com/ROCm/rocm-libraries/pull/3836.
90+
// Specialized version for complex<float> on AMD GPUs to use FMA-based
91+
// multiplication
92+
#if defined(__HIPCC__)
93+
namespace detail {
94+
// FMA-aware complex multiplication for float precision on AMD GPUs.
95+
// This prevents SLP vectorizer from breaking FMA formation, which causes
96+
// numerical precision loss in complex arithmetic.
97+
// The issue occurs when vectorizer packs scalar multiplies before backend
98+
// can form FMA instructions, resulting in double rounding instead of single.
99+
C10_HOST_DEVICE inline thrust::complex<float> complex_mul_fma(
100+
thrust::complex<float> a,
101+
thrust::complex<float> b) {
102+
// Complex multiplication: (a.r + a.i*i) * (b.r + b.i*i)
103+
// = (a.r*b.r - a.i*b.i) + (a.r*b.i + a.i*b.r)*i
104+
// Using __builtin_fmaf ensures FMA at source level:
105+
// real: a.r*b.r + (-(a.i*b.i)) = FMA(a.r, b.r, -(a.i*b.i))
106+
// imag: a.i*b.r + a.r*b.i = FMA(a.r, b.i, a.i*b.r)
107+
float real_part = __builtin_fmaf(a.real(), b.real(), -(a.imag() * b.imag()));
108+
float imag_part = __builtin_fmaf(a.real(), b.imag(), a.imag() * b.real());
109+
return thrust::complex<float>(real_part, imag_part);
110+
}
111+
} // namespace detail
112+
113+
template <>
114+
C10_HOST_DEVICE inline c10::complex<float> pow(
115+
const c10::complex<float>& x,
116+
const c10::complex<float>& y) {
117+
auto log_x = thrust::log(static_cast<thrust::complex<float>>(x));
118+
auto y_log_x =
119+
detail::complex_mul_fma(static_cast<thrust::complex<float>>(y), log_x);
120+
return static_cast<c10::complex<float>>(thrust::exp(y_log_x));
121+
}
122+
#endif
123+
89124
template <typename T>
90125
C10_HOST_DEVICE inline c10::complex<T> pow(
91126
const c10::complex<T>& x,

runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,7 @@ __host__ __device__
629629
// This macro is used to find older C++ compilers
630630
// that don't support move optimization for return values.
631631

632-
#if (defined(__GNUC__) && __GNUC__ < 13) || \
632+
#if (defined(__GNUC__) && __GNUC__ < 13 && __cplusplus < 202002L) || \
633633
(defined(__clang_major__) && __clang_major__ < 13)
634634
#define C10_RETURN_MOVE_IF_OLD_COMPILER 1
635635
#else

torch_pin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
TORCH_VERSION = "2.11.0"
2-
NIGHTLY_VERSION = "dev20260215"
2+
NIGHTLY_VERSION = "dev20260312"

0 commit comments

Comments (0)