Skip to content

Commit edb7b68

Browse files
committed
Update torch pin nightly to 20260312
1 parent 1e17e28 commit edb7b68

9 files changed

Lines changed: 61 additions & 103 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
659af3c353e49b35c191cdd2dba3b3c79d0e6822
1+
08b6f48d871affbc7abe9277020aed882fdf110a

.ci/scripts/test_model_e2e.sh

Lines changed: 15 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -19,12 +19,9 @@ Arguments:
1919
hf_model HuggingFace model ID (required)
2020
Supported models:
2121
- mistralai/Voxtral-Mini-3B-2507
22-
- nvidia/diar_streaming_sortformer_4spk-v2
2322
- openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
2423
- google/gemma-3-4b-it
25-
- Qwen/Qwen3-0.6B
2624
- nvidia/parakeet-tdt
27-
- facebook/dinov2-small-imagenet1k-1-layer
2825
- mistralai/Voxtral-Mini-4B-Realtime-2602
2926
3027
quant_name Quantization type (required)
@@ -46,7 +43,6 @@ Arguments:
4643
Examples:
4744
test_model_e2e.sh metal "openai/whisper-small" "non-quantized"
4845
test_model_e2e.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed" "./model_output"
49-
test_model_e2e.sh cuda "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./model_output"
5046
test_model_e2e.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./model_output"
5147
test_model_e2e.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./model_output"
5248
test_model_e2e.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming"
@@ -155,18 +151,6 @@ case "$HF_MODEL" in
155151
AUDIO_FILE=""
156152
IMAGE_PATH="docs/source/_static/img/et-logo.png"
157153
;;
158-
Qwen/Qwen3-0.6B)
159-
MODEL_NAME="qwen3"
160-
RUNNER_TARGET="llama_main"
161-
RUNNER_PATH="llama"
162-
EXPECTED_OUTPUT="Paris"
163-
PREPROCESSOR=""
164-
TOKENIZER_URL="https://huggingface.co/Qwen/Qwen3-0.6B/resolve/main" # @lint-ignore
165-
TOKENIZER_FILE=""
166-
AUDIO_URL=""
167-
AUDIO_FILE=""
168-
IMAGE_PATH=""
169-
;;
170154
nvidia/parakeet-tdt)
171155
MODEL_NAME="parakeet"
172156
RUNNER_TARGET="parakeet_runner"
@@ -179,31 +163,6 @@ case "$HF_MODEL" in
179163
AUDIO_FILE="test_audio.wav"
180164
IMAGE_PATH=""
181165
;;
182-
nvidia/diar_streaming_sortformer_4spk-v2)
183-
MODEL_NAME="sortformer"
184-
RUNNER_TARGET="sortformer_runner"
185-
RUNNER_PATH="sortformer"
186-
EXPECTED_OUTPUT="Speaker 1"
187-
PREPROCESSOR=""
188-
TOKENIZER_URL=""
189-
TOKENIZER_FILE=""
190-
AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
191-
AUDIO_FILE="poem.wav"
192-
IMAGE_PATH=""
193-
;;
194-
facebook/dinov2-small-imagenet1k-1-layer)
195-
MODEL_NAME="dinov2"
196-
RUNNER_TARGET="dinov2_runner"
197-
RUNNER_PATH="dinov2"
198-
EXPECTED_OUTPUT="Samoyed"
199-
PREPROCESSOR=""
200-
TOKENIZER_URL=""
201-
TOKENIZER_FILE=""
202-
AUDIO_URL=""
203-
AUDIO_FILE=""
204-
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
205-
IMAGE_PATH=""
206-
;;
207166
mistralai/Voxtral-Mini-4B-Realtime-2602)
208167
MODEL_NAME="voxtral_realtime"
209168
RUNNER_TARGET="voxtral_realtime_runner"
@@ -218,7 +177,7 @@ case "$HF_MODEL" in
218177
;;
219178
*)
220179
echo "Error: Unsupported model '$HF_MODEL'"
221-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
180+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, nvidia/parakeet-tdt"
222181
exit 1
223182
;;
224183
esac
@@ -231,8 +190,8 @@ echo "::endgroup::"
231190
echo "::group::Prepare $MODEL_NAME Artifacts"
232191

233192

234-
# Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
235-
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
193+
# Download tokenizer files (skip for parakeet and voxtral_realtime which bundle tokenizer in export)
194+
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ]; then
236195
if [ "$TOKENIZER_FILE" != "" ]; then
237196
curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
238197
else
@@ -248,15 +207,10 @@ if [ "$AUDIO_URL" != "" ]; then
248207
elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
249208
conda install -y -c conda-forge "ffmpeg<8"
250209
pip install datasets soundfile
251-
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
210+
pip install torchcodec==0.11.0.dev20260312 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
252211
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
253212
fi
254213

255-
# Download test image for vision models
256-
if [ -n "${IMAGE_URL:-}" ]; then
257-
curl -L "$IMAGE_URL" -o "${MODEL_DIR}/test_image.jpg"
258-
fi
259-
260214
ls -al
261215
echo "::endgroup::"
262216

@@ -292,14 +246,9 @@ if [ "$(uname -s)" = "Darwin" ] && [ -f "$RUNNER_BIN" ]; then
292246
install_name_tool -change /opt/llvm-openmp/lib/libomp.dylib @rpath/libomp.dylib "$RUNNER_BIN"
293247
fi
294248
fi
295-
# For CUDA, add named data argument (Metal embeds data in .pte).
296-
# Llama runner uses --data_paths, other runners use --data_path.
249+
# For CUDA, add data_path argument (Metal embeds data in .pte)
297250
if [ "$DEVICE" = "cuda" ]; then
298-
if [ "$RUNNER_PATH" = "llama" ]; then
299-
RUNNER_ARGS="$RUNNER_ARGS --data_paths ${MODEL_DIR}/aoti_cuda_blob.ptd"
300-
else
301-
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
302-
fi
251+
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
303252
fi
304253

305254
# Add model-specific arguments
@@ -313,44 +262,26 @@ case "$MODEL_NAME" in
313262
gemma3)
314263
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --image_path $IMAGE_PATH"
315264
;;
316-
qwen3)
317-
PROMPT_FILE="${MODEL_DIR}/qwen3_prompt.txt"
318-
cat > "${PROMPT_FILE}" << 'EOF'
319-
<|im_start|>user
320-
What is the capital of France?<|im_end|>
321-
<|im_start|>assistant
322-
EOF
323-
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/ --prompt_file ${PROMPT_FILE}"
324-
;;
325265
parakeet)
326266
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --audio_path ${MODEL_DIR}/$AUDIO_FILE --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE"
327267
# For CUDA, add data_path argument (Metal embeds data in .pte)
328268
if [ "$DEVICE" = "cuda" ]; then
329269
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
330270
fi
331271
;;
332-
sortformer)
333-
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --audio_path ${MODEL_DIR}/$AUDIO_FILE"
334-
if [ "$DEVICE" = "cuda" ]; then
335-
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
336-
fi
337-
;;
338-
dinov2)
339-
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
340-
if [ "$DEVICE" = "cuda" ]; then
341-
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
342-
fi
343-
;;
344272
voxtral_realtime)
345273
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
346-
# Add CUDA data path if present
347-
if [ "$DEVICE" = "cuda" ] && [ -f "${MODEL_DIR}/aoti_cuda_blob.ptd" ]; then
348-
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
349-
fi
350274
# Determine streaming mode based on MODE parameter
351-
USE_STREAMING="true"
352-
if [ "$MODE" = "vr-offline" ]; then
275+
USE_STREAMING="false"
276+
if [ "$MODE" = "vr-streaming" ]; then
277+
USE_STREAMING="true"
278+
elif [ "$MODE" = "vr-offline" ]; then
353279
USE_STREAMING="false"
280+
elif [ -z "$MODE" ]; then
281+
# Auto-detect: XNNPACK uses streaming, others use offline
282+
if [ "$DEVICE" = "xnnpack" ]; then
283+
USE_STREAMING="true"
284+
fi
354285
fi
355286
# Add streaming flag if needed
356287
if [ "$USE_STREAMING" = "true" ]; then

.ci/scripts/test_wheel_package_qnn.sh

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,6 @@ import argparse
1818
1919
import torch
2020
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
21-
from executorch.backends.qualcomm.serialization.qc_schema import (
22-
QnnExecuTorchBackendType,
23-
)
2421
from executorch.backends.qualcomm.utils.utils import (
2522
generate_htp_compiler_spec,
2623
generate_qnn_executorch_compiler_spec,
@@ -53,7 +50,7 @@ def main() -> None:
5350
example_inputs = model.get_example_inputs()
5451
5552
if args.quantization:
56-
quantizer = QnnQuantizer(backend=QnnExecuTorchBackendType.kHtpBackend, soc_model=get_soc_to_chipset_map()[args.soc])
53+
quantizer = QnnQuantizer()
5754
m = torch.export.export(model.eval(), example_inputs, strict=True).module()
5855
if args.quantization == "qat":
5956
m = prepare_qat_pt2e(m, quantizer)
@@ -89,14 +86,6 @@ EOF
8986
# ----------------------------
9087
echo "=== Building Wheel Package ==="
9188
source .ci/scripts/utils.sh
92-
93-
# Ensure QNN SDK is available so setup.py auto-detects it.
94-
source backends/qualcomm/scripts/install_qnn_sdk.sh
95-
install_qnn
96-
97-
# Make QNN SDK libraries available for runtime loading (e.g. libQnnHtp.so)
98-
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
99-
10089
install_executorch
10190
EXECUTORCH_BUILDING_WHEEL=1 python setup.py bdist_wheel
10291
unset EXECUTORCH_BUILDING_WHEEL

.ci/scripts/utils.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -141,9 +141,9 @@ install_pytorch_and_domains() {
141141

142142
dedupe_macos_loader_path_rpaths
143143
# Pin torchaudio and torchvision to fixed release branches
144-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
144+
TORCHAUDIO_VERSION=release/2.11
145145
export TORCHAUDIO_VERSION
146-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
146+
TORCHVISION_VERSION=release/0.26
147147
export TORCHVISION_VERSION
148148

149149
install_domains

examples/models/moshi/mimi/install_requirements.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
set -x
99

1010
sudo apt install ffmpeg -y
11-
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
11+
pip install torchcodec==0.11.0.dev20260312 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
1212
pip install moshi==0.2.11
1313
pip install bitsandbytes soundfile einops
1414
# Run llama2/install requirements for torchao deps

exir/sym_util.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,10 @@ def eval_expr(symint: Union[int, torch.SymInt]) -> Optional[int]:
2525
shape_env = node.shape_env
2626
expr = node.expr
2727
try:
28-
output = shape_env.size_hint(expr)
28+
if hasattr(shape_env, "guarding_hint_or_throw"):
29+
output = shape_env.guarding_hint_or_throw(expr)
30+
else:
31+
output = shape_env.size_hint(expr)
2932
except torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode:
3033
return None
3134
return int(output)

runtime/core/portable_type/c10/c10/util/complex_math.h

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,41 @@ C10_HOST_DEVICE inline c10::complex<T> pow(
8686
#endif
8787
}
8888

89+
// Regression in ROCm 7.2. See https://github.com/ROCm/rocm-libraries/pull/3836.
90+
// Specialized version for complex<float> on AMD GPUs to use FMA-based
91+
// multiplication
92+
#if defined(__HIPCC__)
93+
namespace detail {
94+
// FMA-aware complex multiplication for float precision on AMD GPUs.
95+
// This prevents the SLP vectorizer from breaking FMA formation, which causes
96+
// numerical precision loss in complex arithmetic.
97+
// The issue occurs when the vectorizer packs scalar multiplies before the
98+
// backend can form FMA instructions, causing double rounding instead of single.
99+
C10_HOST_DEVICE inline thrust::complex<float> complex_mul_fma(
100+
thrust::complex<float> a,
101+
thrust::complex<float> b) {
102+
// Complex multiplication: (a.r + a.i*i) * (b.r + b.i*i)
103+
// = (a.r*b.r - a.i*b.i) + (a.r*b.i + a.i*b.r)*i
104+
// Using __builtin_fmaf ensures FMA at source level:
105+
// real: a.r*b.r + (-(a.i*b.i)) = FMA(a.r, b.r, -(a.i*b.i))
106+
// imag: a.i*b.r + a.r*b.i = FMA(a.r, b.i, a.i*b.r)
107+
float real_part = __builtin_fmaf(a.real(), b.real(), -(a.imag() * b.imag()));
108+
float imag_part = __builtin_fmaf(a.real(), b.imag(), a.imag() * b.real());
109+
return thrust::complex<float>(real_part, imag_part);
110+
}
111+
} // namespace detail
112+
113+
template <>
114+
C10_HOST_DEVICE inline c10::complex<float> pow(
115+
const c10::complex<float>& x,
116+
const c10::complex<float>& y) {
117+
auto log_x = thrust::log(static_cast<thrust::complex<float>>(x));
118+
auto y_log_x =
119+
detail::complex_mul_fma(static_cast<thrust::complex<float>>(y), log_x);
120+
return static_cast<c10::complex<float>>(thrust::exp(y_log_x));
121+
}
122+
#endif
123+
89124
template <typename T>
90125
C10_HOST_DEVICE inline c10::complex<T> pow(
91126
const c10::complex<T>& x,

runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -629,7 +629,7 @@ __host__ __device__
629629
// This macro is used to find older C++ compilers
630630
// that don't support move optimization for return values.
631631

632-
#if (defined(__GNUC__) && __GNUC__ < 13) || \
632+
#if (defined(__GNUC__) && __GNUC__ < 13 && __cplusplus < 202002L) || \
633633
(defined(__clang_major__) && __clang_major__ < 13)
634634
#define C10_RETURN_MOVE_IF_OLD_COMPILER 1
635635
#else

torch_pin.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
TORCH_VERSION = "2.11.0"
2-
NIGHTLY_VERSION = "dev20260215"
2+
NIGHTLY_VERSION = "dev20260312"

0 commit comments

Comments
 (0)