Commit 2348268

fix(ci): pin onnxruntime>=1.20.0 to ensure INT4 quantization support
- Specify minimum version 1.20.0, which has the stable matmul_4bits_quantizer
- Install optimum without the [onnxruntime] extra to avoid version downgrades
- Add package version diagnostics for debugging
- Fail fast if INT4 quantization is not available (instead of a silent FP32 fallback)
1 parent 757d3ed commit 2348268

File tree

1 file changed (+6, -3 lines)

.github/workflows/build-wasm.yml

Lines changed: 6 additions & 3 deletions
@@ -163,10 +163,13 @@ jobs:
       run: |
         echo "::group::Installing Python ML dependencies"
         pip install --no-cache-dir torch transformers
-        pip install --no-cache-dir --upgrade onnx onnxruntime onnxruntime-tools
-        pip install --no-cache-dir optimum[onnxruntime]
+        pip install --no-cache-dir "onnx>=1.15.0" "onnxruntime>=1.20.0"
+        pip install --no-cache-dir optimum
+        echo "Installed packages:"
+        pip list | grep -E "(onnx|optimum|torch)"
+        echo ""
         python3 -c "import onnxruntime; print(f'ONNX Runtime version: {onnxruntime.__version__}')"
-        python3 -c "from onnxruntime.quantization import matmul_4bits_quantizer; print('INT4 quantization available')" || echo "INT4 quantization not available - will use FP32 fallback"
+        python3 -c "from onnxruntime.quantization import matmul_4bits_quantizer; print('INT4 quantization available')"
         echo "::endgroup::"

     - name: Install dependencies
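The key behavioral change in the diff is the last removed line: the `|| echo ...` fallback is dropped, so a missing quantizer module now fails the step instead of silently continuing toward an FP32 build. As a hedged sketch (the module path `onnxruntime.quantization.matmul_4bits_quantizer` is taken from the diff; the helper name is ours), the check amounts to:

```python
# Sketch of the fail-fast INT4 availability check; not part of the commit itself.
import importlib
import sys


def int4_quantization_available() -> bool:
    """Return True if onnxruntime ships the MatMul 4-bit quantizer module."""
    try:
        # Same import the workflow's python3 -c one-liner performs.
        importlib.import_module("onnxruntime.quantization.matmul_4bits_quantizer")
        return True
    except ImportError:
        return False


if __name__ == "__main__":
    if int4_quantization_available():
        print("INT4 quantization available")
    else:
        # Nonzero exit fails the CI step, mirroring the post-commit behavior.
        sys.exit("INT4 quantization not available")
```

Because the module was added in newer ONNX Runtime releases, pinning `onnxruntime>=1.20.0` is what makes this check expected to pass rather than a coin flip on whichever version pip resolved.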
