diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index fbe602375..c02884076 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -128,6 +128,14 @@ Remove-Item -Recurse -Force "$env:USERPROFILE\.cache\winml" The next `winml build` will re-create the cache as needed. Use `--rebuild` to force a full rebuild without relying on cached intermediates. +When a build runs out of disk space mid-write, `winml` now stops with a clear message instead of a misleading downstream error: + +```text +ONNXSaveError: Insufficient disk space — unable to write ONNX model to <path>. Free up disk space and try again. +``` + +The partially written file is removed automatically, so a later stage never reads a truncated model. (Previously this surfaced much later as a confusing `ValueError: Failed to find proper ai.onnx domain` during quantization.) Free up space using the command above and re-run the build. + --- ## General Tips diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 8588935f4..1c3817b56 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -810,8 +810,14 @@ def _patch_device(cfg: WinMLBuildConfig) -> None: # Map common errors to actionable hints err_str = str(e) + err_lower = err_str.lower() hint = None - if "Quantization failed" in err_str: + if "disk space" in err_lower or "no space left" in err_lower: + hint = ( + "Free up disk space (e.g. clear the HuggingFace cache or " + "~/.cache/winml) and rebuild." 
+ ) + elif "Quantization failed" in err_str: hint = "Try: --no-quant to skip quantization" elif "Compilation failed" in err_str: hint = "Try: --no-compile to skip compilation" diff --git a/src/winml/modelkit/commands/quantize.py b/src/winml/modelkit/commands/quantize.py index 4c8a77498..e93f63c98 100644 --- a/src/winml/modelkit/commands/quantize.py +++ b/src/winml/modelkit/commands/quantize.py @@ -333,6 +333,7 @@ def _run_multi_precision( """Execute a multi-pass quantization pipeline from ordered precision strings.""" from ..config.precision import extract_weight_bits from ..quant import Quantizer, WinMLQuantizationConfig, expand_precision + from ..quant.quantizer import _check_input_model_opset modes = [_cli_precision_to_mode(p) for p in precision] has_calibration_pass = any(m == "static" for m in modes) @@ -387,6 +388,17 @@ def _run_multi_precision( try: console.print(f"\n[bold]Running pipeline: {label}...[/bold]") + # Mirror quantize_onnx's input guard: the multi-precision path drives the + # Quantizer pipeline directly (bypassing quantize_onnx), so surface a + # clear disk-full/corruption error here too instead of ORT's opaque + # "Failed to find proper ai.onnx domain" deep inside a pass. A missing + # file is left to Quantizer.run(), which reports "Model not found". 
+ opset_error = _check_input_model_opset(model) if model.exists() else None + if opset_error is not None: + console.print("\n[bold red]Pipeline failed:[/bold red]") + console.print(f" {opset_error}") + raise click.ClickException("Pipeline failed") + result = Quantizer(passes).run(model, output) if result.success: diff --git a/src/winml/modelkit/onnx/__init__.py b/src/winml/modelkit/onnx/__init__.py index 3b067e224..4c7fe312d 100644 --- a/src/winml/modelkit/onnx/__init__.py +++ b/src/winml/modelkit/onnx/__init__.py @@ -20,7 +20,7 @@ from .external_data import copy_onnx_model, get_onnx_model_hash from .io import InputTensorSpec, OutputTensorSpec, generate_inputs_from_onnx, get_io_config from .metadata import capture_metadata, restore_metadata -from .persistence import cleanup_onnx, load_onnx, save_onnx +from .persistence import ONNXSaveError, cleanup_onnx, load_onnx, save_onnx from .shape import infer_onnx_shapes, infer_shapes from .utils import EXTERNAL_DATA_THRESHOLD, check_onnx_model, get_model_size @@ -29,6 +29,7 @@ "EXTERNAL_DATA_THRESHOLD", "InputTensorSpec", "ONNXDomain", + "ONNXSaveError", "OutputTensorSpec", "SupportedONNXType", "capture_metadata", diff --git a/src/winml/modelkit/onnx/external_data.py b/src/winml/modelkit/onnx/external_data.py index 6fe61c74c..5363a1985 100644 --- a/src/winml/modelkit/onnx/external_data.py +++ b/src/winml/modelkit/onnx/external_data.py @@ -26,7 +26,7 @@ import onnx from onnx import external_data_helper -from .persistence import load_onnx, save_onnx +from .persistence import _cleanup_partial_save, _raise_save_error, load_onnx, save_onnx logger = logging.getLogger(__name__) @@ -219,23 +219,30 @@ def copy_onnx_model( dst.parent.mkdir(parents=True, exist_ok=True) try: - external_files = get_external_data_files(src) - except Exception: - # Not a valid ONNX file or can't parse — fall back to simple copy - shutil.copy2(src, dst) - return - - if not external_files: - # No external data — simple copy - shutil.copy2(src, dst) - return 
- - if len(external_files) == 1: - # Single external data file — copy .data + patch .onnx - _copy_single_external(src, dst, external_files[0]) - else: - # Multiple files — consolidate into one - _copy_consolidate(src, dst) + try: + external_files = get_external_data_files(src) + except Exception: + # Not a valid ONNX file or can't parse — fall back to simple copy + shutil.copy2(src, dst) + return + + if not external_files: + # No external data — simple copy + shutil.copy2(src, dst) + return + + if len(external_files) == 1: + # Single external data file — copy .data + patch .onnx + _copy_single_external(src, dst, external_files[0]) + else: + # Multiple files — consolidate into one + _copy_consolidate(src, dst) + except OSError as e: + # A failed copy (commonly disk-full) can leave a truncated destination + # and/or .data sidecar behind. Remove them and surface a clear error + # instead of letting a later stage load the corrupt model. + _cleanup_partial_save(dst, dst.parent / f"{dst.name}.data") + _raise_save_error(e, dst) logger.debug( "Copied ONNX model with external data: %s -> %s (%d data files)", diff --git a/src/winml/modelkit/onnx/persistence.py b/src/winml/modelkit/onnx/persistence.py index c4c71cad0..3a121e06a 100644 --- a/src/winml/modelkit/onnx/persistence.py +++ b/src/winml/modelkit/onnx/persistence.py @@ -12,9 +12,11 @@ from __future__ import annotations +import errno import logging import os from pathlib import Path +from typing import NoReturn import onnx from onnx.external_data_helper import _get_all_tensors, uses_external_data @@ -25,6 +27,92 @@ logger = logging.getLogger(__name__) +# Windows ERROR_DISK_FULL. Python usually maps this to errno.ENOSPC via the CRT, +# but we check the raw winerror too so a disk-full write is always recognised. +_WINDOWS_ERROR_DISK_FULL = 112 + + +class ONNXSaveError(OSError): + """Raised when an ONNX model cannot be written to disk. 
+ + Subclasses :class:`OSError` so existing ``except OSError`` handlers keep + working and the original ``errno`` is preserved (see ``errno_code``), while + surfacing a clear, actionable message. This matters most for disk-full + conditions: without it, a failed write leaves a truncated/zero-byte + ``.onnx`` behind and the real cause only shows up much later as an opaque + opset-parsing error in a downstream stage. + + Note: + ``OSError.__init__`` only populates ``errno`` from a 2-argument + ``(errno, strerror)`` call, which would also rewrite ``str(self)`` as + ``"[Errno N] <message>"``. To keep the clean message *and* preserve + ``errno`` for ``except OSError`` callers that inspect ``e.errno``, we + construct with the single message and set ``errno`` explicitly. + + Attributes: + path: Destination path that could not be written. + disk_full: ``True`` when the failure was caused by insufficient disk + space (``errno.ENOSPC`` / Windows ``ERROR_DISK_FULL``). + errno: The originating OS error code, when known (inherited from + :class:`OSError`). + """ + + def __init__( + self, + message: str, + *, + path: str | Path | None = None, + disk_full: bool = False, + errno_code: int | None = None, + ) -> None: + super().__init__(message) + # super().__init__(message) leaves self.errno = None; set it explicitly + # so callers catching this as OSError can still inspect e.errno. + if errno_code is not None: + self.errno = errno_code + self.path = path + self.disk_full = disk_full + + +def _is_disk_full_error(error: OSError) -> bool: + """Return ``True`` when *error* represents an out-of-disk-space condition.""" + return ( + error.errno == errno.ENOSPC + or getattr(error, "winerror", None) == _WINDOWS_ERROR_DISK_FULL + ) + + +def _cleanup_partial_save(*paths: Path | None) -> None: + """Best-effort removal of partial artifacts left by a failed write. + + A failed ``onnx.save_model`` / copy can leave a zero-byte or truncated + ``.onnx`` file (and ``.data`` sidecar) behind. 
Removing them prevents a + later stage from loading a corrupt model and reporting a misleading error. + """ + for partial in paths: + if partial is None: + continue + try: + Path(partial).unlink(missing_ok=True) + except OSError: + logger.debug("Could not remove partial artifact: %s", partial, exc_info=True) + + +def _raise_save_error(error: OSError, path: Path) -> NoReturn: + """Translate a write ``OSError`` into a clear :class:`ONNXSaveError`.""" + disk_full = _is_disk_full_error(error) + if disk_full: + message = ( + f"Insufficient disk space — unable to write ONNX model to {path}. " + "Free up disk space and try again." + ) + else: + message = f"Failed to write ONNX model to {path}: {error}" + raise ONNXSaveError( + message, path=path, disk_full=disk_full, errno_code=error.errno + ) from error + + def load_onnx( path: str | Path, *, @@ -127,20 +215,31 @@ def save_onnx( # path.parent is guaranteed to exist: mkdir() was called above. original_cwd = Path.cwd() try: - os.chdir(path.parent) - onnx.save_model( - model, - path.name, - save_as_external_data=True, - all_tensors_to_one_file=True, - location=ext_location, - size_threshold=1024, - ) - finally: - os.chdir(original_cwd) + try: + os.chdir(path.parent) + onnx.save_model( + model, + path.name, + save_as_external_data=True, + all_tensors_to_one_file=True, + location=ext_location, + size_threshold=1024, + ) + finally: + os.chdir(original_cwd) + except OSError as e: + # A failed external-data write can leave a truncated .onnx and/or + # .data sidecar behind; remove them so a later stage never loads a + # corrupt model and reports a misleading error. 
+ _cleanup_partial_save(path, ext_path) + _raise_save_error(e, path) else: logger.debug("Saving ONNX model inline to %s", path) - onnx.save_model(model, str(path)) + try: + onnx.save_model(model, str(path)) + except OSError as e: + _cleanup_partial_save(path) + _raise_save_error(e, path) def cleanup_onnx(path: str | Path) -> list[Path]: diff --git a/src/winml/modelkit/quant/quantizer.py b/src/winml/modelkit/quant/quantizer.py index d7d143a4b..48808f293 100644 --- a/src/winml/modelkit/quant/quantizer.py +++ b/src/winml/modelkit/quant/quantizer.py @@ -219,6 +219,57 @@ def _merge_results(base: QuantizeResult, new: QuantizeResult) -> QuantizeResult: ) +def _check_input_model_opset(model_path: Path) -> str | None: + """Return a clear error message if *model_path* is empty/corrupt, else None. + + Mirrors ORT's ``get_opset_version`` requirement: a usable model must declare + a default (``""`` / ``ai.onnx``) opset import. A zero-byte or truncated file + parses into an (almost) empty ModelProto with no such opset import — the + signature of a previous stage that failed to finish writing (most commonly + because it ran out of disk space). Detecting it here lets us surface the + real cause instead of ORT's opaque "Failed to find proper ai.onnx domain". + + A zero-byte file (the most common disk-full artefact) is caught up front + with a cheap ``stat`` so the healthy success path never pays for a full + proto parse. The full parse via ``onnx.load_model`` (graph only — no + external weights, so it never trips over a missing ``.data`` sidecar) is the + fallback for the rarer truncated-but-nonzero case. + """ + from onnx import load_model + + # Fast path: a zero-byte output is the most common disk-full artefact. + try: + if model_path.stat().st_size == 0: + return ( + f"Input ONNX model is empty (zero bytes): {model_path}. " + "A previous build stage may have run out of disk space. " + "Free up disk space and rebuild." 
+ ) + except OSError: + # stat() failing is unexpected (existence was already checked); fall + # through to the full parse, which surfaces a clear error either way. + pass + + try: + model = load_model(str(model_path), load_external_data=False) + except Exception as e: + return ( + f"Input ONNX model could not be parsed: {model_path} ({e}). " + "The file may be truncated or corrupt — for example, a previous " + "build stage may have run out of disk space. Free up disk space " + "and rebuild." + ) + + has_default_opset = any(opset.domain in ("", "ai.onnx") for opset in model.opset_import) + if not has_default_opset: + return ( + f"Input ONNX model is empty or corrupt (no ai.onnx opset import): " + f"{model_path}. It may have been truncated by a previous failed " + "write (e.g. insufficient disk space). Free up disk space and rebuild." + ) + return None + + def quantize_onnx( model_path: str | Path, output_path: str | Path | None = None, @@ -257,6 +308,25 @@ def quantize_onnx( output_path = model_path.parent / f"{model_path.stem}_quantized.onnx" use_external_data: bool = kwargs.pop("use_external_data", True) + if kwargs: + raise TypeError(f"quantize_onnx() got unexpected keyword arguments: {sorted(kwargs)}") + + # Guard against an empty/corrupt input model before building the pipeline. + # A previous stage that ran out of disk space can leave a truncated/zero-byte + # .onnx behind; without this check a pass fails deep inside ORT with the + # opaque "Failed to find proper ai.onnx domain". Surface the real cause + # instead, and catch it before the model-type finalizer reads the model. A + # missing file is left to Quantizer.run(), which reports a clear + # "Model not found". + if model_path.exists(): + opset_error = _check_input_model_opset(model_path) + if opset_error is not None: + return QuantizeResult( + success=False, + output_path=None, + errors=[opset_error], + ) + # Apply model-type-specific quant finalizer if registered. 
Some model types # finalize calibration reader / nodes-to-exclude / dtypes only once the # exported ONNX exists. @@ -267,7 +337,5 @@ def quantize_onnx( if finalizer is not None: config = finalizer.finalize(config, onnx_path=model_path, model_id=config.model_id) - if kwargs: - raise TypeError(f"quantize_onnx() got unexpected keyword arguments: {sorted(kwargs)}") passes = expand_precision(config=config) return Quantizer(passes).run(model_path, output_path, use_external_data=use_external_data) diff --git a/tests/unit/commands/test_compile_quantize_flags.py b/tests/unit/commands/test_compile_quantize_flags.py index 6bd873e3a..ac2af1215 100644 --- a/tests/unit/commands/test_compile_quantize_flags.py +++ b/tests/unit/commands/test_compile_quantize_flags.py @@ -575,6 +575,36 @@ def fake_quantize(*_args, **_kwargs): assert ran["called"] is False +class TestQuantizeMultiPrecisionDiskFull: + """The multi-precision pipeline drives Quantizer directly (bypassing + quantize_onnx), so it must apply the same disk-full/corruption guard: + a truncated/empty input must surface a clear error instead of ORT's opaque + "Failed to find proper ai.onnx domain" — parity with the single-precision + path, which routes through quantize_onnx. + """ + + @staticmethod + def _invoke(args): + from click.testing import CliRunner + + from winml.modelkit.commands.quantize import quantize as quantize_cmd + + return CliRunner().invoke(quantize_cmd, args, obj={}, catch_exceptions=False) + + def test_empty_input_model_surfaces_clear_error(self, tmp_path): + model = tmp_path / "truncated.onnx" + model.write_bytes(b"") # zero-byte artifact left by a disk-full write + + # Two precisions -> len(precision) > 1 -> _run_multi_precision path. + r = self._invoke(["-m", str(model), "-p", "int4", "-p", "fp16"]) + + assert r.exit_code != 0, r.output + # Collapse rich console wrapping before substring checks. 
+ normalized = " ".join(r.output.split()).lower() + assert "disk space" in normalized + assert "failed to find proper ai.onnx domain" not in normalized + + class TestOverwriteGuard: """The shared --overwrite/--no-overwrite guard on quantize (file) and compile (directory) outputs. Cross-checks the wiring of diff --git a/tests/unit/onnx/test_external_data.py b/tests/unit/onnx/test_external_data.py index 0d118f475..2acb81a82 100644 --- a/tests/unit/onnx/test_external_data.py +++ b/tests/unit/onnx/test_external_data.py @@ -6,12 +6,16 @@ from __future__ import annotations +import errno +import shutil from typing import TYPE_CHECKING import numpy as np import onnx +import pytest from onnx import TensorProto, external_data_helper, helper, numpy_helper +from winml.modelkit.onnx import ONNXSaveError from winml.modelkit.onnx.external_data import ( copy_onnx_model, get_external_data_files, @@ -258,3 +262,33 @@ def test_copy_overwrites_existing_dst_with_external_data(self, tmp_path: Path) - src_arr = numpy_helper.to_array(src_full.graph.initializer[0]) dst_arr = numpy_helper.to_array(dst_full.graph.initializer[0]) assert np.array_equal(src_arr, dst_arr) + + +class TestCopyOnnxModelDiskFull: + """copy_onnx_model surfaces a clear error and cleans up on a failed write.""" + + def test_copy_disk_full_raises_and_cleans_dst( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + src = tmp_path / "src.onnx" + dst = tmp_path / "out" / "dst.onnx" + onnx.save(_make_small_model(), str(src)) # valid, no external data + + def _failing_copy2(_s: object, d: object, *_a: object, **_k: object) -> None: + from pathlib import Path as _Path + + _Path(d).write_bytes(b"") # partial/truncated destination + raise OSError(errno.ENOSPC, "simulated write failure") + + monkeypatch.setattr(shutil, "copy2", _failing_copy2) + + with pytest.raises(ONNXSaveError) as exc_info: + copy_onnx_model(src, dst) + + err = exc_info.value + assert err.disk_full is True + assert isinstance(err, OSError) + 
assert err.errno == errno.ENOSPC # preserved for callers inspecting e.errno + assert "disk space" in str(err).lower() + # The truncated destination must not be left behind. + assert not dst.exists() diff --git a/tests/unit/onnx/test_persistence.py b/tests/unit/onnx/test_persistence.py index 189991d81..c5d771d01 100644 --- a/tests/unit/onnx/test_persistence.py +++ b/tests/unit/onnx/test_persistence.py @@ -11,6 +11,7 @@ from __future__ import annotations +import errno from typing import TYPE_CHECKING import numpy as np @@ -21,6 +22,7 @@ from winml.modelkit.onnx import EXTERNAL_DATA_THRESHOLD from winml.modelkit.onnx.persistence import ( + ONNXSaveError, cleanup_onnx, load_onnx, save_onnx, @@ -622,3 +624,83 @@ class TestConstants: def test_threshold_is_100mib(self) -> None: assert EXTERNAL_DATA_THRESHOLD == 100 * 1024 * 1024 + + +# --------------------------------------------------------------------------- +# Disk-full / failed-write handling (issue #259) +# --------------------------------------------------------------------------- + + +def _make_failing_save_model(errno_code: int): + """Build a fake ``onnx.save_model`` that simulates a failed disk write. + + Mirrors the OS behaviour: ``open(path, "wb")`` truncates/creates the target + first, then the write fails — leaving a partial artifact behind that the + real code must clean up. 
+ """ + + def _fake_save_model(_proto: object, f: str, **kwargs: object) -> None: + from pathlib import Path as _Path + + _Path(f).write_bytes(b"") # 0-byte partial file, like a truncated write + location = kwargs.get("location") + if isinstance(location, str): + _Path(location).write_bytes(b"") + raise OSError(errno_code, "simulated write failure") + + return _fake_save_model + + +class TestSaveOnnxDiskFull: + """save_onnx surfaces a clear error and removes partial files on failure.""" + + def test_inline_disk_full_raises_clear_error( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + model = _make_tiny_model() + model_path = tmp_path / "out.onnx" + monkeypatch.setattr(onnx, "save_model", _make_failing_save_model(errno.ENOSPC)) + + with pytest.raises(ONNXSaveError) as exc_info: + save_onnx(model, model_path, use_external_data=False) + + err = exc_info.value + assert err.disk_full is True + assert isinstance(err, OSError) # backward-compatible with except OSError + assert err.errno == errno.ENOSPC # preserved for callers inspecting e.errno + assert "disk space" in str(err).lower() + # The truncated 0-byte file must not be left behind for a later stage. 
+ assert not model_path.exists() + + def test_external_disk_full_raises_and_cleans_sidecar( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + model = _make_model_with_initializer() + model_path = tmp_path / "out.onnx" + sidecar = tmp_path / "out.onnx.data" + monkeypatch.setattr(onnx, "save_model", _make_failing_save_model(errno.ENOSPC)) + + with pytest.raises(ONNXSaveError) as exc_info: + save_onnx(model, model_path, threshold_size=0) # force external data + + assert exc_info.value.disk_full is True + assert exc_info.value.errno == errno.ENOSPC + assert "disk space" in str(exc_info.value).lower() + assert not model_path.exists() + assert not sidecar.exists() + + def test_non_enospc_oserror_raises_generic_error( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + model = _make_tiny_model() + model_path = tmp_path / "out.onnx" + monkeypatch.setattr(onnx, "save_model", _make_failing_save_model(errno.EACCES)) + + with pytest.raises(ONNXSaveError) as exc_info: + save_onnx(model, model_path, use_external_data=False) + + err = exc_info.value + assert err.disk_full is False + assert err.errno == errno.EACCES # non-disk-full errno is preserved too + assert "Failed to write ONNX model" in str(err) + assert not model_path.exists() diff --git a/tests/unit/test_quantizer.py b/tests/unit/test_quantizer.py index 03d5f54bd..7c0223b3f 100644 --- a/tests/unit/test_quantizer.py +++ b/tests/unit/test_quantizer.py @@ -30,6 +30,22 @@ def rewind(self) -> None: return None +def _write_minimal_onnx_model(path: Path) -> None: + """Write a tiny but valid ONNX model (with an ai.onnx opset) to *path*. + + The quantizer's input guard parses the file and requires a default opset + import, so tests that exercise the quantize flow need a real model on disk. 
+ """ + import onnx + + x = onnx.helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [1, 3]) + y = onnx.helper.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1, 3]) + node = onnx.helper.make_node("Relu", ["X"], ["Y"]) + graph = onnx.helper.make_graph([node], "g", [x], [y]) + model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 17)]) + onnx.save_model(model, str(path)) + + class _FakeOrtModule(ModuleType): quantization: ModuleType @@ -94,7 +110,7 @@ def test_quantize_onnx_removes_only_exact_external_data_sidecar( ) -> None: """Cleanup should remove only the exact .data sidecar for the output model.""" model_path = tmp_path / "model.onnx" - model_path.write_text("input") + _write_minimal_onnx_model(model_path) output_path = tmp_path / "quantized.onnx" exact_sidecar = tmp_path / f"{output_path.name}.data" extra_suffix_sidecar = tmp_path / f"{output_path.name}.data.bak" @@ -145,23 +161,94 @@ def fake_quantize(*, model_input, model_output: str, quant_config) -> None: assert extra_suffix_sidecar.exists() +def _write_opsetless_onnx_model(path: Path) -> None: + """Write a parseable ONNX model that declares no default (ai.onnx) opset.""" + import onnx + + x = onnx.helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [1, 3]) + y = onnx.helper.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1, 3]) + node = onnx.helper.make_node("Relu", ["X"], ["Y"]) + graph = onnx.helper.make_graph([node], "g", [x], [y]) + # Only a custom-domain opset — no "" / "ai.onnx" import, which is the + # signature ORT's get_opset_version() rejects. + model = onnx.helper.make_model( + graph, opset_imports=[onnx.helper.make_opsetid("com.example", 1)] + ) + onnx.save_model(model, str(path)) + + +def test_quantize_empty_input_model_surfaces_clear_error(tmp_path: Path) -> None: + """A zero-byte input model yields a clear disk-space/corruption error. 
+ + Regression for #259: a truncated optimize output must not surface as ORT's + opaque "Failed to find proper ai.onnx domain". + """ + model_path = tmp_path / "optimized.onnx" + model_path.write_bytes(b"") # truncated/zero-byte write left by disk-full + + result = quantize_onnx(model_path, output_path=tmp_path / "quantized.onnx") + + assert result.success is False + assert result.output_path is None + joined = " ".join(result.errors).lower() + assert "disk space" in joined + assert "failed to find proper ai.onnx domain" not in joined + + +def test_quantize_opsetless_input_model_surfaces_clear_error(tmp_path: Path) -> None: + """A model with no ai.onnx opset import yields a clear, specific error.""" + model_path = tmp_path / "optimized.onnx" + _write_opsetless_onnx_model(model_path) + + result = quantize_onnx(model_path, output_path=tmp_path / "quantized.onnx") + + assert result.success is False + assert result.output_path is None + joined = " ".join(result.errors) + assert "no ai.onnx opset import" in joined + assert "Failed to find proper ai.onnx domain" not in joined + + +def test_quantize_unparseable_input_model_surfaces_clear_error( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """An input model that fails to parse yields a clear error, not a traceback.""" + import onnx + + model_path = tmp_path / "optimized.onnx" + _write_minimal_onnx_model(model_path) + + def _raise_parse_error(*_args: Any, **_kwargs: Any) -> Any: + raise RuntimeError("protobuf parse error") + + monkeypatch.setattr(onnx, "load_model", _raise_parse_error) + + result = quantize_onnx(model_path, output_path=tmp_path / "quantized.onnx") + + assert result.success is False + assert result.output_path is None + joined = " ".join(result.errors) + assert "could not be parsed" in joined + assert "disk space" in joined.lower() + + def test_quantize_onnx_applies_model_type_finalizer( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: """A registered model_type finalizer is 
resolved + applied before dispatch. - The model-type-specific quant policy used to be dispatched at each call site - (CLI build, library build). It now lives behind a single seam in - quantize_onnx, keyed on ``config.model_type``: the finalizer is resolved from - the calibration registry and its returned config is what the mode handler - receives. + The model-type-specific quant policy lives behind a single seam in + quantize_onnx, keyed on ``config.model_type``: the finalizer is resolved + from the calibration registry and its returned config is what the Quantizer + pipeline (and every pass built from it) runs against. """ import winml.modelkit.quant.calibration as calibration_mod import winml.modelkit.quant.quantizer as quantizer_mod model_path = tmp_path / "model.onnx" - model_path.write_text("input") + _write_minimal_onnx_model(model_path) output_path = tmp_path / "quantized.onnx" finalized_config = WinMLQuantizationConfig( @@ -178,13 +265,19 @@ def finalize(self, config, *, onnx_path, model_id): # type: ignore[no-untyped-d monkeypatch.setattr(calibration_mod, "get_quant_finalizer", lambda model_type: _StubFinalizer()) - handler_calls: list[WinMLQuantizationConfig] = [] + # The finalized config is threaded into the passes that expand_precision + # builds; capture the Quantizer's pass list to confirm it carries the + # finalized config rather than the original. 
+ captured_passes: list[Any] = [] + + class _FakeQuantizer: + def __init__(self, passes: list[Any]) -> None: + captured_passes.extend(passes) - def _fake_qdq(*, config, **_kwargs): # type: ignore[no-untyped-def] - handler_calls.append(config) - return SimpleNamespace(success=True, output_path=output_path, errors=[]) + def run(self, _model_path, _output_path, *, use_external_data=True): # type: ignore[no-untyped-def] + return SimpleNamespace(success=True, output_path=output_path, errors=[]) - monkeypatch.setattr(quantizer_mod, "_quantize_qdq", _fake_qdq) + monkeypatch.setattr(quantizer_mod, "Quantizer", _FakeQuantizer) result = quantize_onnx( model_path, @@ -200,8 +293,10 @@ def _fake_qdq(*, config, **_kwargs): # type: ignore[no-untyped-def] assert len(finalize_calls) == 1 assert finalize_calls[0]["onnx_path"] == model_path assert finalize_calls[0]["model_id"] == "some/model-id" - # The handler ran against the finalized config, not the original. - assert handler_calls == [finalized_config] + # The pipeline was built from the finalized config, not the original + # (every pass shares the single config object). 
+ assert captured_passes + assert all(p.config is finalized_config for p in captured_passes) def test_quantize_onnx_skips_finalizer_when_calibration_data_provided( @@ -213,7 +308,7 @@ def test_quantize_onnx_skips_finalizer_when_calibration_data_provided( import winml.modelkit.quant.quantizer as quantizer_mod model_path = tmp_path / "model.onnx" - model_path.write_text("input") + _write_minimal_onnx_model(model_path) output_path = tmp_path / "quantized.onnx" def _boom(_model_type): # type: ignore[no-untyped-def] @@ -221,10 +316,14 @@ def _boom(_model_type): # type: ignore[no-untyped-def] monkeypatch.setattr(calibration_mod, "get_quant_finalizer", _boom) - def _fake_qdq(*, config, **_kwargs): # type: ignore[no-untyped-def] - return SimpleNamespace(success=True, output_path=output_path, errors=[]) + class _FakeQuantizer: + def __init__(self, passes: list[Any]) -> None: + pass + + def run(self, _model_path, _output_path, *, use_external_data=True): # type: ignore[no-untyped-def] + return SimpleNamespace(success=True, output_path=output_path, errors=[]) - monkeypatch.setattr(quantizer_mod, "_quantize_qdq", _fake_qdq) + monkeypatch.setattr(quantizer_mod, "Quantizer", _FakeQuantizer) result = quantize_onnx( model_path,