microsoft · timenick · Jun 30, 2026 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
@@ -128,6 +128,14 @@ Remove-Item -Recurse -Force "$env:USERPROFILE\.cache\winml"
 
 The next `winml build` will re-create the cache as needed. Use `--rebuild` to force a full rebuild without relying on cached intermediates.
 
+When a build runs out of disk space mid-write, `winml` now stops with a clear message instead of a misleading downstream error:
+
+```text
+ONNXSaveError: Insufficient disk space — unable to write ONNX model to <path>. Free up disk space and try again.
+```
+
+The partially written model file (and its `.data` sidecar, if one was being written) is removed automatically, so a later stage never reads a truncated model. (Previously this surfaced much later as a confusing `ValueError: Failed to find proper ai.onnx domain` during quantization.) Free up space using the command above and re-run the build.
+
 ---
 
 ## General Tips

@@ -810,8 +810,14 @@ def _patch_device(cfg: WinMLBuildConfig) -> None:
 
         # Map common errors to actionable hints
         err_str = str(e)
+        err_lower = err_str.lower()
         hint = None
-        if "Quantization failed" in err_str:
+        if "disk space" in err_lower or "no space left" in err_lower:
+            hint = (
+                "Free up disk space (e.g. clear the HuggingFace cache or "
+                "~/.cache/winml) and rebuild."
+            )
+        elif "Quantization failed" in err_str:
             hint = "Try: --no-quant to skip quantization"
         elif "Compilation failed" in err_str:
             hint = "Try: --no-compile to skip compilation"

@@ -333,6 +333,7 @@ def _run_multi_precision(
     """Execute a multi-pass quantization pipeline from ordered precision strings."""
     from ..config.precision import extract_weight_bits
     from ..quant import Quantizer, WinMLQuantizationConfig, expand_precision
+    from ..quant.quantizer import _check_input_model_opset
 
     modes = [_cli_precision_to_mode(p) for p in precision]
     has_calibration_pass = any(m == "static" for m in modes)
@@ -387,6 +388,17 @@ def _run_multi_precision(
 
     try:
         console.print(f"\n[bold]Running pipeline: {label}...[/bold]")
+        # Mirror quantize_onnx's input guard: the multi-precision path drives the
+        # Quantizer pipeline directly (bypassing quantize_onnx), so surface a
+        # clear disk-full/corruption error here too instead of ORT's opaque
+        # "Failed to find proper ai.onnx domain" deep inside a pass. A missing
+        # file is left to Quantizer.run(), which reports "Model not found".
+        opset_error = _check_input_model_opset(model) if model.exists() else None
+        if opset_error is not None:
+            console.print("\n[bold red]Pipeline failed:[/bold red]")
+            console.print(f"  {opset_error}")
+            raise click.ClickException("Pipeline failed")
+
         result = Quantizer(passes).run(model, output)
 
         if result.success:

@@ -20,7 +20,7 @@
 from .external_data import copy_onnx_model, get_onnx_model_hash
 from .io import InputTensorSpec, OutputTensorSpec, generate_inputs_from_onnx, get_io_config
 from .metadata import capture_metadata, restore_metadata
-from .persistence import cleanup_onnx, load_onnx, save_onnx
+from .persistence import ONNXSaveError, cleanup_onnx, load_onnx, save_onnx
 from .shape import infer_onnx_shapes, infer_shapes
 from .utils import EXTERNAL_DATA_THRESHOLD, check_onnx_model, get_model_size
 
@@ -29,6 +29,7 @@
     "EXTERNAL_DATA_THRESHOLD",
     "InputTensorSpec",
     "ONNXDomain",
+    "ONNXSaveError",
     "OutputTensorSpec",
     "SupportedONNXType",
     "capture_metadata",

@@ -26,7 +26,7 @@
 import onnx
 from onnx import external_data_helper
 
-from .persistence import load_onnx, save_onnx
+from .persistence import _cleanup_partial_save, _raise_save_error, load_onnx, save_onnx
 
 
 logger = logging.getLogger(__name__)
@@ -219,23 +219,30 @@ def copy_onnx_model(
     dst.parent.mkdir(parents=True, exist_ok=True)
 
     try:
-        external_files = get_external_data_files(src)
-    except Exception:
-        # Not a valid ONNX file or can't parse — fall back to simple copy
-        shutil.copy2(src, dst)
-        return
-
-    if not external_files:
-        # No external data — simple copy
-        shutil.copy2(src, dst)
-        return
-
-    if len(external_files) == 1:
-        # Single external data file — copy .data + patch .onnx
-        _copy_single_external(src, dst, external_files[0])
-    else:
-        # Multiple files — consolidate into one
-        _copy_consolidate(src, dst)
+        try:
+            external_files = get_external_data_files(src)
+        except Exception:
+            # Not a valid ONNX file or can't parse — fall back to simple copy
+            shutil.copy2(src, dst)
+            return
+
+        if not external_files:
+            # No external data — simple copy
+            shutil.copy2(src, dst)
+            return
+
+        if len(external_files) == 1:
+            # Single external data file — copy .data + patch .onnx
+            _copy_single_external(src, dst, external_files[0])
+        else:
+            # Multiple files — consolidate into one
+            _copy_consolidate(src, dst)
+    except OSError as e:
+        # A failed copy (commonly disk-full) can leave a truncated destination
+        # and/or .data sidecar behind. Remove them and surface a clear error
+        # instead of letting a later stage load the corrupt model.
+        _cleanup_partial_save(dst, dst.parent / f"{dst.name}.data")
+        _raise_save_error(e, dst)
 
     logger.debug(
         "Copied ONNX model with external data: %s -> %s (%d data files)",

@@ -12,9 +12,11 @@
 
 from __future__ import annotations
 
+import errno
 import logging
 import os
 from pathlib import Path
+from typing import NoReturn
 
 import onnx
 from onnx.external_data_helper import _get_all_tensors, uses_external_data
@@ -25,6 +27,92 @@
 logger = logging.getLogger(__name__)
 
 
+# Windows ERROR_DISK_FULL. Python usually maps this to errno.ENOSPC via the CRT,
+# but we check the raw winerror too so a disk-full write is always recognised.
+_WINDOWS_ERROR_DISK_FULL = 112
+
+
+class ONNXSaveError(OSError):
+    """Signal that an ONNX model could not be written to disk.
+
+    Because this is an :class:`OSError`, existing ``except OSError`` handlers
+    still catch it and can still read the originating ``errno`` (supplied via
+    ``errno_code``); what changes is that the message is clear and actionable.
+    The main use is the disk-full case: otherwise a failed write leaves a
+    truncated/zero-byte ``.onnx`` behind and the real cause only appears much
+    later, as an opaque opset-parsing error in a downstream stage.
+
+    Note:
+        Only the 2-argument ``OSError(errno, strerror)`` form fills in
+        ``errno`` at construction, and that form also turns ``str(self)`` into
+        ``"[Errno N] <message>"``. The instance is therefore built from the
+        message alone and ``errno`` is assigned afterwards, keeping the text
+        clean while leaving the code readable through ``e.errno``.
+
+    Attributes:
+        path: Destination path that could not be written.
+        disk_full: ``True`` when the caller identified the failure as an
+            out-of-disk-space condition.
+        errno: The originating OS error code, when known (inherited from
+            :class:`OSError`).
+    """
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        path: str | Path | None = None,
+        disk_full: bool = False,
+        errno_code: int | None = None,
+    ) -> None:
+        super().__init__(message)
+        self.path, self.disk_full = path, disk_full
+        # A message-only OSError has ``errno`` of ``None``; restore the real
+        # code (when one was given) so ``except OSError`` callers can read it.
+        if errno_code is None:
+            return
+        self.errno = errno_code
+
+
+def _is_disk_full_error(error: OSError) -> bool:
+    """Return ``True`` when *error* means the disk (or the user's quota) is full."""
+    # EDQUOT is resolved defensively: not every platform defines that symbol.
+    if error.errno in (errno.ENOSPC, getattr(errno, "EDQUOT", errno.ENOSPC)):
+        return True
+    return getattr(error, "winerror", None) == _WINDOWS_ERROR_DISK_FULL
+
+
+def _cleanup_partial_save(*paths: Path | None) -> None:
+    """Delete whatever a failed write left on disk, tolerating removal errors.
+
+    An interrupted ``onnx.save_model`` / copy may leave an empty or truncated
+    ``.onnx`` file (plus its ``.data`` sidecar) behind; unlinking them keeps a
+    later stage from loading a corrupt model and reporting a misleading error.
+    ``None`` entries are skipped and already-missing paths are not an error.
+    """
+    for leftover in (p for p in paths if p is not None):
+        try:
+            Path(leftover).unlink(missing_ok=True)
+        except OSError:
+            # Cleanup is best effort: log the failure at debug level and move on.
+            logger.debug("Could not remove partial artifact: %s", leftover, exc_info=True)
+
+
+def _raise_save_error(error: OSError, path: Path) -> NoReturn:
+    """Re-raise a failed write as a clear :class:`ONNXSaveError` chained to *error*."""
+    out_of_space = _is_disk_full_error(error)
+    if not out_of_space:
+        text = f"Failed to write ONNX model to {path}: {error}"
+    else:
+        text = (
+            f"Insufficient disk space — unable to write ONNX model to {path}. "
+            "Free up disk space and try again."
+        )
+    # Carry the OS error code over and chain the cause so neither is lost.
+    wrapped = ONNXSaveError(text, path=path, disk_full=out_of_space, errno_code=error.errno)
+    raise wrapped from error
+
+
 def load_onnx(
     path: str | Path,
     *,
@@ -127,20 +215,31 @@ def save_onnx(
         # path.parent is guaranteed to exist: mkdir() was called above.
         original_cwd = Path.cwd()
         try:
-            os.chdir(path.parent)
-            onnx.save_model(
-                model,
-                path.name,
-                save_as_external_data=True,
-                all_tensors_to_one_file=True,
-                location=ext_location,
-                size_threshold=1024,
-            )
-        finally:
-            os.chdir(original_cwd)
+            try:
+                os.chdir(path.parent)
+                onnx.save_model(
+                    model,
+                    path.name,
+                    save_as_external_data=True,
+                    all_tensors_to_one_file=True,
+                    location=ext_location,
+                    size_threshold=1024,
+                )
+            finally:
+                os.chdir(original_cwd)
+        except OSError as e:
+            # A failed external-data write can leave a truncated .onnx and/or
+            # .data sidecar behind; remove them so a later stage never loads a
+            # corrupt model and reports a misleading error.
+            _cleanup_partial_save(path, ext_path)
+            _raise_save_error(e, path)
     else:
         logger.debug("Saving ONNX model inline to %s", path)
-        onnx.save_model(model, str(path))
+        try:
+            onnx.save_model(model, str(path))
+        except OSError as e:
+            _cleanup_partial_save(path)
+            _raise_save_error(e, path)
 
 
 def cleanup_onnx(path: str | Path) -> list[Path]:

@@ -219,6 +219,57 @@ def _merge_results(base: QuantizeResult, new: QuantizeResult) -> QuantizeResult:
     )
 
 
+def _check_input_model_opset(model_path: Path) -> str | None:
+    """Return an error message when *model_path* is empty or corrupt, else ``None``.
+
+    A usable model must declare a default (``""`` / ``ai.onnx``) opset import,
+    the same requirement ORT's ``get_opset_version`` enforces. A zero-byte or
+    truncated file either fails to parse or yields an (almost) empty ModelProto
+    without one — the signature of an earlier stage that did not finish writing
+    (most commonly because the disk filled up). Reporting it here surfaces the
+    real cause instead of ORT's opaque "Failed to find proper ai.onnx domain".
+
+    Checks run cheapest-first. A ``stat`` rejects a zero-byte file (the most
+    common disk-full artefact) without parsing anything. Every non-empty file,
+    a healthy one included, is then parsed with ``onnx.load_model`` — graph
+    only, external weights are not loaded, so a missing ``.data`` sidecar is
+    not a problem here — to catch the rarer truncated-but-nonzero case.
+    """
+    from onnx import load_model
+
+    # Cheap check first: an empty file can be rejected without parsing it.
+    try:
+        size = model_path.stat().st_size
+    except OSError:
+        # Unexpected, since callers check existence before calling; defer to
+        # the parse below, which reports a clear error either way.
+        size = None
+    if size == 0:
+        return (
+            f"Input ONNX model is empty (zero bytes): {model_path}. "
+            "A previous build stage may have run out of disk space. "
+            "Free up disk space and rebuild."
+        )
+
+    try:
+        proto = load_model(str(model_path), load_external_data=False)
+    except Exception as exc:
+        return (
+            f"Input ONNX model could not be parsed: {model_path} ({exc}). "
+            "The file may be truncated or corrupt — for example, a previous "
+            "build stage may have run out of disk space. Free up disk space "
+            "and rebuild."
+        )
+
+    if any(entry.domain in ("", "ai.onnx") for entry in proto.opset_import):
+        return None
+    return (
+        f"Input ONNX model is empty or corrupt (no ai.onnx opset import): "
+        f"{model_path}. It may have been truncated by a previous failed "
+        "write (e.g. insufficient disk space). Free up disk space and rebuild."
+    )
+
+
 def quantize_onnx(
     model_path: str | Path,
     output_path: str | Path | None = None,
@@ -257,6 +308,25 @@ def quantize_onnx(
         output_path = model_path.parent / f"{model_path.stem}_quantized.onnx"
 
     use_external_data: bool = kwargs.pop("use_external_data", True)
+    if kwargs:
+        raise TypeError(f"quantize_onnx() got unexpected keyword arguments: {sorted(kwargs)}")
+
+    # Guard against an empty/corrupt input model before building the pipeline.
+    # A previous stage that ran out of disk space can leave a truncated/zero-byte
+    # .onnx behind; without this check a pass fails deep inside ORT with the
+    # opaque "Failed to find proper ai.onnx domain". Surface the real cause
+    # instead, and catch it before the model-type finalizer reads the model. A
+    # missing file is left to Quantizer.run(), which reports a clear
+    # "Model not found".
+    if model_path.exists():
+        opset_error = _check_input_model_opset(model_path)
+        if opset_error is not None:
+            return QuantizeResult(
+                success=False,
+                output_path=None,
+                errors=[opset_error],
+            )
+
     # Apply model-type-specific quant finalizer if registered. Some model types
     # finalize calibration reader / nodes-to-exclude / dtypes only once the
     # exported ONNX exists.
@@ -267,7 +337,5 @@ def quantize_onnx(
         if finalizer is not None:
             config = finalizer.finalize(config, onnx_path=model_path, model_id=config.model_id)
 
-    if kwargs:
-        raise TypeError(f"quantize_onnx() got unexpected keyword arguments: {sorted(kwargs)}")
     passes = expand_precision(config=config)
     return Quantizer(passes).run(model_path, output_path, use_external_data=use_external_data)