microsoft · zhenchaoni · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
@@ -17,7 +17,6 @@
 import logging
 import os
 import re
-import sys
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, cast
 
@@ -839,8 +838,7 @@ def analyze(
 
         # Validate model
         if not model.exists():
-            logger.error("ONNX model file not found: %s", model)
-            sys.exit(2)
+            raise click.UsageError(f"ONNX model file not found: {model}")
 
         from ..analyze.utils.ep_utils import (
             has_any_rule_data,
@@ -890,15 +888,14 @@ def analyze(
                             "Resolved absolute path(s) from %s: (none)",
                             WINMLCLI_RULES_DIR_FOR_DEBUG_ENV,
                         )
-                sys.exit(2)
+                raise click.UsageError("--debug rules directory not configured.")
 
         search_dirs = get_runtime_rules_search_dirs()
         if not has_any_rule_data():
             searched = ", ".join(str(p) for p in search_dirs) if search_dirs else "(none)"
-            logger.error("No runtime rule parquet files were found.")
             logger.error("Please reinstall winml-cli, or manually download rule parquet files.")
             logger.error("Searched directories: %s", searched)
-            sys.exit(2)
+            raise click.UsageError("No runtime rule parquet files were found.")
 
         # Resolve the EP/device selection. `all` keeps the full rule-data-backed
         # set (fan-out, unchanged). `auto` resolves to a single best target from
@@ -920,8 +917,7 @@ def analyze(
             try:
                 resolved_device, _ = resolve_device(device="auto", ep=ep_hint)
             except (ValueError, RuntimeError) as e:
-                logger.error("Could not auto-select a device: %s", e)
-                sys.exit(2)
+                raise click.UsageError(f"Could not auto-select a device: {e}") from e
             devices = [resolved_device]
         elif device is not None:
             devices = [device]
@@ -955,12 +951,12 @@ def analyze(
                 # of raising an unguarded IndexError on ``devices[0]``.
                 ref_device = devices[0] if devices else None
                 if not ref_device:
-                    logger.error("No device context available for EP auto-resolution.")
-                    sys.exit(2)
+                    raise click.UsageError("No device context available for EP auto-resolution.")
                 compatible_eps = resolve_eps(ref_device)
                 if not compatible_eps:
-                    logger.error("No execution provider is available for device '%s'.", ref_device)
-                    sys.exit(2)
+                    raise click.UsageError(
+                        f"No execution provider is available for device '{ref_device}'."
+                    )
                 eps = [compatible_eps[0]]
             else:
                 # ep is a specific EP or alias
@@ -988,8 +984,7 @@ def analyze(
         local_pairs = set(_get_local_ep_device_pairs())
 
         if not execution_pairs:
-            logger.error("No EP/device combination matched the current selection.")
-            sys.exit(2)
+            raise click.UsageError("No EP/device combination matched the current selection.")
 
         logger.info("Analyzing model: %s", model)
         logger.info(
@@ -1440,16 +1435,19 @@ def on_node_result(pattern_runtime: PatternRuntime) -> None:
 
         # Exit code: 0 = fully supported, 1 = partial support
         overall_supported = all(run_result.is_fully_supported() for run_result in analysis_results)
-        sys.exit(0 if overall_supported else 1)
+        if not overall_supported:
+            raise cli_utils.PartialSupportError
 
     except FileNotFoundError as e:
-        logger.error("File not found: %s", e)
-        sys.exit(2)
+        raise click.UsageError(f"File not found: {e}") from e
+    except (click.exceptions.Exit, click.ClickException):
+        # Exit/click exceptions are intentional control flow; re-raise so the
+        # catch-all below doesn't relabel them as "Analysis failed".
+        raise
     except Exception as e:
-        logger.error("Analysis failed: %s", e)
         if verbose:
             logger.exception("Full traceback:")
-        sys.exit(2)
+        raise click.UsageError(f"Analysis failed: {e}") from e
 
 
 __all__ = ["analyze"]
@@ -1812,9 +1812,10 @@ def perf(
             from ..optracing import is_qnn_profiling_available
 
             if not is_qnn_profiling_available():
-                console.print("[red]Error:[/red] Op-tracing requires onnxruntime-qnn")
-                console.print("Install with: [bold]pip install onnxruntime-qnn[/bold]")
-                raise SystemExit(1)
+                raise click.ClickException(
+                    "Op-tracing requires onnxruntime-qnn. "
+                    "Install with: pip install onnxruntime-qnn"
+                )
 
             from ..optracing import (
                 display_op_trace_report,
@@ -1831,20 +1832,18 @@ def perf(
                 if onnx_for_trace is None:
                     raise AttributeError("benchmark._model not initialized")
             except AttributeError:
-                console.print(
-                    "[red]Error:[/red] Could not determine ONNX model path for op-tracing"
-                )
-                raise SystemExit(1) from None
+                raise click.ClickException(
+                    "Could not determine ONNX model path for op-tracing"
+                ) from None
 
             output_dir = output.parent if output else Path()
 
             # Look up tracer via registry (EP-agnostic).
             tracer_cls = get_tracer("QNNExecutionProvider", op_tracing)
             if tracer_cls is None:
-                console.print(
-                    f"[red]Error:[/red] No tracer registered for QNN EP at level '{op_tracing}'"
+                raise click.ClickException(
+                    f"No tracer registered for QNN EP at level '{op_tracing}'"
                 )
-                raise SystemExit(1)
 
             profiler = tracer_cls(
                 onnx_for_trace,
@@ -1871,6 +1870,10 @@ def perf(
         # the convention used by Click for argument problems.
         raise click.UsageError(f"Model not found: {e}") from e
 
+    except click.ClickException:
+        # Click exceptions are already intentional control flow; re-raise so
+        # the catch-all below doesn't relabel them as "Benchmark failed".
+        raise
     except Exception as e:
         if verbose:
             logger.exception("Benchmark failed")

@@ -519,6 +519,18 @@ def run(
     Uses embedded inference by default. Pass ``--connect`` to route
     through a running ``winml serve`` instance instead.
 
+    Exit Codes:
+
+        0: Success
+
+        1: General error
+
+        2: Usage error — invalid input or arguments
+
+        3: Model load failure
+
+        4: Inference failure
+
     Examples:
     \b
         # Image classification (shortcut)
@@ -547,17 +559,15 @@ def run(
     pipeline_kwargs: dict[str, Any] = {}
     for p in params:
         if "=" not in p:
-            click.echo(f"Error: invalid --param format: '{p}'. Use KEY=VALUE.", err=True)
-            ctx.exit(2)
+            raise click.UsageError(f"invalid --param format: '{p}'. Use KEY=VALUE.")
         k, v = p.split("=", 1)
         pipeline_kwargs[k] = _parse_param_value(v)
 
     # Parse --input entries (raw strings, coerced after model load)
     raw_inputs: dict[str, str] = {}
     for inp in input_args:
         if "=" not in inp:
-            click.echo(f"Error: invalid --input format: '{inp}'. Use NAME=VALUE.", err=True)
-            ctx.exit(2)
+            raise click.UsageError(f"invalid --input format: '{inp}'. Use NAME=VALUE.")
         k, v = inp.split("=", 1)
         raw_inputs[k] = v
 
@@ -566,17 +576,14 @@ def run(
     for fp in files:
         file_path = Path(fp)
         if not file_path.exists() or not file_path.is_file():
-            click.echo(f"Error: file not found: {fp}", err=True)
-            ctx.exit(2)
+            raise click.UsageError(f"file not found: {fp}")
         file_bytes_list.append(file_path.read_bytes())
 
     if len(file_bytes_list) > 1:
-        click.echo(
-            f"Error: --file accepts only one file (got {len(file_bytes_list)}). "
-            "Use --input for multiple file inputs (e.g. -I image_0=@a.jpg -I image_1=@b.jpg).",
-            err=True,
+        raise click.UsageError(
+            f"--file accepts only one file (got {len(file_bytes_list)}). "
+            "Use --input for multiple file inputs (e.g. -I image_0=@a.jpg -I image_1=@b.jpg)."
         )
-        ctx.exit(2)
 
     # Check if any input was provided
     has_inputs = bool(file_bytes_list) or text is not None or bool(raw_inputs)
@@ -619,8 +626,7 @@ def run(
         try:
             engine.load_schema_only(model, task=task, device=device, ep=ep)
         except (OSError, ValueError, RuntimeError) as exc:
-            click.echo(f"Error loading model: {exc}", err=True)
-            ctx.exit(3)
+            raise cli_utils.ModelLoadError(f"Error loading model: {exc}") from exc
         _print_schema(engine, output_format=output_format, output_path=output)
         return
 
@@ -638,8 +644,7 @@ def run(
                 allow_unsupported_nodes=allow_unsupported_nodes,
             )
     except (OSError, ValueError, RuntimeError) as exc:
-        click.echo(f"Error loading model: {exc}", err=True)
-        ctx.exit(3)
+        raise cli_utils.ModelLoadError(f"Error loading model: {exc}") from exc
 
     # No inputs: print hint and exit
     if not has_inputs:
@@ -651,33 +656,28 @@ def run(
     try:
         coerced_inputs = _coerce_inputs(raw_inputs, schema)
     except click.ClickException as exc:
-        click.echo(f"Error: {exc.format_message()}", err=True)
-        ctx.exit(2)
+        raise click.UsageError(exc.format_message()) from exc
 
     # Merge --file/--text shortcuts with --input
     try:
         inputs = _resolve_shortcuts(file_bytes_list, text, coerced_inputs, schema)
     except click.ClickException as exc:
-        click.echo(f"Error: {exc.format_message()}", err=True)
-        ctx.exit(2)
+        raise click.UsageError(exc.format_message()) from exc
 
     # Check input / -P collision (after shortcuts are resolved so that
     # --file and --text shortcut keys are included in the check)
     collision = set(inputs.keys()) & set(pipeline_kwargs.keys())
     if collision:
         key = sorted(collision)[0]
-        click.echo(
-            f"Error: '{key}' specified as both input and -P. "
-            f"Use --input for model inputs and -P for pipeline parameters.",
-            err=True,
+        raise click.UsageError(
+            f"'{key}' specified as both input and -P. "
+            "Use --input for model inputs and -P for pipeline parameters."
         )
-        ctx.exit(2)
 
     try:
         prediction = engine.predict(inputs=inputs, **pipeline_kwargs)
     except (ValueError, TypeError, RuntimeError, OSError) as exc:
-        click.echo(f"Error during inference: {exc}", err=True)
-        ctx.exit(4)
+        raise cli_utils.InferenceError(f"Error during inference: {exc}") from exc
 
     _print_result(prediction.model_dump(), output_format=output_format, output_path=output)
 

@@ -16,7 +16,6 @@
 from __future__ import annotations
 
 import logging
-import sys
 from typing import TYPE_CHECKING
 
 import click
@@ -117,12 +116,10 @@ def serve(
     """
     try:
         import uvicorn
-    except ImportError:
-        click.echo(
-            "Error: uvicorn is required. Install with: pip install uvicorn[standard]",
-            err=True,
-        )
-        sys.exit(1)
+    except ImportError as e:
+        raise click.ClickException(
+            "uvicorn is required. Install with: pip install uvicorn[standard]"
+        ) from e
 
     if ctx.obj and ctx.obj.get("debug"):
         logging.getLogger("modelkit").setLevel(logging.DEBUG)
@@ -135,8 +132,7 @@ def serve(
             from ..serve.cli_api import app
             from ..serve.cli_api import print_startup_banner as _banner0
         except ImportError as e:
-            click.echo(f"Error: Failed to load serving module: {e}", err=True)
-            sys.exit(1)
+            raise click.ClickException(f"Failed to load serving module: {e}") from e
         _banner0(host=host, port=port)
         uvicorn.run(app, host=host, port=port, reload=auto_reload, log_level="warning")
         return
@@ -148,8 +144,7 @@ def serve(
         from ..serve.app import create_app
         from ..serve.app import print_startup_banner as _banner1
     except ImportError as e:
-        click.echo(f"Error: Failed to load inference serving module: {e}", err=True)
-        sys.exit(1)
+        raise click.ClickException(f"Failed to load inference serving module: {e}") from e
 
     mode = "multi" if multi else "single"
     inference_app = create_app(

@@ -29,6 +29,46 @@
 OutputFormat: TypeAlias = Literal["text", "json", "table", "compact"]
 
 
+class ModelLoadError(click.ClickException):
+    """Exit code 3: model could not be loaded onto the device/EP.
+
+    Use for failures loading a model onto a device/EP, missing accelerators,
+    or session creation that fails for hardware reasons. The message is printed
+    verbatim (no ``Error:`` prefix), to stderr unless ``show`` is given a file.
+    """
+
+    exit_code = 3
+
+    def show(self, file: Any = None) -> None:
+        """Print the message verbatim to ``file``; default to stderr if None."""
+        click.echo(self.format_message(), file=file, err=True)
+
+
+class InferenceError(click.ClickException):
+    """Exit code 4: inference/prediction failed at runtime.
+
+    Use for prediction failures after the model loaded successfully. The message
+    is printed verbatim (no ``Error:`` prefix), to stderr unless given a file.
+    """
+
+    exit_code = 4
+
+    def show(self, file: Any = None) -> None:
+        """Print the message verbatim to ``file``; default to stderr if None."""
+        click.echo(self.format_message(), file=file, err=True)
+
+
+class PartialSupportError(click.exceptions.Exit):
+    """Exit with status 1 to report a valid negative result, not an error.
+
+    Being an ``Exit`` rather than a ``ClickException``, it is raised silently
+    (no ``Error:`` prefix); e.g. analyze uses it when a model is not fully supported.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(code=1)
+
+
 # Shared stderr console for security/diagnostic messages emitted from utils.
 # Mirrors the module-level ``console = Console()`` pattern used by individual
 # command modules, but targets stderr so messages survive ``-q/--quiet``.