From 4fa58483cd7954b157101918a4e3930ccbe52b29 Mon Sep 17 00:00:00 2001
From: zhenchaoni <zhenni@microsoft.com>
Date: Mon, 29 Jun 2026 15:13:48 +0800
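Subject: [PATCH 1/2] Use Click exceptions for consistent CLI exit codes

Replace direct sys.exit()/ctx.exit()/SystemExit calls in the analyze, perf,
run and serve commands with Click exceptions so that exit codes are produced
in one consistent way. Add ModelLoadError (exit code 3), InferenceError
(exit code 4) and PartialSupportError (exit code 1) to utils/cli.py.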

---
 src/winml/modelkit/commands/analyze.py | 36 +++++++++++------------
 src/winml/modelkit/commands/perf.py    | 23 ++++++++-------
 src/winml/modelkit/commands/run.py     | 40 +++++++++-----------------
 src/winml/modelkit/commands/serve.py   | 17 ++++-------
 src/winml/modelkit/utils/cli.py        | 38 ++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 67 deletions(-)

diff --git a/src/winml/modelkit/commands/analyze.py b/src/winml/modelkit/commands/analyze.py
index ef7ae95bd..34cf017d5 100644
--- a/src/winml/modelkit/commands/analyze.py
+++ b/src/winml/modelkit/commands/analyze.py
@@ -17,7 +17,6 @@
 import logging
 import os
 import re
-import sys
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, cast
 
@@ -832,8 +831,7 @@ def analyze(
 
         # Validate model
         if not model.exists():
-            logger.error("ONNX model file not found: %s", model)
-            sys.exit(2)
+            raise click.UsageError(f"ONNX model file not found: {model}")
 
         from ..analyze.utils.ep_utils import (
             has_any_rule_data,
@@ -883,15 +881,14 @@ def analyze(
                             "Resolved absolute path(s) from %s: (none)",
                             WINMLCLI_RULES_DIR_FOR_DEBUG_ENV,
                         )
-                sys.exit(2)
+                raise click.UsageError("--debug rules directory not configured.")
 
         search_dirs = get_runtime_rules_search_dirs()
         if not has_any_rule_data():
             searched = ", ".join(str(p) for p in search_dirs) if search_dirs else "(none)"
-            logger.error("No runtime rule parquet files were found.")
             logger.error("Please reinstall winml-cli, or manually download rule parquet files.")
             logger.error("Searched directories: %s", searched)
-            sys.exit(2)
+            raise click.UsageError("No runtime rule parquet files were found.")
 
         # Resolve the EP/device selection. `all` keeps the full rule-data-backed
         # set (fan-out, unchanged). `auto` resolves to a single best target from
@@ -913,8 +910,7 @@ def analyze(
             try:
                 resolved_device, _ = resolve_device(device="auto", ep=ep_hint)
             except (ValueError, RuntimeError) as e:
-                logger.error("Could not auto-select a device: %s", e)
-                sys.exit(2)
+                raise click.UsageError(f"Could not auto-select a device: {e}") from e
             devices = [resolved_device]
         elif device is not None:
             devices = [device]
@@ -948,14 +944,12 @@ def analyze(
                 # of raising an unguarded IndexError on ``devices[0]``.
                 ref_device = devices[0] if devices else None
                 if not ref_device:
-                    logger.error("No device context available for EP auto-resolution.")
-                    sys.exit(2)
+                    raise click.UsageError("No device context available for EP auto-resolution.")
                 compatible_eps = resolve_eps(ref_device)
                 if not compatible_eps:
-                    logger.error(
-                        "No execution provider is available for device '%s'.", ref_device
+                    raise click.UsageError(
+                        f"No execution provider is available for device '{ref_device}'."
                     )
-                    sys.exit(2)
                 eps = [compatible_eps[0]]
             else:
                 # ep is a specific EP or alias
@@ -983,8 +977,7 @@ def analyze(
         local_pairs = set(_get_local_ep_device_pairs())
 
         if not execution_pairs:
-            logger.error("No EP/device combination matched the current selection.")
-            sys.exit(2)
+            raise click.UsageError("No EP/device combination matched the current selection.")
 
         logger.info("Analyzing model: %s", model)
         logger.info(
@@ -1435,16 +1428,19 @@ def on_node_result(pattern_runtime: PatternRuntime) -> None:
 
         # Exit code: 0 = fully supported, 1 = partial support
         overall_supported = all(run_result.is_fully_supported() for run_result in analysis_results)
-        sys.exit(0 if overall_supported else 1)
+        if not overall_supported:
+            raise cli_utils.PartialSupportError()
 
     except FileNotFoundError as e:
-        logger.error("File not found: %s", e)
-        sys.exit(2)
+        raise click.UsageError(f"File not found: {e}") from e
+    except (click.exceptions.Exit, click.ClickException):
+        # Exit/click exceptions are intentional control flow; re-raise so the
+        # catch-all below doesn't relabel them as "Analysis failed".
+        raise
     except Exception as e:
-        logger.error("Analysis failed: %s", e)
         if verbose:
             logger.exception("Full traceback:")
-        sys.exit(2)
+        raise click.UsageError(f"Analysis failed: {e}") from e
 
 
 __all__ = ["analyze"]
diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index d64dda258..e29346fe8 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -1807,9 +1807,10 @@ def perf(
             from ..optracing import is_qnn_profiling_available
 
             if not is_qnn_profiling_available():
-                console.print("[red]Error:[/red] Op-tracing requires onnxruntime-qnn")
-                console.print("Install with: [bold]pip install onnxruntime-qnn[/bold]")
-                raise SystemExit(1)
+                raise click.ClickException(
+                    "Op-tracing requires onnxruntime-qnn. "
+                    "Install with: pip install onnxruntime-qnn"
+                )
 
             from ..optracing import (
                 display_op_trace_report,
@@ -1826,20 +1827,18 @@ def perf(
                 if onnx_for_trace is None:
                     raise AttributeError("benchmark._model not initialized")
             except AttributeError:
-                console.print(
-                    "[red]Error:[/red] Could not determine ONNX model path for op-tracing"
-                )
-                raise SystemExit(1) from None
+                raise click.ClickException(
+                    "Could not determine ONNX model path for op-tracing"
+                ) from None
 
             output_dir = output.parent if output else Path()
 
             # Look up tracer via registry (EP-agnostic).
             tracer_cls = get_tracer("QNNExecutionProvider", op_tracing)
             if tracer_cls is None:
-                console.print(
-                    f"[red]Error:[/red] No tracer registered for QNN EP at level '{op_tracing}'"
+                raise click.ClickException(
+                    f"No tracer registered for QNN EP at level '{op_tracing}'"
                 )
-                raise SystemExit(1)
 
             profiler = tracer_cls(
                 onnx_for_trace,
@@ -1866,6 +1865,10 @@ def perf(
         # the convention used by Click for argument problems.
         raise click.UsageError(f"Model not found: {e}") from e
 
+    except click.ClickException:
+        # Click exceptions are already intentional control flow; re-raise so
+        # the catch-all below doesn't relabel them as "Benchmark failed".
+        raise
     except Exception as e:
         if verbose:
             logger.exception("Benchmark failed")
diff --git a/src/winml/modelkit/commands/run.py b/src/winml/modelkit/commands/run.py
index 193809b4e..8277e9d23 100644
--- a/src/winml/modelkit/commands/run.py
+++ b/src/winml/modelkit/commands/run.py
@@ -541,8 +541,7 @@ def run(
     pipeline_kwargs: dict[str, Any] = {}
     for p in params:
         if "=" not in p:
-            click.echo(f"Error: invalid --param format: '{p}'. Use KEY=VALUE.", err=True)
-            ctx.exit(2)
+            raise click.UsageError(f"invalid --param format: '{p}'. Use KEY=VALUE.")
         k, v = p.split("=", 1)
         pipeline_kwargs[k] = _parse_param_value(v)
 
@@ -550,8 +549,7 @@ def run(
     raw_inputs: dict[str, str] = {}
     for inp in input_args:
         if "=" not in inp:
-            click.echo(f"Error: invalid --input format: '{inp}'. Use NAME=VALUE.", err=True)
-            ctx.exit(2)
+            raise click.UsageError(f"invalid --input format: '{inp}'. Use NAME=VALUE.")
         k, v = inp.split("=", 1)
         raw_inputs[k] = v
 
@@ -560,17 +558,14 @@ def run(
     for fp in files:
         file_path = Path(fp)
         if not file_path.exists() or not file_path.is_file():
-            click.echo(f"Error: file not found: {fp}", err=True)
-            ctx.exit(2)
+            raise click.UsageError(f"file not found: {fp}")
         file_bytes_list.append(file_path.read_bytes())
 
     if len(file_bytes_list) > 1:
-        click.echo(
-            f"Error: --file accepts only one file (got {len(file_bytes_list)}). "
-            "Use --input for multiple file inputs (e.g. -I image_0=@a.jpg -I image_1=@b.jpg).",
-            err=True,
+        raise click.UsageError(
+            f"--file accepts only one file (got {len(file_bytes_list)}). "
+            "Use --input for multiple file inputs (e.g. -I image_0=@a.jpg -I image_1=@b.jpg)."
         )
-        ctx.exit(2)
 
     # Check if any input was provided
     has_inputs = bool(file_bytes_list) or text is not None or bool(raw_inputs)
@@ -613,8 +608,7 @@ def run(
         try:
             engine.load_schema_only(model, task=task, device=device, ep=ep)
         except (OSError, ValueError, RuntimeError) as exc:
-            click.echo(f"Error loading model: {exc}", err=True)
-            ctx.exit(3)
+            raise cli_utils.ModelLoadError(f"Error loading model: {exc}") from exc
         _print_schema(engine, output_format=output_format, output_path=output)
         return
 
@@ -632,8 +626,7 @@ def run(
                 allow_unsupported_nodes=allow_unsupported_nodes,
             )
     except (OSError, ValueError, RuntimeError) as exc:
-        click.echo(f"Error loading model: {exc}", err=True)
-        ctx.exit(3)
+        raise cli_utils.ModelLoadError(f"Error loading model: {exc}") from exc
 
     # No inputs: print hint and exit
     if not has_inputs:
@@ -645,33 +638,28 @@ def run(
     try:
         coerced_inputs = _coerce_inputs(raw_inputs, schema)
     except click.ClickException as exc:
-        click.echo(f"Error: {exc.format_message()}", err=True)
-        ctx.exit(2)
+        raise click.UsageError(exc.format_message()) from exc
 
     # Merge --file/--text shortcuts with --input
     try:
         inputs = _resolve_shortcuts(file_bytes_list, text, coerced_inputs, schema)
     except click.ClickException as exc:
-        click.echo(f"Error: {exc.format_message()}", err=True)
-        ctx.exit(2)
+        raise click.UsageError(exc.format_message()) from exc
 
     # Check input / -P collision (after shortcuts are resolved so that
     # --file and --text shortcut keys are included in the check)
     collision = set(inputs.keys()) & set(pipeline_kwargs.keys())
     if collision:
         key = sorted(collision)[0]
-        click.echo(
-            f"Error: '{key}' specified as both input and -P. "
-            f"Use --input for model inputs and -P for pipeline parameters.",
-            err=True,
+        raise click.UsageError(
+            f"'{key}' specified as both input and -P. "
+            "Use --input for model inputs and -P for pipeline parameters."
         )
-        ctx.exit(2)
 
     try:
         prediction = engine.predict(inputs=inputs, **pipeline_kwargs)
     except (ValueError, TypeError, RuntimeError, OSError) as exc:
-        click.echo(f"Error during inference: {exc}", err=True)
-        ctx.exit(4)
+        raise cli_utils.InferenceError(f"Error during inference: {exc}") from exc
 
     _print_result(prediction.model_dump(), output_format=output_format, output_path=output)
 
diff --git a/src/winml/modelkit/commands/serve.py b/src/winml/modelkit/commands/serve.py
index 1d45e85d2..1c3bd12a8 100644
--- a/src/winml/modelkit/commands/serve.py
+++ b/src/winml/modelkit/commands/serve.py
@@ -16,7 +16,6 @@
 from __future__ import annotations
 
 import logging
-import sys
 from typing import TYPE_CHECKING
 
 import click
@@ -117,12 +116,10 @@ def serve(
     """
     try:
         import uvicorn
-    except ImportError:
-        click.echo(
-            "Error: uvicorn is required. Install with: pip install uvicorn[standard]",
-            err=True,
-        )
-        sys.exit(1)
+    except ImportError as e:
+        raise click.ClickException(
+            "uvicorn is required. Install with: pip install uvicorn[standard]"
+        ) from e
 
     if ctx.obj and ctx.obj.get("debug"):
         logging.getLogger("modelkit").setLevel(logging.DEBUG)
@@ -135,8 +132,7 @@ def serve(
             from ..serve.cli_api import app
             from ..serve.cli_api import print_startup_banner as _banner0
         except ImportError as e:
-            click.echo(f"Error: Failed to load serving module: {e}", err=True)
-            sys.exit(1)
+            raise click.ClickException(f"Failed to load serving module: {e}") from e
         _banner0(host=host, port=port)
         uvicorn.run(app, host=host, port=port, reload=auto_reload, log_level="warning")
         return
@@ -148,8 +144,7 @@ def serve(
         from ..serve.app import create_app
         from ..serve.app import print_startup_banner as _banner1
     except ImportError as e:
-        click.echo(f"Error: Failed to load inference serving module: {e}", err=True)
-        sys.exit(1)
+        raise click.ClickException(f"Failed to load inference serving module: {e}") from e
 
     mode = "multi" if multi else "single"
     inference_app = create_app(
diff --git a/src/winml/modelkit/utils/cli.py b/src/winml/modelkit/utils/cli.py
index 8320b0f77..90ebea674 100644
--- a/src/winml/modelkit/utils/cli.py
+++ b/src/winml/modelkit/utils/cli.py
@@ -29,6 +29,44 @@
 OutputFormat: TypeAlias = Literal["text", "json", "table", "compact"]
 
 
+class ModelLoadError(click.ClickException):
+    """Exit code 3: model could not be loaded onto the device/EP.
+
+    Use for failures loading a model onto a device/EP, missing accelerators,
+    or session creation that fails for hardware reasons. The message is printed
+    verbatim to stderr (no ``Error:`` prefix) so callers control the wording.
+    """
+
+    exit_code = 3
+
+    def show(self, file: Any = None) -> None:
+        click.echo(self.format_message(), err=True)
+
+
+class InferenceError(click.ClickException):
+    """Exit code 4: inference/prediction failed at runtime.
+
+    Use for prediction failures after the model loaded successfully. The
+    message is printed verbatim to stderr (no ``Error:`` prefix).
+    """
+
+    exit_code = 4
+
+    def show(self, file: Any = None) -> None:
+        click.echo(self.format_message(), err=True)
+
+
+class PartialSupportError(click.exceptions.Exit):
+    """Exit code 1: a valid negative result, not an error.
+
+    Raised silently (no ``Error:`` prefix) so commands can signal an
+    actionable-but-non-fatal outcome (e.g. analyze: model not fully supported).
+    """
+
+    def __init__(self) -> None:
+        super().__init__(1)
+
+
 # Shared stderr console for security/diagnostic messages emitted from utils.
 # Mirrors the module-level ``console = Console()`` pattern used by individual
 # command modules, but targets stderr so messages survive ``-q/--quiet``.

From 8ff0e9ddaa58537d9e55a7b188cf50a1f7ce0a8a Mon Sep 17 00:00:00 2001
From: zhenchaoni <zhenni@microsoft.com>
Date: Mon, 29 Jun 2026 15:26:54 +0800
Subject: [PATCH 2/2] Fix lint and document run exit codes

---
 src/winml/modelkit/commands/analyze.py |  2 +-
 src/winml/modelkit/commands/run.py     | 12 ++++++++++++
 src/winml/modelkit/utils/cli.py        |  2 ++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/winml/modelkit/commands/analyze.py b/src/winml/modelkit/commands/analyze.py
index 4be994835..c174bcf65 100644
--- a/src/winml/modelkit/commands/analyze.py
+++ b/src/winml/modelkit/commands/analyze.py
@@ -1436,7 +1436,7 @@ def on_node_result(pattern_runtime: PatternRuntime) -> None:
         # Exit code: 0 = fully supported, 1 = partial support
         overall_supported = all(run_result.is_fully_supported() for run_result in analysis_results)
         if not overall_supported:
-            raise cli_utils.PartialSupportError()
+            raise cli_utils.PartialSupportError
 
     except FileNotFoundError as e:
         raise click.UsageError(f"File not found: {e}") from e
diff --git a/src/winml/modelkit/commands/run.py b/src/winml/modelkit/commands/run.py
index c0035ea81..7d2348cfb 100644
--- a/src/winml/modelkit/commands/run.py
+++ b/src/winml/modelkit/commands/run.py
@@ -519,6 +519,18 @@ def run(
     Uses embedded inference by default. Pass ``--connect`` to route
     through a running ``winml serve`` instance instead.
 
+    Exit Codes:
+
+        0: Success
+
+        1: General error
+
+        2: Usage error — invalid input or arguments
+
+        3: Model load failure
+
+        4: Inference failure
+
     Examples:
     \b
         # Image classification (shortcut)
diff --git a/src/winml/modelkit/utils/cli.py b/src/winml/modelkit/utils/cli.py
index 873e6dabe..5c4591a42 100644
--- a/src/winml/modelkit/utils/cli.py
+++ b/src/winml/modelkit/utils/cli.py
@@ -40,6 +40,7 @@ class ModelLoadError(click.ClickException):
     exit_code = 3
 
     def show(self, file: Any = None) -> None:
+        """Print the message verbatim to stderr (no ``Error:`` prefix)."""
         click.echo(self.format_message(), err=True)
 
 
@@ -53,6 +54,7 @@ class InferenceError(click.ClickException):
     exit_code = 4
 
     def show(self, file: Any = None) -> None:
+        """Print the message verbatim to stderr (no ``Error:`` prefix)."""
         click.echo(self.format_message(), err=True)