From 606dff6b92e9c2e1c031b9c0b96125cf7ff885ed Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Tue, 30 Jun 2026 10:58:24 +0800
Subject: [PATCH 1/3] use is_profiling_available

---
 src/winml/modelkit/commands/perf.py      | 24 ++++++++++---
 src/winml/modelkit/optracing/__init__.py | 45 +++++++++++++++++++-----
 tests/unit/optracing/test_detection.py   | 42 +++++++++++++++++++---
 3 files changed, 95 insertions(+), 16 deletions(-)

diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index 0f83c871c..187f55b0e 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -386,6 +386,16 @@ def _resolve_device_ep(self) -> None:
         self._resolved_device = resolved_device
         self._resolved_ep = resolved_ep
 
+    @property
+    def resolved_device(self) -> str | None:
+        """Concrete device driving the build/inference (``None`` until resolved)."""
+        return self._resolved_device
+
+    @property
+    def resolved_ep(self) -> EPNameOrAlias | None:
+        """Concrete EP driving the build/inference (``None`` until resolved)."""
+        return self._resolved_ep
+
     @property
     def _is_composite(self) -> bool:
         """Composite models orchestrate multiple sub-sessions (e.g. CLIP/SigLIP).
@@ -1810,11 +1820,17 @@ def perf(
         # Op-tracing (additive to existing benchmark)
         # =================================================================
         if op_tracing:
-            from ..optracing import is_qnn_profiling_available
+            from ..optracing import is_profiling_available
 
-            if not is_qnn_profiling_available():
-                console.print("[red]Error:[/red] Op-tracing requires onnxruntime-qnn")
-                console.print("Install with: [bold]pip install onnxruntime-qnn[/bold]")
+            if not is_profiling_available(
+                benchmark.resolved_ep, benchmark.resolved_device, op_tracing
+            ):
+                console.print(
+                    "[red]Error:[/red] Op-tracing is only supported for the QNN EP "
+                    "on NPU at the 'basic' level "
+                    f"(resolved EP={benchmark.resolved_ep}, "
+                    f"device={benchmark.resolved_device}, level={op_tracing})."
+                )
                 raise SystemExit(1)
 
             from ..optracing import (
diff --git a/src/winml/modelkit/optracing/__init__.py b/src/winml/modelkit/optracing/__init__.py
index 7e463d177..eab7e93ba 100644
--- a/src/winml/modelkit/optracing/__init__.py
+++ b/src/winml/modelkit/optracing/__init__.py
@@ -6,20 +6,49 @@
 
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 from .base import OpTracer
 from .registry import get_tracer, register_tracer
 from .report import display_op_trace_report, write_op_trace_json
 from .result import OperatorMetrics, OpTraceResult
 
 
-def is_qnn_profiling_available() -> bool:
-    """Check if QNN EP is available for op-tracing."""
-    try:
-        import onnxruntime as ort
+if TYPE_CHECKING:
+    from ..utils.constants import EPNameOrAlias
+
+# The single EP / device / tracing-level combination op-tracing currently
+# supports. Expanded as more tracers land.
+_SUPPORTED_EP = "QNNExecutionProvider"
+_SUPPORTED_DEVICE = "npu"
+_SUPPORTED_LEVEL = "basic"
+
+
+def is_profiling_available(
+    resolved_ep: EPNameOrAlias | None,
+    resolved_device: str | None,
+    op_tracing: str | None,
+) -> bool:
+    """Check whether op-tracing is supported for a resolved EP/device/level.
+
+    Op-tracing is currently limited to the QNN EP on NPU at the ``"basic"``
+    level; every other combination is unsupported.
+
+    Args:
+        resolved_ep: Concrete EP the benchmark resolved to (full name or alias).
+        resolved_device: Concrete device the benchmark resolved to (e.g. ``"npu"``).
+        op_tracing: Requested tracing level (e.g. ``"basic"``), or ``None``.
+
+    Returns:
+        ``True`` only for the QNN + NPU + ``"basic"`` combination.
+    """
+    from ..utils.constants import normalize_ep_name
 
-        return "QNNExecutionProvider" in ort.get_available_providers()
-    except (ImportError, AttributeError):
-        return False
+    return (
+        normalize_ep_name(resolved_ep) == _SUPPORTED_EP
+        and (resolved_device or "").lower() == _SUPPORTED_DEVICE
+        and (op_tracing or "").lower() == _SUPPORTED_LEVEL
+    )
 
 
 __all__ = [
@@ -28,7 +57,7 @@ def is_qnn_profiling_available() -> bool:
     "OperatorMetrics",
     "display_op_trace_report",
     "get_tracer",
-    "is_qnn_profiling_available",
+    "is_profiling_available",
     "register_tracer",
     "write_op_trace_json",
 ]
diff --git a/tests/unit/optracing/test_detection.py b/tests/unit/optracing/test_detection.py
index 5670d1105..62b599d45 100644
--- a/tests/unit/optracing/test_detection.py
+++ b/tests/unit/optracing/test_detection.py
@@ -2,11 +2,45 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""Test QNN EP detection for op-tracing."""
+"""Test op-tracing support detection for the resolved EP/device/level."""
 
-from winml.modelkit.optracing import is_qnn_profiling_available
+import pytest
 
+from winml.modelkit.optracing import is_profiling_available
 
-def test_is_qnn_profiling_available_returns_bool():
-    result = is_qnn_profiling_available()
+
+def test_is_profiling_available_returns_bool():
+    result = is_profiling_available("QNNExecutionProvider", "npu", "basic")
     assert isinstance(result, bool)
+
+
+def test_supported_combination():
+    assert is_profiling_available("QNNExecutionProvider", "npu", "basic") is True
+
+
+def test_qnn_alias_is_normalized():
+    # The benchmark may carry the user's EP alias verbatim; it must still match.
+    assert is_profiling_available("qnn", "npu", "basic") is True
+
+
+def test_device_is_case_insensitive():
+    assert is_profiling_available("QNNExecutionProvider", "NPU", "basic") is True
+
+
+def test_level_is_case_insensitive():
+    assert is_profiling_available("QNNExecutionProvider", "npu", "BASIC") is True
+
+
+@pytest.mark.parametrize(
+    ("ep", "device", "level"),
+    [
+        ("CPUExecutionProvider", "npu", "basic"),  # wrong EP
+        ("QNNExecutionProvider", "gpu", "basic"),  # wrong device
+        ("QNNExecutionProvider", "npu", "detail"),  # unsupported level
+        (None, "npu", "basic"),  # no EP
+        ("QNNExecutionProvider", None, "basic"),  # no device
+        ("QNNExecutionProvider", "npu", None),  # no level
+    ],
+)
+def test_unsupported_combinations(ep, device, level):
+    assert is_profiling_available(ep, device, level) is False

From 9655dc712da104de511bdf624fafd7eed5363d16 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Tue, 30 Jun 2026 11:17:59 +0800
Subject: [PATCH 2/3] fix(optracing): add QNN EP via add_ep_for_device and
 write op-trace beside the benchmark report

---
 src/winml/modelkit/commands/perf.py          |  7 ++-
 src/winml/modelkit/optracing/qnn/profiler.py | 49 ++++++++------------
 2 files changed, 22 insertions(+), 34 deletions(-)

diff --git a/src/winml/modelkit/commands/perf.py b/src/winml/modelkit/commands/perf.py
index 187f55b0e..e5b84baeb 100644
--- a/src/winml/modelkit/commands/perf.py
+++ b/src/winml/modelkit/commands/perf.py
@@ -1876,10 +1876,9 @@ def perf(
             # Display and save
             display_op_trace_report(trace_result, console)
 
-            model_slug = hf_model.replace("/", "_").replace("\\", "_")
-            if is_onnx:
-                model_slug = model_path.stem
-            trace_output = output_dir / f"{model_slug}_op_trace.json"
+            # Mirror the benchmark report path so the two files sit side by side:
+            # a/b.json -> a/b_op_trace.json.
+            trace_output = output.with_name(f"{output.stem}_op_trace{output.suffix}")
             write_op_trace_json(trace_result, trace_output)
             console.print(f"[green]Op-trace saved to:[/green] {trace_output}")
 
diff --git a/src/winml/modelkit/optracing/qnn/profiler.py b/src/winml/modelkit/optracing/qnn/profiler.py
index 0df9e22f2..2021c64ff 100644
--- a/src/winml/modelkit/optracing/qnn/profiler.py
+++ b/src/winml/modelkit/optracing/qnn/profiler.py
@@ -13,6 +13,7 @@
    (detail) post-processing.
 5. Return a structured ``OpTraceResult``.
 """
+
 from __future__ import annotations
 
 import contextlib
@@ -23,6 +24,7 @@
 
 import numpy as np
 
+from ...winml import add_ep_for_device
 from ..base import OpTracer
 from ..result import OperatorMetrics, OpTraceResult
 from .csv_parser import parse_qnn_profiling_csv
@@ -125,14 +127,16 @@ def run(self, iterations: int = 5, warmup: int = 2) -> OpTraceResult:
         csv_path = self.output_dir / "profiling_output.csv"
         options = self._build_session_options(ort)
         provider_options = self._build_provider_options(csv_path)
+        if not add_ep_for_device(
+            options, "QNNExecutionProvider", ort.OrtHardwareDeviceType.NPU, provider_options
+        ):
+            raise RuntimeError("Failed to add QNNExecutionProvider for NPU device.")
 
         # CWD must be output_dir so schematic.bin lands there.
         with _working_directory(self.output_dir):
             session = ort.InferenceSession(
                 str(self.onnx_path),
                 sess_options=options,
-                providers=["QNNExecutionProvider"],
-                provider_options=provider_options,
             )
 
             inputs = self._generate_inputs(session)
@@ -158,16 +162,12 @@ def run(self, iterations: int = 5, warmup: int = 2) -> OpTraceResult:
     def _build_session_options(self, ort_module: Any) -> Any:
         """Create ``ort.SessionOptions`` with profiling config entries."""
         options = ort_module.SessionOptions()
-        options.add_session_config_entry(
-            "session.disable_cpu_ep_fallback", "1"
-        )
+        options.add_session_config_entry("session.disable_cpu_ep_fallback", "1")
         options.add_session_config_entry("ep.context_enable", "1")
         options.add_session_config_entry("ep.context_embed_mode", "0")
         return options
 
-    def _build_provider_options(
-        self, csv_path: Path
-    ) -> list[dict[str, str]]:
+    def _build_provider_options(self, csv_path: Path) -> dict[str, str]:
         """Build QNN EP provider options dict.
 
         - ``basic`` mode uses ``profiling_level=detailed`` (per-op cycles).
@@ -175,16 +175,13 @@ def _build_provider_options(
         """
         profiling_level = "optrace" if self.level == "detail" else "detailed"
 
-        return [
-            {
-                "backend_path": "QnnHtp.dll",
-                "htp_performance_mode": "high_performance",
-                "htp_graph_finalization_optimization_mode": "3",
-                "enable_htp_fp16_precision": "1",
-                "profiling_level": profiling_level,
-                "profiling_file_path": str(csv_path),
-            }
-        ]
+        return {
+            "htp_performance_mode": "high_performance",
+            "htp_graph_finalization_optimization_mode": "3",
+            "enable_htp_fp16_precision": "1",
+            "profiling_level": profiling_level,
+            "profiling_file_path": str(csv_path),
+        }
 
     # ------------------------------------------------------------------
     # Input generation
@@ -204,9 +201,7 @@ def _generate_inputs(session: Any) -> dict[str, np.ndarray]:
     # Result collection
     # ------------------------------------------------------------------
 
-    def _collect_results(
-        self, csv_path: Path, iterations: int
-    ) -> OpTraceResult:
+    def _collect_results(self, csv_path: Path, iterations: int) -> OpTraceResult:
         """Parse profiling artifacts into an ``OpTraceResult``."""
         artifacts: dict[str, str] = {}
         qnn_log = Path(str(csv_path) + "_qnn.log")
@@ -260,15 +255,11 @@ def _try_qhas(
         import json as _json
 
         if schematic is None or not schematic.is_file():
-            logger.info(
-                "No schematic found; falling back to CSV for detail mode"
-            )
+            logger.info("No schematic found; falling back to CSV for detail mode")
             return None
 
         qhas_output = self.output_dir / "qhas_output.json"
-        result_path = run_qhas_viewer(
-            qnn_log, schematic, qhas_output, sdk_root=find_qnn_sdk()
-        )
+        result_path = run_qhas_viewer(qnn_log, schematic, qhas_output, sdk_root=find_qnn_sdk())
 
         if result_path is None or not result_path.is_file():
             logger.info("QHAS viewer did not produce output; falling back")
@@ -329,9 +320,7 @@ def _from_csv(
                 op_path=op["name"],
                 op_id=op["op_id"],
                 duration_us=op["cycles"] * cycle_to_us,
-                percent_of_total=(
-                    op["cycles"] / total_cycles * 100 if total_cycles > 0 else 0
-                ),
+                percent_of_total=(op["cycles"] / total_cycles * 100 if total_cycles > 0 else 0),
             )
             for op in parsed["operators"]
         ]

From 39ce99ae0feddb8cf1bdffd5c1aea7e9d5565773 Mon Sep 17 00:00:00 2001
From: hualxie <hualxie@microsoft.com>
Date: Tue, 30 Jun 2026 11:37:48 +0800
Subject: [PATCH 3/3] test(optracing): update QNN provider-options tests for
 dict return shape

---
 tests/unit/optracing/test_qnn_profiler.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tests/unit/optracing/test_qnn_profiler.py b/tests/unit/optracing/test_qnn_profiler.py
index 5cbfb3e70..bf371bcfe 100644
--- a/tests/unit/optracing/test_qnn_profiler.py
+++ b/tests/unit/optracing/test_qnn_profiler.py
@@ -52,13 +52,14 @@ def test_qnn_profiler_creates_session_options():
 
 
 def test_qnn_profiler_provider_options_basic():
-    """Verify provider options for basic mode (profiling_level=detailed)."""
+    """Verify provider options for basic mode (profiling_level=detailed).
+
+    The QNN backend/device is now selected by ``add_ep_for_device``, so the
+    options dict no longer carries ``backend_path``.
+    """
     profiler = QNNProfiler(Path("model.onnx"), output_dir=Path("out"), level="basic")
-    opts = profiler._build_provider_options(Path("out/profiling.csv"))
+    po = profiler._build_provider_options(Path("out/profiling.csv"))
 
-    assert len(opts) == 1
-    po = opts[0]
-    assert po["backend_path"] == "QnnHtp.dll"
     assert po["htp_performance_mode"] == "high_performance"
     assert po["htp_graph_finalization_optimization_mode"] == "3"
     assert po["enable_htp_fp16_precision"] == "1"
@@ -69,11 +70,9 @@ def test_qnn_profiler_provider_options_basic():
 def test_qnn_profiler_provider_options_detail():
     """Verify provider options for detail mode (profiling_level=optrace)."""
     profiler = QNNProfiler(Path("model.onnx"), output_dir=Path("out"), level="detail")
-    opts = profiler._build_provider_options(Path("out/profiling.csv"))
+    po = profiler._build_provider_options(Path("out/profiling.csv"))
 
-    po = opts[0]
     assert po["profiling_level"] == "optrace"
-    assert po["backend_path"] == "QnnHtp.dll"
 
 
 # =====================================================================
@@ -182,7 +181,7 @@ def write_csv_on_del():
         # Verify session creation was called correctly via builder methods.
         profiler._build_session_options(mock_ort)
         po = profiler._build_provider_options(output_dir / "profiling_output.csv")
-        assert po[0]["profiling_level"] == "detailed"
+        assert po["profiling_level"] == "detailed"
 
         # Now test the CSV parsing path directly.
         result = profiler._from_csv(