From 99d1e492e90de6933460840d8bb6a145235b1715 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 30 Jun 2026 16:40:34 +0800 Subject: [PATCH 1/3] update infer_ihv_from_ep_name to use EPName only --- src/winml/modelkit/analyze/utils/ep_utils.py | 51 ++++++++------------ src/winml/modelkit/serve/app.py | 2 +- src/winml/modelkit/serve/schema.py | 6 ++- tests/unit/analyze/test_analyzer.py | 8 --- tests/unit/analyze/test_has_rule_data.py | 14 ++---- 5 files changed, 28 insertions(+), 53 deletions(-) diff --git a/src/winml/modelkit/analyze/utils/ep_utils.py b/src/winml/modelkit/analyze/utils/ep_utils.py index 5f29b2d99..e670cdad4 100644 --- a/src/winml/modelkit/analyze/utils/ep_utils.py +++ b/src/winml/modelkit/analyze/utils/ep_utils.py @@ -21,18 +21,18 @@ def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType: - """Infer IHVType from Execution Provider name. + """Infer IHVType from a canonical Execution Provider name. - Maps an execution provider name to its corresponding IHV type. - Supports multiple name variations for each provider. - Unknown EPs (e.g., CPUExecutionProvider, DmlExecutionProvider) resolve - to IHVType.MICROSOFT. + ``EPName`` is a closed set of canonical EP names, so this is a direct, + exact lookup. Any name without an explicit IHV owner (e.g. + ``CPUExecutionProvider``, ``DmlExecutionProvider``) resolves to + ``IHVType.MICROSOFT``. Args: - ep_name: Execution Provider name (e.g., QNNExecutionProvider, OpenVINOExecutionProvider) + ep_name: Canonical Execution Provider name (see ``utils.constants.EPName``). Returns: - IHVType: Inferred IHV type (QC, INTEL, AMD, NVIDIA, or MICROSOFT) + IHVType: Inferred IHV type (QC, INTEL, AMD, NVIDIA, or MICROSOFT). Examples: >>> infer_ihv_from_ep_name("QNNExecutionProvider") @@ -48,30 +48,19 @@ def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType: """ from ..models.ihv_type import IHVType - ep_lower = ep_name.lower() - - # QNN / Qualcomm - if "qnn" in ep_lower or "qualcomm" in ep_lower: - return IHVType.QC - - # OpenVINO / Intel - if "openvino" in ep_lower or "intel" in ep_lower: - return IHVType.INTEL - - # VitisAI / MIGraphX / AMD / ACE (AMD) - amd_keywords = ("amd", "quark", "vitis", "ace", "migraphx") - if any(kw in ep_lower for kw in amd_keywords): - return IHVType.AMD - - # NVIDIA / TensorRT RTX - # This is intentionally a permissive substring fallback to cover common - # TensorRT naming variants. Callers should prefer canonical EP names. - nvidia_keywords = ("nvidia", "nvtensorrt", "trtrtx", "tensorrt", "rtx") - if any(kw in ep_lower for kw in nvidia_keywords): - return IHVType.NVIDIA - - # Default: Microsoft (e.g., CPUExecutionProvider, DmlExecutionProvider) - return IHVType.MICROSOFT + ep_name_to_ihv: dict[EPName, IHVType] = { + "QNNExecutionProvider": IHVType.QC, + "OpenVINOExecutionProvider": IHVType.INTEL, + "VitisAIExecutionProvider": IHVType.AMD, + "MIGraphXExecutionProvider": IHVType.AMD, + "NvTensorRTRTXExecutionProvider": IHVType.NVIDIA, + "CUDAExecutionProvider": IHVType.NVIDIA, + "CPUExecutionProvider": IHVType.MICROSOFT, + "DmlExecutionProvider": IHVType.MICROSOFT, + } + + # EPName is a closed set; anything without an explicit owner is Microsoft. + return ep_name_to_ihv.get(ep_name, IHVType.MICROSOFT) def get_devices_with_rule_data(ep_name: EPName) -> list[str]: diff --git a/src/winml/modelkit/serve/app.py b/src/winml/modelkit/serve/app.py index b38f96f6b..a666c237f 100644 --- a/src/winml/modelkit/serve/app.py +++ b/src/winml/modelkit/serve/app.py @@ -490,7 +490,7 @@ async def get_mcp_schema() -> dict[str, Any]: # ------------------------------------------------------------------ @app.post("/v1/ep", tags=["management"], summary="Switch execution provider") async def switch_ep(request: EpSwitchRequest) -> dict[str, Any]: - # Pydantic already validates ep against the EPAlias Literal (rejects + # Pydantic already validates ep against the EPNameOrAlias Literal (rejects # unknown values with a 422 at parse time), so no extra check needed. ep = request.ep mgr = _get_mgr() diff --git a/src/winml/modelkit/serve/schema.py b/src/winml/modelkit/serve/schema.py index 09ff2232f..2f55bc968 100644 --- a/src/winml/modelkit/serve/schema.py +++ b/src/winml/modelkit/serve/schema.py @@ -14,7 +14,7 @@ from pydantic import BaseModel, Field -from ..utils.constants import EPAlias, EPNameOrAlias +from ..utils.constants import EPNameOrAlias # --------------------------------------------------------------------------- @@ -25,7 +25,9 @@ class EpSwitchRequest(BaseModel): """POST /v1/ep — switch execution provider.""" - ep: EPAlias = Field(..., description="EP short name: cpu, dml, qnn, openvino") + ep: EPNameOrAlias = Field( + ..., description="EP name or short alias (e.g. cpu, dml, qnn, QNNExecutionProvider)" + ) class PredictJsonRequest(BaseModel): diff --git a/tests/unit/analyze/test_analyzer.py b/tests/unit/analyze/test_analyzer.py index d4de6fd0a..e20fa1922 100644 --- a/tests/unit/analyze/test_analyzer.py +++ b/tests/unit/analyze/test_analyzer.py @@ -867,26 +867,18 @@ def test_init_custom_config(self) -> None: def test_map_ep_to_ihv_qnn(self) -> None: """Test EP to IHV mapping for QNN.""" assert infer_ihv_from_ep_name("QNNExecutionProvider") == IHVType.QC - assert infer_ihv_from_ep_name("qnnexecutionprovider") == IHVType.QC - assert infer_ihv_from_ep_name("QualcommProvider") == IHVType.QC def test_map_ep_to_ihv_openvino(self) -> None: """Test EP to IHV mapping for OpenVINO.""" assert infer_ihv_from_ep_name("OpenVINOExecutionProvider") == IHVType.INTEL - assert infer_ihv_from_ep_name("openvino") == IHVType.INTEL - assert infer_ihv_from_ep_name("IntelProvider") == IHVType.INTEL def test_map_ep_to_ihv_vitisai(self) -> None: """Test EP to IHV mapping for VitisAI.""" assert infer_ihv_from_ep_name("VitisAIExecutionProvider") == IHVType.AMD - assert infer_ihv_from_ep_name("vitis") == IHVType.AMD - assert infer_ihv_from_ep_name("AMDProvider") == IHVType.AMD def test_map_ep_to_ihv_nvidia(self) -> None: """Test EP to IHV mapping for NvTensorRTRTX.""" assert infer_ihv_from_ep_name("NvTensorRTRTXExecutionProvider") == IHVType.NVIDIA - assert infer_ihv_from_ep_name("nvtensorrtx") == IHVType.NVIDIA - assert infer_ihv_from_ep_name("TensorRTProvider") == IHVType.NVIDIA def test_map_ep_to_ihv_invalid(self) -> None: """Test EP to IHV mapping with unrecognized EP resolves to MICROSOFT.""" diff --git a/tests/unit/analyze/test_has_rule_data.py b/tests/unit/analyze/test_has_rule_data.py index 209e8fc9c..1b6647a1f 100644 --- a/tests/unit/analyze/test_has_rule_data.py +++ b/tests/unit/analyze/test_has_rule_data.py @@ -48,13 +48,11 @@ def test_migraphx_maps_to_amd(self) -> None: assert infer_ihv_from_ep_name("MIGraphXExecutionProvider") == IHVType.AMD - def test_case_insensitive(self) -> None: + def test_cuda_maps_to_nvidia(self) -> None: + """CUDAExecutionProvider is an NVIDIA EP — should map to IHVType.NVIDIA.""" from winml.modelkit.analyze.models.ihv_type import IHVType - assert infer_ihv_from_ep_name("qnnexecutionprovider") == IHVType.QC - assert infer_ihv_from_ep_name("OPENVINOEXECUTIONPROVIDER") == IHVType.INTEL - assert infer_ihv_from_ep_name("vitisaiexecutionprovider") == IHVType.AMD - assert infer_ihv_from_ep_name("nvtensorrtxexecutionprovider") == IHVType.NVIDIA + assert infer_ihv_from_ep_name("CUDAExecutionProvider") == IHVType.NVIDIA def test_unknown_ep_resolves_to_microsoft(self) -> None: from winml.modelkit.analyze.models.ihv_type import IHVType @@ -79,12 +77,6 @@ def test_nvidia_ep_maps_to_nvidia(self) -> None: assert infer_ihv_from_ep_name("NvTensorRTRTXExecutionProvider") == IHVType.NVIDIA - def test_trtrtx_ep_maps_to_nvidia(self) -> None: - """TrtRTXExecutionProvider should map to IHVType.NVIDIA.""" - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("TrtRTXExecutionProvider") == IHVType.NVIDIA - class TestHasRuleDataForEP: """Tests for has_rule_data_for_ep().""" From ddf9e7a92f50777c4685e619c1b71d51f9c12739 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 30 Jun 2026 17:08:42 +0800 Subject: [PATCH 2/3] fix(compiler): normalize EP name in needs_format_conversion Compare against the canonical EPName instead of a single alias literal (ep == "qnn"). An EP can have multiple aliases (e.g. nv_tensorrt_rtx / nvtensorrtrtx) and the canonical name itself would not match, so the alias-literal comparison was fragile. Normalize via normalize_ep_name first and widen the parameter to EPNameOrAlias. --- src/winml/modelkit/compiler/utils.py | 12 +++++++++--- tests/unit/compiler/test_utils.py | 7 +++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/winml/modelkit/compiler/utils.py b/src/winml/modelkit/compiler/utils.py index ea80ed480..c8867c8aa 100644 --- a/src/winml/modelkit/compiler/utils.py +++ b/src/winml/modelkit/compiler/utils.py @@ -12,27 +12,33 @@ if TYPE_CHECKING: from pathlib import Path - from ..utils.constants import EPAlias + from ..utils.constants import EPNameOrAlias # Canonical definition of ONNX QDQ operator types. # Import this constant instead of redefining {"QuantizeLinear", "DequantizeLinear"}. QDQ_OP_TYPES: frozenset[str] = frozenset({"QuantizeLinear", "DequantizeLinear"}) -def needs_format_conversion(model_path: Path, ep: EPAlias) -> bool: +def needs_format_conversion(model_path: Path, ep: EPNameOrAlias) -> bool: """Check if model's quant format is compatible with target EP. Minimal detection: checks for QLinear ops targeting QDQ-only EPs. FIXME: Expand to full EP-to-format compatibility matrix. """ from ..onnx import load_onnx + from ..utils.constants import normalize_ep_name model = load_onnx(model_path, load_weights=False, validate=False) op_types = {n.op_type for n in model.graph.node} has_qlinear = any(op.startswith("QLinear") for op in op_types) has_qdq = bool(op_types & QDQ_OP_TYPES) - if ep == "qnn" and has_qlinear and not has_qdq: # noqa: SIM103 + # Compare against the canonical EP name, not a single alias: one EP can have + # several aliases (e.g. nv_tensorrt_rtx / nvtensorrtrtx), so an alias-literal + # comparison would miss the others. + ep_canonical = normalize_ep_name(ep) + + if ep_canonical == "QNNExecutionProvider" and has_qlinear and not has_qdq: # noqa: SIM103 return True # QNN requires QDQ format # FIXME: add more EP rules as needed return False diff --git a/tests/unit/compiler/test_utils.py b/tests/unit/compiler/test_utils.py index 7fbe3d3a0..078cd559f 100644 --- a/tests/unit/compiler/test_utils.py +++ b/tests/unit/compiler/test_utils.py @@ -86,6 +86,13 @@ def test_qlinear_for_qnn(self, tmp_path: Path) -> None: onnx.save(model, str(path)) assert needs_format_conversion(path, "qnn") is True + def test_qlinear_for_qnn_canonical_name(self, tmp_path: Path) -> None: + """Canonical EP name must be recognized, not just the alias.""" + model = _make_simple_model(["QLinearConv", "Relu"]) + path = tmp_path / "qlinear.onnx" + onnx.save(model, str(path)) + assert needs_format_conversion(path, "QNNExecutionProvider") is True + def test_qdq_for_qnn(self, tmp_path: Path) -> None: model = _make_simple_model(["QuantizeLinear", "DequantizeLinear"]) path = tmp_path / "qdq.onnx" From f555451e13c0da7c15f1b619dc8b4d79c52fdd36 Mon Sep 17 00:00:00 2001 From: hualxie Date: Tue, 30 Jun 2026 17:27:32 +0800 Subject: [PATCH 3/3] refactor(analyze): raise on unknown EP in infer_ihv_from_ep_name The map covers every canonical EPName, so an unknown name now signals a bug rather than silently defaulting to IHVType.MICROSOFT. InformationEngine already catches ValueError (falls back to loading all rules) and the model validator catches it defensively. Replace the per-EP unit tests with one that verifies every EP_NAMES member resolves to an IHVType (enforcing map/Literal parity) plus an unknown-raises test. Update fixtures that relied on the old lenient behavior to use canonical EP names. --- src/winml/modelkit/analyze/utils/ep_utils.py | 13 +++-- .../core/model_validators/test_validators.py | 13 ++++- .../analyze/core/test_output_aggregator.py | 4 +- tests/unit/analyze/test_analyzer.py | 21 ------- tests/unit/analyze/test_has_rule_data.py | 55 +++---------------- 5 files changed, 29 insertions(+), 77 deletions(-) diff --git a/src/winml/modelkit/analyze/utils/ep_utils.py b/src/winml/modelkit/analyze/utils/ep_utils.py index e670cdad4..709dd7543 100644 --- a/src/winml/modelkit/analyze/utils/ep_utils.py +++ b/src/winml/modelkit/analyze/utils/ep_utils.py @@ -24,9 +24,7 @@ def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType: """Infer IHVType from a canonical Execution Provider name. ``EPName`` is a closed set of canonical EP names, so this is a direct, - exact lookup. Any name without an explicit IHV owner (e.g. - ``CPUExecutionProvider``, ``DmlExecutionProvider``) resolves to - ``IHVType.MICROSOFT``. + exact lookup covering every member of that set. Args: ep_name: Canonical Execution Provider name (see ``utils.constants.EPName``). @@ -34,6 +32,9 @@ def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType: Returns: IHVType: Inferred IHV type (QC, INTEL, AMD, NVIDIA, or MICROSOFT). + Raises: + ValueError: If ``ep_name`` is not a known canonical EP name. + Examples: >>> infer_ihv_from_ep_name("QNNExecutionProvider") @@ -59,8 +60,10 @@ def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType: "DmlExecutionProvider": IHVType.MICROSOFT, } - # EPName is a closed set; anything without an explicit owner is Microsoft. - return ep_name_to_ihv.get(ep_name, IHVType.MICROSOFT) + try: + return ep_name_to_ihv[ep_name] + except KeyError: + raise ValueError(f"Cannot infer IHV for unknown EP name: {ep_name!r}") from None def get_devices_with_rule_data(ep_name: EPName) -> list[str]: diff --git a/tests/unit/analyze/core/model_validators/test_validators.py b/tests/unit/analyze/core/model_validators/test_validators.py index 07085d7fd..31cceead1 100644 --- a/tests/unit/analyze/core/model_validators/test_validators.py +++ b/tests/unit/analyze/core/model_validators/test_validators.py @@ -418,7 +418,9 @@ def _validate(self, proto, ep, device): def test_detects_for_openvino_gpu(self): """Emits a GraphOptimization action enabling the surgery for OV GPU.""" - info = self._validate(_make_batched_const_matmul_proto(), "openvino", "GPU") + info = self._validate( + _make_batched_const_matmul_proto(), "OpenVINOExecutionProvider", "GPU" + ) assert info is not None assert info.pattern_id == "MODEL/BatchedConstantMatMul" items = info.actions[0].action_items @@ -427,7 +429,10 @@ def test_detects_for_openvino_gpu(self): def test_skipped_for_openvino_npu(self): """Device-gated: NPU is unaffected.""" - assert self._validate(_make_batched_const_matmul_proto(), "openvino", "NPU") is None + assert ( + self._validate(_make_batched_const_matmul_proto(), "OpenVINOExecutionProvider", "NPU") + is None + ) def test_skipped_for_non_intel_gpu(self): """IHV-gated: a non-Intel GPU EP is unaffected.""" @@ -436,7 +441,9 @@ def test_skipped_for_non_intel_gpu(self): def test_skipped_for_two_dim_constant(self): """Rank-2 constant gemm compiles on OV GPU; not flagged.""" - info = self._validate(_make_batched_const_matmul_proto(const_rank=2), "openvino", "GPU") + info = self._validate( + _make_batched_const_matmul_proto(const_rank=2), "OpenVINOExecutionProvider", "GPU" + ) assert info is None def test_manager_wires_validator_for_openvino_gpu(self): diff --git a/tests/unit/analyze/core/test_output_aggregator.py b/tests/unit/analyze/core/test_output_aggregator.py index 014fb6fb6..d9803ade8 100644 --- a/tests/unit/analyze/core/test_output_aggregator.py +++ b/tests/unit/analyze/core/test_output_aggregator.py @@ -510,7 +510,7 @@ def test_full_workflow_multiple_ihv(self, sample_metadata: ModelStats) -> None: result=RuntimeTestResult(compile=True, run=True), ), ], - "ACEExecutionProvider": [ + "VitisAIExecutionProvider": [ PatternRuntime( pattern_id="OP/ai.onnx/Add", result=RuntimeTestResult(compile=False, run=False), @@ -521,7 +521,7 @@ def test_full_workflow_multiple_ihv(self, sample_metadata: ModelStats) -> None: information_list = { "QNNExecutionProvider": [], "OpenVINOExecutionProvider": [], - "ACEExecutionProvider": [ + "VitisAIExecutionProvider": [ Information( explanation="Add not supported", pattern_id="OP/ai.onnx/Add", diff --git a/tests/unit/analyze/test_analyzer.py b/tests/unit/analyze/test_analyzer.py index e20fa1922..dcfa735f2 100644 --- a/tests/unit/analyze/test_analyzer.py +++ b/tests/unit/analyze/test_analyzer.py @@ -23,7 +23,6 @@ ModelStats, ONNXStaticAnalyzer, SupportLevel, - infer_ihv_from_ep_name, ) from winml.modelkit.analyze.analyzer import _build_runtime_debug_details_summary from winml.modelkit.analyze.models.runtime_checks import PatternRuntime, RuntimeTestResult @@ -864,26 +863,6 @@ def test_init_custom_config(self) -> None: assert analyzer.config.enable_information is True assert analyzer.config.max_memory_mb == 4096 - def test_map_ep_to_ihv_qnn(self) -> None: - """Test EP to IHV mapping for QNN.""" - assert infer_ihv_from_ep_name("QNNExecutionProvider") == IHVType.QC - - def test_map_ep_to_ihv_openvino(self) -> None: - """Test EP to IHV mapping for OpenVINO.""" - assert infer_ihv_from_ep_name("OpenVINOExecutionProvider") == IHVType.INTEL - - def test_map_ep_to_ihv_vitisai(self) -> None: - """Test EP to IHV mapping for VitisAI.""" - assert infer_ihv_from_ep_name("VitisAIExecutionProvider") == IHVType.AMD - - def test_map_ep_to_ihv_nvidia(self) -> None: - """Test EP to IHV mapping for NvTensorRTRTX.""" - assert infer_ihv_from_ep_name("NvTensorRTRTXExecutionProvider") == IHVType.NVIDIA - - def test_map_ep_to_ihv_invalid(self) -> None: - """Test EP to IHV mapping with unrecognized EP resolves to MICROSOFT.""" - assert infer_ihv_from_ep_name("InvalidEP") == IHVType.MICROSOFT - def test_analyze_file_not_found(self) -> None: """Test analyze with non-existent file.""" analyzer = ONNXStaticAnalyzer() diff --git a/tests/unit/analyze/test_has_rule_data.py b/tests/unit/analyze/test_has_rule_data.py index 1b6647a1f..61a6da32f 100644 --- a/tests/unit/analyze/test_has_rule_data.py +++ b/tests/unit/analyze/test_has_rule_data.py @@ -27,55 +27,18 @@ class TestInferIHVFromEPName: """Tests for infer_ihv_from_ep_name().""" - def test_qnn(self) -> None: + def test_all_known_eps_resolve(self) -> None: + """Every canonical EPName maps to a valid IHVType (map covers the Literal).""" from winml.modelkit.analyze.models.ihv_type import IHVType + from winml.modelkit.utils.constants import EP_NAMES - assert infer_ihv_from_ep_name("QNNExecutionProvider") == IHVType.QC + for ep in EP_NAMES: + assert isinstance(infer_ihv_from_ep_name(ep), IHVType) - def test_openvino(self) -> None: - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("OpenVINOExecutionProvider") == IHVType.INTEL - - def test_vitisai(self) -> None: - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("VitisAIExecutionProvider") == IHVType.AMD - - def test_migraphx_maps_to_amd(self) -> None: - """MIGraphX is an AMD EP — should map to IHVType.AMD.""" - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("MIGraphXExecutionProvider") == IHVType.AMD - - def test_cuda_maps_to_nvidia(self) -> None: - """CUDAExecutionProvider is an NVIDIA EP — should map to IHVType.NVIDIA.""" - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("CUDAExecutionProvider") == IHVType.NVIDIA - - def test_unknown_ep_resolves_to_microsoft(self) -> None: - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("TotallyFakeEP") == IHVType.MICROSOFT - - def test_cpu_ep_resolves_to_microsoft(self) -> None: - """CPUExecutionProvider is a Microsoft EP — should resolve to MICROSOFT.""" - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("CPUExecutionProvider") == IHVType.MICROSOFT - - def test_dml_ep_resolves_to_microsoft(self) -> None: - """DmlExecutionProvider is a Microsoft EP — should resolve to MICROSOFT.""" - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("DmlExecutionProvider") == IHVType.MICROSOFT - - def test_nvidia_ep_maps_to_nvidia(self) -> None: - """NvTensorRTRTXExecutionProvider should map to IHVType.NVIDIA.""" - from winml.modelkit.analyze.models.ihv_type import IHVType - - assert infer_ihv_from_ep_name("NvTensorRTRTXExecutionProvider") == IHVType.NVIDIA + def test_unknown_ep_raises(self) -> None: + """Unknown EP names raise rather than silently defaulting.""" + with pytest.raises(ValueError, match="unknown EP name"): + infer_ihv_from_ep_name("TotallyFakeEP") class TestHasRuleDataForEP: