Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 23 additions & 31 deletions src/winml/modelkit/analyze/utils/ep_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,19 @@


def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType:
"""Infer IHVType from Execution Provider name.
"""Infer IHVType from a canonical Execution Provider name.

Maps an execution provider name to its corresponding IHV type.
Supports multiple name variations for each provider.
Unknown EPs (e.g., CPUExecutionProvider, DmlExecutionProvider) resolve
to IHVType.MICROSOFT.
``EPName`` is a closed set of canonical EP names, so this is a direct,
exact lookup covering every member of that set.

Args:
ep_name: Execution Provider name (e.g., QNNExecutionProvider, OpenVINOExecutionProvider)
ep_name: Canonical Execution Provider name (see ``utils.constants.EPName``).

Returns:
IHVType: Inferred IHV type (QC, INTEL, AMD, NVIDIA, or MICROSOFT)
IHVType: Inferred IHV type (QC, INTEL, AMD, NVIDIA, or MICROSOFT).

Raises:
ValueError: If ``ep_name`` is not a known canonical EP name.

Examples:
>>> infer_ihv_from_ep_name("QNNExecutionProvider")
Expand All @@ -48,30 +49,21 @@ def infer_ihv_from_ep_name(ep_name: EPName) -> IHVType:
"""
from ..models.ihv_type import IHVType

ep_lower = ep_name.lower()

# QNN / Qualcomm
if "qnn" in ep_lower or "qualcomm" in ep_lower:
return IHVType.QC

# OpenVINO / Intel
if "openvino" in ep_lower or "intel" in ep_lower:
return IHVType.INTEL

# VitisAI / MIGraphX / AMD / ACE (AMD)
amd_keywords = ("amd", "quark", "vitis", "ace", "migraphx")
if any(kw in ep_lower for kw in amd_keywords):
return IHVType.AMD

# NVIDIA / TensorRT RTX
# This is intentionally a permissive substring fallback to cover common
# TensorRT naming variants. Callers should prefer canonical EP names.
nvidia_keywords = ("nvidia", "nvtensorrt", "trtrtx", "tensorrt", "rtx")
if any(kw in ep_lower for kw in nvidia_keywords):
return IHVType.NVIDIA

# Default: Microsoft (e.g., CPUExecutionProvider, DmlExecutionProvider)
return IHVType.MICROSOFT
ep_name_to_ihv: dict[EPName, IHVType] = {
"QNNExecutionProvider": IHVType.QC,
"OpenVINOExecutionProvider": IHVType.INTEL,
"VitisAIExecutionProvider": IHVType.AMD,
"MIGraphXExecutionProvider": IHVType.AMD,
"NvTensorRTRTXExecutionProvider": IHVType.NVIDIA,
"CUDAExecutionProvider": IHVType.NVIDIA,
"CPUExecutionProvider": IHVType.MICROSOFT,
"DmlExecutionProvider": IHVType.MICROSOFT,
}

try:
return ep_name_to_ihv[ep_name]
except KeyError:
raise ValueError(f"Cannot infer IHV for unknown EP name: {ep_name!r}") from None


def get_devices_with_rule_data(ep_name: EPName) -> list[str]:
Expand Down
12 changes: 9 additions & 3 deletions src/winml/modelkit/compiler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,33 @@
if TYPE_CHECKING:
from pathlib import Path

from ..utils.constants import EPAlias
from ..utils.constants import EPNameOrAlias

# Single source of truth for the ONNX quantize/dequantize (QDQ) operator type names.
# Other modules should import this constant rather than spelling out
# {"QuantizeLinear", "DequantizeLinear"} again.
QDQ_OP_TYPES: frozenset[str] = frozenset(("QuantizeLinear", "DequantizeLinear"))


def needs_format_conversion(model_path: Path, ep: EPNameOrAlias) -> bool:
    """Report whether the model's quantization format must be converted for ``ep``.

    Minimal detection: flags models that contain ``QLinear*`` ops but no
    QuantizeLinear/DequantizeLinear ops when the target EP is QNN.
    FIXME: Expand to full EP-to-format compatibility matrix.

    Args:
        model_path: Path of the ONNX model to inspect. The model is loaded with
            ``load_weights=False`` and ``validate=False``.
        ep: Target execution provider, given as a canonical name or an alias.
            It is passed through ``normalize_ep_name`` before being compared.

    Returns:
        True when a format conversion is needed (currently only for QNN with
        QLinear ops and no QDQ ops); False in every other case.
    """
    from ..onnx import load_onnx
    from ..utils.constants import normalize_ep_name

    model = load_onnx(model_path, load_weights=False, validate=False)
    op_types = {n.op_type for n in model.graph.node}
    has_qlinear = any(op.startswith("QLinear") for op in op_types)
    has_qdq = bool(op_types & QDQ_OP_TYPES)

    # Compare against the canonical EP name, not a single alias: one EP can have
    # several aliases (e.g. nv_tensorrt_rtx / nvtensorrtrtx), so an alias-literal
    # comparison would miss the others.
    ep_canonical = normalize_ep_name(ep)

    if ep_canonical == "QNNExecutionProvider" and has_qlinear and not has_qdq:  # noqa: SIM103
        return True  # QNN requires QDQ format
    # FIXME: add more EP rules as needed
    return False
2 changes: 1 addition & 1 deletion src/winml/modelkit/serve/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ async def get_mcp_schema() -> dict[str, Any]:
# ------------------------------------------------------------------
@app.post("/v1/ep", tags=["management"], summary="Switch execution provider")
async def switch_ep(request: EpSwitchRequest) -> dict[str, Any]:
# Pydantic already validates ep against the EPAlias Literal (rejects
# Pydantic already validates ep against the EPNameOrAlias Literal (rejects
# unknown values with a 422 at parse time), so no extra check needed.
ep = request.ep
mgr = _get_mgr()
Expand Down
6 changes: 4 additions & 2 deletions src/winml/modelkit/serve/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from pydantic import BaseModel, Field

from ..utils.constants import EPAlias, EPNameOrAlias
from ..utils.constants import EPNameOrAlias


# ---------------------------------------------------------------------------
Expand All @@ -25,7 +25,9 @@
class EpSwitchRequest(BaseModel):
    """POST /v1/ep — switch execution provider.

    The request body carries a single required field, ``ep``, typed as
    ``EPNameOrAlias`` so that both canonical EP names and short aliases are
    accepted.
    """

    # Required field (``...`` means no default): the execution provider to switch to.
    ep: EPNameOrAlias = Field(
        ..., description="EP name or short alias (e.g. cpu, dml, qnn, QNNExecutionProvider)"
    )


class PredictJsonRequest(BaseModel):
Expand Down
13 changes: 10 additions & 3 deletions tests/unit/analyze/core/model_validators/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,9 @@ def _validate(self, proto, ep, device):

def test_detects_for_openvino_gpu(self):
"""Emits a GraphOptimization action enabling the surgery for OV GPU."""
info = self._validate(_make_batched_const_matmul_proto(), "openvino", "GPU")
info = self._validate(
_make_batched_const_matmul_proto(), "OpenVINOExecutionProvider", "GPU"
)
assert info is not None
assert info.pattern_id == "MODEL/BatchedConstantMatMul"
items = info.actions[0].action_items
Expand All @@ -427,7 +429,10 @@ def test_detects_for_openvino_gpu(self):

def test_skipped_for_openvino_npu(self):
    """Device-gated: NPU is unaffected.

    The validator is invoked with the canonical EP name
    ("OpenVINOExecutionProvider"), matching the other tests in this class.
    """
    proto = _make_batched_const_matmul_proto()
    info = self._validate(proto, "OpenVINOExecutionProvider", "NPU")
    assert info is None

def test_skipped_for_non_intel_gpu(self):
"""IHV-gated: a non-Intel GPU EP is unaffected."""
Expand All @@ -436,7 +441,9 @@ def test_skipped_for_non_intel_gpu(self):

def test_skipped_for_two_dim_constant(self):
    """Rank-2 constant gemm compiles on OV GPU; not flagged.

    The validator is invoked with the canonical EP name
    ("OpenVINOExecutionProvider"), matching the other tests in this class.
    """
    proto = _make_batched_const_matmul_proto(const_rank=2)
    info = self._validate(proto, "OpenVINOExecutionProvider", "GPU")
    assert info is None

def test_manager_wires_validator_for_openvino_gpu(self):
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/analyze/core/test_output_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ def test_full_workflow_multiple_ihv(self, sample_metadata: ModelStats) -> None:
result=RuntimeTestResult(compile=True, run=True),
),
],
"ACEExecutionProvider": [
"VitisAIExecutionProvider": [
PatternRuntime(
pattern_id="OP/ai.onnx/Add",
result=RuntimeTestResult(compile=False, run=False),
Expand All @@ -521,7 +521,7 @@ def test_full_workflow_multiple_ihv(self, sample_metadata: ModelStats) -> None:
information_list = {
"QNNExecutionProvider": [],
"OpenVINOExecutionProvider": [],
"ACEExecutionProvider": [
"VitisAIExecutionProvider": [
Information(
explanation="Add not supported",
pattern_id="OP/ai.onnx/Add",
Expand Down
29 changes: 0 additions & 29 deletions tests/unit/analyze/test_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
ModelStats,
ONNXStaticAnalyzer,
SupportLevel,
infer_ihv_from_ep_name,
)
from winml.modelkit.analyze.analyzer import _build_runtime_debug_details_summary
from winml.modelkit.analyze.models.runtime_checks import PatternRuntime, RuntimeTestResult
Expand Down Expand Up @@ -864,34 +863,6 @@ def test_init_custom_config(self) -> None:
assert analyzer.config.enable_information is True
assert analyzer.config.max_memory_mb == 4096

def test_map_ep_to_ihv_qnn(self) -> None:
"""Test EP to IHV mapping for QNN."""
assert infer_ihv_from_ep_name("QNNExecutionProvider") == IHVType.QC
assert infer_ihv_from_ep_name("qnnexecutionprovider") == IHVType.QC
assert infer_ihv_from_ep_name("QualcommProvider") == IHVType.QC

def test_map_ep_to_ihv_openvino(self) -> None:
"""Test EP to IHV mapping for OpenVINO."""
assert infer_ihv_from_ep_name("OpenVINOExecutionProvider") == IHVType.INTEL
assert infer_ihv_from_ep_name("openvino") == IHVType.INTEL
assert infer_ihv_from_ep_name("IntelProvider") == IHVType.INTEL

def test_map_ep_to_ihv_vitisai(self) -> None:
"""Test EP to IHV mapping for VitisAI."""
assert infer_ihv_from_ep_name("VitisAIExecutionProvider") == IHVType.AMD
assert infer_ihv_from_ep_name("vitis") == IHVType.AMD
assert infer_ihv_from_ep_name("AMDProvider") == IHVType.AMD

def test_map_ep_to_ihv_nvidia(self) -> None:
"""Test EP to IHV mapping for NvTensorRTRTX."""
assert infer_ihv_from_ep_name("NvTensorRTRTXExecutionProvider") == IHVType.NVIDIA
assert infer_ihv_from_ep_name("nvtensorrtx") == IHVType.NVIDIA
assert infer_ihv_from_ep_name("TensorRTProvider") == IHVType.NVIDIA

def test_map_ep_to_ihv_invalid(self) -> None:
"""Test EP to IHV mapping with unrecognized EP resolves to MICROSOFT."""
assert infer_ihv_from_ep_name("InvalidEP") == IHVType.MICROSOFT

def test_analyze_file_not_found(self) -> None:
"""Test analyze with non-existent file."""
analyzer = ONNXStaticAnalyzer()
Expand Down
63 changes: 9 additions & 54 deletions tests/unit/analyze/test_has_rule_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,63 +27,18 @@
class TestInferIHVFromEPName:
    """Tests for infer_ihv_from_ep_name().

    The function is an exact lookup over canonical EP names, so these tests
    cover three things: every canonical name resolves, the well-known names
    resolve to the expected vendor, and anything else raises ``ValueError``.
    """

    def test_all_known_eps_resolve(self) -> None:
        """Every canonical EPName maps to a valid IHVType (map covers the Literal)."""
        from winml.modelkit.analyze.models.ihv_type import IHVType
        from winml.modelkit.utils.constants import EP_NAMES

        for ep in EP_NAMES:
            assert isinstance(infer_ihv_from_ep_name(ep), IHVType)

    def test_known_eps_map_to_expected_ihv(self) -> None:
        """Canonical EP names resolve to the specific vendor, not just any IHVType."""
        from winml.modelkit.analyze.models.ihv_type import IHVType

        expected = {
            "QNNExecutionProvider": IHVType.QC,
            "OpenVINOExecutionProvider": IHVType.INTEL,
            "VitisAIExecutionProvider": IHVType.AMD,
            "MIGraphXExecutionProvider": IHVType.AMD,
            "NvTensorRTRTXExecutionProvider": IHVType.NVIDIA,
            "CPUExecutionProvider": IHVType.MICROSOFT,
            "DmlExecutionProvider": IHVType.MICROSOFT,
        }
        for ep, ihv in expected.items():
            # Pass the EP name as the assertion message so a failure names the culprit.
            assert infer_ihv_from_ep_name(ep) == ihv, ep

    def test_unknown_ep_raises(self) -> None:
        """Unknown EP names raise rather than silently defaulting."""
        with pytest.raises(ValueError, match="unknown EP name"):
            infer_ihv_from_ep_name("TotallyFakeEP")

    def test_lookup_is_case_sensitive(self) -> None:
        """A wrongly-cased canonical name is not accepted by the exact lookup."""
        with pytest.raises(ValueError, match="unknown EP name"):
            infer_ihv_from_ep_name("qnnexecutionprovider")


class TestHasRuleDataForEP:
Expand Down
7 changes: 7 additions & 0 deletions tests/unit/compiler/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ def test_qlinear_for_qnn(self, tmp_path: Path) -> None:
onnx.save(model, str(path))
assert needs_format_conversion(path, "qnn") is True

def test_qlinear_for_qnn_canonical_name(self, tmp_path: Path) -> None:
    """The canonical EP name is accepted as well as the short alias."""
    model_file = tmp_path / "qlinear.onnx"
    # A QLinear op with no QDQ ops: the case that requires conversion for QNN.
    onnx.save(_make_simple_model(["QLinearConv", "Relu"]), str(model_file))
    result = needs_format_conversion(model_file, "QNNExecutionProvider")
    assert result is True

def test_qdq_for_qnn(self, tmp_path: Path) -> None:
model = _make_simple_model(["QuantizeLinear", "DequantizeLinear"])
path = tmp_path / "qdq.onnx"
Expand Down
Loading