From 64a27470363ed398110dc688869953ec7ab20bd5 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 10:06:45 +0200 Subject: [PATCH 01/54] Add OperatorDescriptor --- Deeploy/DeeployTypes.py | 155 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 8c2f5d2485..b1e11679d0 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1020,6 +1020,152 @@ def copy(self) -> NetworkContext: return copy.copy(self) +class IoDesc: + + def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None: + if isinstance(required, str): + required = [required] + self.required = required + if isinstance(optional, str): + optional = [optional] + self.optional = optional + + def symbolicName(self, idx: int) -> str: + return (self.required + self.optional)[idx] + + def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool: + return len(tensors) >= len(self.required) and \ + len(tensors) <= len(self.required) + len(self.optional) + + +class VariadicIoDesc(IoDesc): + + def __init__(self, baseName: str, minNumTensors: int = 0) -> None: + self.baseName = baseName + self.minNumTensors = minNumTensors + + def symbolicName(self, idx: int) -> str: + return f"{self.baseName}_{idx}" + + def checkTensors(self, tensors: Sequence[gs.Tensor]) -> bool: + return len(tensors) >= self.minNumTensors + + +@dataclass +class AttrDesc: + name: str + unpacker: Callable[[Any], Any] + default: Optional[Union[Any, Callable[[gs.Node], Any]]] = None + + @staticmethod + def _constUnpack(value: Any) -> Any: + if isinstance(value, gs.Constant): + return value.values.tolist() + elif isinstance(value, np.ndarray): + return value.tolist() + else: + return value + + def unpack(self, value: Any) -> Union[int, float, List[int], List[float]]: + return self.unpacker(self._constUnpack(value)) + + def getDefault(self, node: gs.Node) -> Any: + if callable(self.default): + return self.default(node) + else: + return self.default + + +@dataclass +class OperatorDescriptor: + inputDescriptor: IoDesc + outputDescriptor: IoDesc + attrDescriptors: List[AttrDesc] + + def check(self, node: gs.Node) -> bool: + """This method checks whether the node is valid. + + Parameters + ---------- + node : gs.Node + Graphsurgeon node to be validated + + Returns + ------- + bool : node validity + + """ + valid = True + + if not self.inputDescriptor.checkTensors(node.inputs): + # TODO: Change to logging + print(f"[ERROR OP {node.op}] Invalid input tensors: {node.inputs}") + valid = False + + if not self.outputDescriptor.checkTensors(node.outputs): + # TODO: Change to logging + print(f"[ERROR OP {node.op}] Invalid output tensors: {node.outputs}") + valid = False + + for attrDesc in self.attrDescriptors: + if attrDesc.default is None and not attrDesc.name in node.attrs: + # TODO: Change to logging + print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}") + valid = False + + return valid + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + _ = opset + for desc in self.attrDescriptors: + if desc.default is None: + value = node.attrs[desc.name] + else: + value = node.attrs.get(desc.name, desc.getDefault(node)) + try: + node.attrs[desc.name] = desc.unpack(value) + except ValueError as e: + raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. 
{e}") from e + return True + + def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor], + ioDesc: IoDesc) -> OperatorRepresentation: + opRepr = {} + for i, tensor in enumerate(tensors): + symName = ioDesc.symbolicName(i) + buffer = ctxt.lookup(tensor.name) + assert isinstance(buffer, VariableBuffer) + opRepr[symName] = buffer.name + opRepr[f"{symName}_shape"] = buffer.shape + opRepr[f"{symName}_size"] = math.prod(buffer.shape) + opRepr[f"{symName}_type"] = buffer._type + return opRepr + + def parseAttrs(self, node: gs.Node) -> OperatorRepresentation: + return node.attrs.copy() + + def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: + opReprs = { + "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor), + "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), + "attributes": self.parseAttrs(node), + } + + for (firstName, firstOpRepr), (secondName, secondOpRepr) in itertools.combinations(opReprs.items(), 2): + firstKeySet = set(firstOpRepr.keys()) + secondKeySet = set(secondOpRepr.keys()) + assert firstKeySet.isdisjoint(secondKeySet), \ + f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \ + f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. "\ + f"Overlapping keys: {firstKeySet & secondKeySet}" + + resultOpRepr = {} + for opRepr in opReprs.values(): + resultOpRepr.update(opRepr) + + return resultOpRepr + + class NodeParser(): """Deeploy's core Parser class. Analyzes network nodes and evaluates whether they can be mapped by it. @@ -2429,6 +2575,7 @@ def __init__(self, graph: gs.Graph, platform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', deeployStateDir: str = "DeeployState"): @@ -2453,6 +2600,7 @@ def __init__(self, """ self.graph = graph + self.operatorDescriptors = operatorDescriptors self.scheduler = scheduler self.layerBinding: 'OrderedDict[str, ONNXLayer]' = OrderedDict() self.parsed = False @@ -2582,6 +2730,13 @@ def _bindLayers(self): flatSchedule += subGraph for node in flatSchedule: + assert node.op in self.operatorDescriptors, \ f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}."
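+            # The descriptor drives the normalization and validation below:
+            # canonicalize() rewrites the attributes in place (gs.Constant and
+            # numpy values are unpacked into Python literals, missing attributes
+            # are filled from their defaults), then check() validates the tensor
+            # counts and required attributes. As a sketch: a MaxPool node
+            # arriving without "ceil_mode" leaves this step with
+            # ceil_mode = CeilMode.floor, and a kernel_shape list [3, 3] is
+            # unpacked to the tuple (3, 3).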
+ desc = self.operatorDescriptors[node.op] + desc.canonicalize(node, self.graph.opset) + assert desc.check(node), \ + f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator" + layer = self._mapNode(node) if isinstance(layer, ONNXLayer): log.debug(f" {SUCCESS_MARK} Bind {node.name} to layer {layer.__class__.__name__}") From c5a0c71e2bae852ede4966586143d0a6b2766ea1 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 09:57:15 +0200 Subject: [PATCH 02/54] Add OperatorDescriptor.py --- Deeploy/DeeployTypes.py | 9 +- Deeploy/OperatorDescriptor.py | 366 ++++++++++++++++++++++++++++++++++ 2 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 Deeploy/OperatorDescriptor.py diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index b1e11679d0..3282c56ec3 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1063,6 +1063,9 @@ def _constUnpack(value: Any) -> Any: return value.values.tolist() elif isinstance(value, np.ndarray): return value.tolist() + # LMACAN: hacky way to detect a 0-dim numpy array + elif hasattr(value, "ndim") and value.ndim == 0 and hasattr(value, "item"): + return value.item() else: return value @@ -1099,12 +1102,12 @@ def check(self, node: gs.Node) -> bool: if not self.inputDescriptor.checkTensors(node.inputs): # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid input tensors: {node.inputs}") + print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") valid = False if not self.outputDescriptor.checkTensors(node.outputs): # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid output tensors: {node.outputs}") + print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") valid = False for attrDesc in self.attrDescriptors: @@ -1124,7 +1127,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: value = node.attrs.get(desc.name, desc.getDefault(node)) try: node.attrs[desc.name] = desc.unpack(value) - except ValueError as e: + except Exception as e: raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e return True diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py new file mode 100644 index 0000000000..f6bd478184 --- /dev/null +++ b/Deeploy/OperatorDescriptor.py @@ -0,0 +1,366 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from enum import Enum, IntEnum +from typing import Any, Dict, Tuple + +import numpy as np +import onnx_graphsurgeon as gs + +from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc + + +def IntUnpack(value: Any) -> int: + if isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + + if isinstance(value, int): + return value + elif isinstance(value, float): + assert value.is_integer(), f"Received a non-integer value {value}" + return int(value) + raise ValueError(f"Unsupported value type {type(value)}") + + +def BoolUnpack(value: Any) -> bool: + value = IntUnpack(value) + assert value in [0, 1], f"Casting to bool only supported from 0, 1. 
Received {value}" + return bool(value) + + +def FloatUnpack(value: Any) -> float: + if isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + + assert isinstance(value, (int, float)), f"Unsupported value type {type(value)}" + return float(value) + + +def IntTupleUnpack(value: Any) -> Tuple[int, ...]: + try: + return tuple(IntUnpack(item) for item in value) + except TypeError: + return (IntUnpack(value),) + + +def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: + try: + return tuple(FloatUnpack(item) for item in value) + except TypeError: + return (FloatUnpack(value),) + + +def attrToTensor(node: gs.Node, attr: str) -> None: + values = node.attrs[attr] + if isinstance(values, (int, float)): + values = np.array([values]) + elif isinstance(values, (list, tuple)): + values = np.array(values) + assert isinstance(values, np.ndarray), f"Unsupported values type {type(values)}" + tensor = gs.Constant(f"{node.name}_{attr}", values) + node.inputs.append(tensor) + node.attrs.pop(attr) + + +concatDesc = OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +iRMSNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + + +class SliceDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 10: + attrToTensor(node, "starts") + attrToTensor(node, "ends") + if "axes" in node.attrs: + attrToTensor(node, "axes") + + return super().canonicalize(node, opset) + + +# Opset: 13 +sliceDesc = SliceDescriptor( + inputDescriptor = IoDesc(["data_in", "starts", "ends"], ["axes", "steps"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +# Opset: 1 +sliceDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axes", IntTupleUnpack, lambda n: range(len(n.attrs["starts"]))), + AttrDesc("ends", IntTupleUnpack), + AttrDesc("starts", IntTupleUnpack), + ], +) + +transposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("perm", IntTupleUnpack)], +) + + +class CeilMode(IntEnum): + floor = 0 + ceil = 1 + + +maxPoolDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("ceil_mode", unpacker = CeilMode, default = CeilMode.floor), + AttrDesc("kernel_shape", IntTupleUnpack), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("strides", IntTupleUnpack), + ]) + + +class PadMode(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + wrap = "wrap" + + +# Opset 24 +padDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "pads"], ["constant_value", "axes"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc('mode', unpacker = PadMode, default = PadMode.constant), + ], +) + + +class PadModeOld(str, Enum): + constant = "constant" + reflect = "reflect" + edge = "edge" + + +padDescOld = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("mode", unpacker = PadModeOld, default = PadModeOld.constant), + AttrDesc("pads", IntTupleUnpack), + AttrDesc("value", FloatUnpack), + ], +) + +addDesc = 
OperatorDescriptor( + inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + + +class ReduceMeanDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset < 18: + if "axes" in node.attrs: + attrToTensor(node, "axes") + return super().canonicalize(node, opset) + + +# Opset 18 +reduceMeanDesc = ReduceMeanDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +reduceSumDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in", optional = "axes"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("keepdims", unpacker = BoolUnpack, default = True), + AttrDesc("noop_with_empty_axes", unpacker = BoolUnpack, default = False), + ], +) + +softmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +softmaxGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["upstream_grad", "softmax_output"]), + outputDescriptor = IoDesc("softmax_grad"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = -1)], +) + +iSoftmaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("coeffA", IntUnpack), + AttrDesc("coeffB", IntUnpack), + AttrDesc("coeffC", IntUnpack), + AttrDesc("log2", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + ], +) + +itaPartialMaxDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("axis", IntUnpack, default = -1), + AttrDesc("n_levels", IntUnpack), + AttrDesc("group_width", IntUnpack), + ], +) + + +class GeluApprox(str, Enum): + tanh = "tanh" + none = "none" + + +geluDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ]) + +rqsIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) + +iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + ]) + +iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("D", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("n_levels", IntUnpack), + ]) + +quantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack, 
default = True), + AttrDesc("min_val", + IntUnpack, + default = lambda node: -(2**(node.attrs["bit_width"] - 1)) if node.attrs["signed"] else 0), + AttrDesc("max_val", + IntUnpack, + default = lambda node: 2**(node.attrs["bit_width"] - 1) - 1 + if node.attrs["signed"] else 2**node.attrs["bit_width"] - 1), + ], +) + + +class AutoPad(str, Enum): + NOTSET = "NOTSET" + SAME_UPPER = "SAME_UPPER" + SAME_LOWER = "SAME_LOWER" + VALID = "VALID" + + +def _dilationsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _kernelShapeDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return node.inputs[1].shape[-nSpatialDims:] + + +def _stridesDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + return tuple([1] * nSpatialDims) + + +def _padsDefault(node: gs.Node) -> Tuple[int, ...]: + # Remove 2 dims for input and output channels + nSpatialDims = len(node.inputs[1].shape) - 2 + # Two 0's per dimension for begin and end + return tuple([0] * (2 * nSpatialDims)) + + +convDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + +defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { + "Concat": concatDesc, + "Conv": convDesc, + "iRMSNorm": iRMSNormDesc, + "Slice": sliceDesc, + "Transpose": transposeDesc, + "MaxPool": maxPoolDesc, + "Pad": padDescOld, + "Add": addDesc, + "ReduceMean": reduceMeanDesc, + "ReduceSum": reduceSumDesc, + "Softmax": softmaxDesc, + "iSoftmax": iSoftmaxDesc, + "SoftmaxGrad": softmaxGradDesc, + "Gelu": geluDesc, + "RequantizediGELU": rqsIGeluDesc, + "iHardswish": iHardswishDesc, + "Quant": quantDesc, + "iNoNorm": iNoNormDesc, + "ITAMax": itaMaxDesc, + "ITAPartialMax": itaPartialMaxDesc, +} From e31ea13a65cb7748dabe4fdb0c1134d3ba2fb1aa Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 15:37:26 +0200 Subject: [PATCH 03/54] Add operatorDescriptors to NetworkDeployers --- .../NetworkDeployers/SignPropDeployer.py | 5 +++-- Deeploy/DeeployTypes.py | 9 ++++++++- .../NetworkDeployers/EngineColoringDeployer.py | 6 ++++-- .../NetworkDeployers/MemoryLevelDeployer.py | 10 ++++++---- Deeploy/Targets/Chimera/Deployer.py | 4 +++- Deeploy/Targets/CortexM/Deployer.py | 4 +++- Deeploy/Targets/Generic/Deployer.py | 4 +++- Deeploy/Targets/MemPool/Deployer.py | 5 +++-- Deeploy/Targets/Neureka/Deployer.py | 5 +++-- Deeploy/Targets/PULPOpen/Deployer.py | 5 ++++- Deeploy/Targets/Snitch/Deployer.py | 4 +++- Deeploy/Targets/SoftHier/Deployer.py | 5 +++-- DeeployTest/testMemoryLevelExtension.py | 5 +++++ DeeployTest/testUtils/dmaUtils.py | 3 +++ DeeployTest/testUtils/platformMapping.py | 15 ++++++++++++++- 15 files changed, 68 insertions(+), 21 deletions(-) diff --git a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py index 
7a9fbea1ae..e576ff865b 100644 --- a/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py +++ b/Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py @@ -7,7 +7,7 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.Logging import DEFAULT_LOGGER as log @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if inputOffsets == {}: diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 3282c56ec3..91d22d55ad 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -3339,6 +3339,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, @@ -3371,7 +3372,13 @@ def __init__(self, """ - super().__init__(graph, deploymentPlatform, inputTypes, scheduler, name, deeployStateDir = deeployStateDir) + super().__init__(graph, + deploymentPlatform, + inputTypes, + operatorDescriptors, + scheduler, + name, + deeployStateDir = deeployStateDir) self.loweringOptimizer = loweringOptimizer self.default_channels_first = default_channels_first diff --git a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py index 4b05ab5be4..eb7175f613 100644 --- a/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py +++ b/Deeploy/EngineExtension/NetworkDeployers/EngineColoringDeployer.py @@ -8,7 +8,8 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, Schedule, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, ONNXLayer, OperatorDescriptor, Schedule, \ + TopologyOptimizer from Deeploy.EngineExtension.OptimizationPasses.TopologyOptimizationPasses.EngineColoringPasses import \ EngineColoringPass, EngineMapper @@ -20,12 +21,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", engineMapperCls: Type[EngineMapper] = EngineMapper): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, 
loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self._initEngineColoringDeployer(engineMapperCls) diff --git a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py index 2599f9e819..d75b28433e 100644 --- a/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py +++ b/Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py @@ -11,8 +11,8 @@ from Deeploy.CommonExtensions.NetworkDeployers.NetworkDeployerWrapper import NetworkDeployerWrapper from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.DeeployTypes import CodeGenVerbosity, ConstantBuffer, DeploymentEngine, DeploymentPlatform, \ - NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, Schedule, StructBuffer, \ - TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity + NetworkContext, NetworkDeployer, NetworkOptimizationPass, NetworkOptimizer, OperatorDescriptor, Schedule, \ + StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer, _NoVerbosity from Deeploy.Logging import DEFAULT_LOGGER as log from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel @@ -112,12 +112,13 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable[[gs.Graph], Schedule] = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) @@ -155,13 +156,14 @@ def __init__(self, deploymentPlatform: Union[MemoryPlatform, MemoryPlatformWrapper], inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}, memoryLevelAnnotationPasses: List[NetworkOptimizationPass] = []): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if len(memoryLevelAnnotationPasses) == 0: memoryLevelAnnotationPasses.append(AnnotateDefaultMemoryLevel(self.Platform.memoryHierarchy)) diff --git a/Deeploy/Targets/Chimera/Deployer.py b/Deeploy/Targets/Chimera/Deployer.py index ba28279b66..85b0496e39 100644 --- a/Deeploy/Targets/Chimera/Deployer.py +++ b/Deeploy/Targets/Chimera/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import 
DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class ChimeraDeployer(SignPropDeployer): @@ -18,6 +18,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -27,6 +28,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/CortexM/Deployer.py b/Deeploy/Targets/CortexM/Deployer.py index bef8fdcf36..9a4f27b061 100644 --- a/Deeploy/Targets/CortexM/Deployer.py +++ b/Deeploy/Targets/CortexM/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Generic/Deployer.py b/Deeploy/Targets/Generic/Deployer.py index 3cef57a2ea..9bf89a8a0e 100644 --- a/Deeploy/Targets/Generic/Deployer.py +++ b/Deeploy/Targets/Generic/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -32,6 +33,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/MemPool/Deployer.py b/Deeploy/Targets/MemPool/Deployer.py index 5431320978..968787972a 100644 --- a/Deeploy/Targets/MemPool/Deployer.py +++ b/Deeploy/Targets/MemPool/Deployer.py @@ -11,7 +11,7 @@ from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.DebugPasses import DebugPrintMergePass from 
Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/Deeploy/Targets/Neureka/Deployer.py b/Deeploy/Targets/Neureka/Deployer.py index be34e1f4d3..6d96f8d097 100644 --- a/Deeploy/Targets/Neureka/Deployer.py +++ b/Deeploy/Targets/Neureka/Deployer.py @@ -9,7 +9,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, PULPNCHWtoNHWCPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Neureka.TopologyOptimizationPasses.Passes import ConvEngineDiscolorationPass, \ NeurekaOptimizationPass from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer @@ -22,12 +22,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda graph: list(graph.nodes), name: str = 'DeeployNetwork', default_channels_first = False, deeployStateDir: str = "DeeployStateDir", inputOffsets = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir, inputOffsets) if self.Platform.engines[0].enable3x3: diff --git a/Deeploy/Targets/PULPOpen/Deployer.py b/Deeploy/Targets/PULPOpen/Deployer.py index 86bf02e578..17412c8da4 100644 --- a/Deeploy/Targets/PULPOpen/Deployer.py +++ b/Deeploy/Targets/PULPOpen/Deployer.py @@ -12,7 +12,8 @@ from Deeploy.CommonExtensions.OptimizationPasses.BindingsOptimizationPasses.AutoTranspose import AutoTransposeMergePass from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ PULPNCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, TopologyOptimizer, VariableBuffer +from Deeploy.DeeployTypes import ConstantBuffer, DeploymentPlatform, NodeTemplate, OperatorDescriptor, \ + TopologyOptimizer, VariableBuffer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeNoPermOptPass, TransposeSplitPass from 
Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import RQAddTransposeSquashPass @@ -33,6 +34,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -42,6 +44,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/Snitch/Deployer.py b/Deeploy/Targets/Snitch/Deployer.py index 7c3922a6bb..4daab3b9f5 100644 --- a/Deeploy/Targets/Snitch/Deployer.py +++ b/Deeploy/Targets/Snitch/Deployer.py @@ -10,7 +10,7 @@ from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ NCHWtoNHWCPass, RemoveGlobalOutputReshapePass, TransposeMatmulInputsPass -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ReshapeConstOptPass, TransposeConstOptPass, \ TransposeMergePass, TransposeSplitPass @@ -22,6 +22,7 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first = False, @@ -31,6 +32,7 @@ def __init__(self, deploymentPlatform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name, default_channels_first = default_channels_first, diff --git a/Deeploy/Targets/SoftHier/Deployer.py b/Deeploy/Targets/SoftHier/Deployer.py index e4ab37f299..4827ba83b9 100644 --- a/Deeploy/Targets/SoftHier/Deployer.py +++ b/Deeploy/Targets/SoftHier/Deployer.py @@ -8,7 +8,7 @@ from Deeploy.AbstractDataTypes import Pointer from Deeploy.CommonExtensions.NetworkDeployers.SignPropDeployer import SignPropDeployer -from Deeploy.DeeployTypes import DeploymentPlatform, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, OperatorDescriptor, TopologyOptimizer class SoftHierDeployer(SignPropDeployer): @@ -18,12 +18,13 @@ def __init__(self, deploymentPlatform: DeploymentPlatform, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: TopologyOptimizer, + operatorDescriptors: Dict[str, OperatorDescriptor], scheduler: Callable = lambda x: x, name: str = 'DeeployNetwork', default_channels_first: bool = True, deeployStateDir: str = "DeeployState", inputOffsets: Dict[str, int] = {}): - super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, scheduler, name, + super().__init__(graph, deploymentPlatform, inputTypes, loweringOptimizer, operatorDescriptors, scheduler, name, default_channels_first, deeployStateDir) self.inputOffsets = inputOffsets diff --git a/DeeployTest/testMemoryLevelExtension.py b/DeeployTest/testMemoryLevelExtension.py index 0e1ed6cc43..a6a1cf37d2 100644 --- a/DeeployTest/testMemoryLevelExtension.py +++ b/DeeployTest/testMemoryLevelExtension.py @@ -18,6 +18,7 @@ from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ 
MemoryLevelAwareSignPropDeployer +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.CortexM.Platform import CMSISEngine, CMSISMapping, CMSISOptimizer, CMSISPlatform from Deeploy.Targets.Generic.Platform import GenericEngine, GenericMapping, GenericOptimizer, GenericPlatform from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import TransposeConstOptPass, TransposeMergePass @@ -83,6 +84,7 @@ MockPlatform, inputTypes, CMSISOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -106,6 +108,7 @@ MockPlatform, inputTypes, MemPoolOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetwork", deeployStateDir = _DEEPLOYSTATEDIR, @@ -121,6 +124,7 @@ MockPlatform, inputTypes, GenericOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, @@ -136,6 +140,7 @@ MockPlatform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, name = "DeeployNetworkMock", deeployStateDir = _DEEPLOYSTATEDIRMOCK, diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 3266ce5129..3205275fda 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -17,6 +17,7 @@ MemoryPlatformWrapper from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel, \ AnnotateIOMemoryLevel +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, PULPOptimizer from Deeploy.Targets.Snitch.Deployer import SnitchDeployer @@ -299,6 +300,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, + defaultOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -340,6 +342,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, + defaultOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] diff --git a/DeeployTest/testUtils/platformMapping.py b/DeeployTest/testUtils/platformMapping.py index 48c5777905..d02c3da64e 100644 --- a/DeeployTest/testUtils/platformMapping.py +++ b/DeeployTest/testUtils/platformMapping.py @@ -7,9 +7,10 @@ import onnx_graphsurgeon as gs from Deeploy.AbstractDataTypes import Pointer -from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer +from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, OperatorDescriptor, TopologyOptimizer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper +from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.Chimera.Deployer import ChimeraDeployer from Deeploy.Targets.Chimera.Platform import ChimeraOptimizer, ChimeraPlatform from Deeploy.Targets.CortexM.Deployer import CMSISDeployer @@ -93,6 +94,7 @@ def mapDeployer(platform: DeploymentPlatform, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], loweringOptimizer: Optional[TopologyOptimizer] = None, + operatorDescriptors: 
Optional[Dict[str, OperatorDescriptor]] = None, scheduler: Optional[Callable] = None, name: Optional[str] = None, default_channels_first: Optional[bool] = None, @@ -108,6 +110,9 @@ def mapDeployer(platform: DeploymentPlatform, if name is None: name = "DeeployNetwork" + if operatorDescriptors is None: + operatorDescriptors = defaultOperatorDescriptors + if isinstance(platform, CMSISPlatform): if loweringOptimizer is None: @@ -120,6 +125,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -138,6 +144,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -156,6 +163,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -177,6 +185,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -195,6 +204,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -212,6 +222,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -228,6 +239,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, @@ -244,6 +256,7 @@ def mapDeployer(platform: DeploymentPlatform, platform, inputTypes, loweringOptimizer, + operatorDescriptors, scheduler, name = name, default_channels_first = default_channels_first, From 550b559d8fcb083e01ee6b566bd190a5e96ceccb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 15:37:52 +0200 Subject: [PATCH 04/54] Fix extract padding pass --- .../TopologyOptimizationPasses/Passes.py | 69 ++++++++++--------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py index b881529f7e..09ed0b6c7d 100644 --- a/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py @@ -353,44 +353,49 @@ def __init__(self): super().__init__(graph, _split_add_fun, name) -def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0): +def _extract_padding_fun_conv(graph: gs.Graph, match: Match, name: str, value = 0) -> gs.Graph: + conv = list(match.nodes_map.values())[0] - matched_nodes = [m for k, m in match.nodes_map.items()] - conv = matched_nodes[0] - if 'pads' in conv.attrs and np.sum(conv.attrs['pads']) > 1: - pads = copy.deepcopy(conv.attrs['pads']) - shape = copy.deepcopy(conv.inputs[0].shape) - newPads = np.zeros(2 * len(shape)) - assert len(shape) - 2 == len(pads) / 2, "Conv padding dims do not match!" 
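The replacement code further below maps the Conv's spatial-only pads attribute onto the full-rank pads of an explicit Pad node; a worked sketch of that mapping, assuming an NCHW input (shapes and pad values are illustrative, not taken from the patch):

convShape = [1, 8, 16, 16]    # assumed NCHW input shape of the Conv
convPads = [1, 2, 1, 2]       # spatial pads only: H_begin, W_begin, H_end, W_end
nonSpatialDimCount = len(convShape) - len(convPads) // 2    # 2 (batch and channel dims)
beginConvPads = convPads[:len(convPads) // 2]    # [1, 2]
endConvPads = convPads[len(convPads) // 2:]      # [1, 2]
pads = [0] * nonSpatialDimCount + beginConvPads + [0] * nonSpatialDimCount + endConvPads
# pads == [0, 0, 1, 2, 0, 0, 1, 2]
shape = [begin + dim + end for dim, begin, end in zip(convShape, pads[:len(pads) // 2], pads[len(pads) // 2:])]
# shape == [1, 8, 18, 20], the shape of the padded intermediate tensor

The zero entries cover the batch and channel dimensions, matching the ONNX Pad layout [x1_begin, x2_begin, ..., x1_end, x2_end, ...].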
- newShape = shape + if 'pads' not in conv.attrs: + return graph - beginPads = pads[0:len(pads) // 2] - endPads = pads[len(pads) // 2:] - for idx, i in enumerate(beginPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[2 + idx] = i + convPads = conv.attrs['pads'] - for idx, i in enumerate(endPads): - newShape[2 + idx] = newShape[2 + idx] + i - newPads[len(newPads) // 2 + 2 + idx] = i + if all(p == 0 for p in convPads): + return graph - newConvInput = gs.Variable(name + '_padded_input', dtype = np.float32, shape = newShape) - #valConst = gs.Constant('value', np.array(0)) - conv.attrs['pads'] = [0 for pad in conv.attrs['pads']] - newPad = gs.Node(op = 'Pad', - name = name + '_pad', - attrs = { - 'pads': newPads, - 'mode': 'constant', - 'value': value - }, - inputs = [conv.inputs[0]], - outputs = [newConvInput]) + inTensor = conv.inputs[0] + assert isinstance(inTensor, gs.Variable) + convShape = inTensor.shape - conv.inputs[0] = newConvInput - graph.nodes.append(newPad) - graph.cleanup().toposort() + beginConvPads = convPads[0:len(convPads) // 2] + endConvPads = convPads[len(convPads) // 2:] + + nonSpatialDimCount = len(convShape) - (len(convPads) // 2) + pads = [0] * nonSpatialDimCount + beginConvPads + [0] * nonSpatialDimCount + endConvPads + shape = [] + for dim, begin, end in zip(convShape, pads[:len(pads) // 2], pads[len(pads) // 2:]): + shape.append(begin + dim + end) + + paddedInput = gs.Variable(f"{name}_{inTensor.name}", dtype = np.float32, shape = shape) + + newPad = gs.Node(op = 'Pad', + name = name + '_pad', + attrs = { + 'pads': pads, + 'mode': 'constant', + 'value': value + }, + inputs = [conv.inputs[0]], + outputs = [paddedInput]) + + graph.nodes.append(newPad) + + conv.attrs['pads'] = [0] * len(convPads) + conv.inputs[0] = paddedInput + + graph.cleanup().toposort() return graph From ab9fdfece7ccaa09085978de0d39c55d089f45bb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 21:52:36 +0200 Subject: [PATCH 05/54] Fix isoftmax parser --- Deeploy/Targets/Generic/Parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 7752834c50..8b1ddf8f73 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -669,11 +669,11 @@ def parseNode(self, node: gs.Node) -> bool: ]) if wellFormed: - self.operatorRepresentation['coeffA'] = int(node.attrs['coeffA'].values) - self.operatorRepresentation['coeffB'] = int(node.attrs['coeffB'].values) - self.operatorRepresentation['coeffC'] = int(node.attrs['coeffC'].values) - self.operatorRepresentation['log2'] = int(node.attrs['log2'].values) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['coeffA'] = node.attrs['coeffA'] + self.operatorRepresentation['coeffB'] = node.attrs['coeffB'] + self.operatorRepresentation['coeffC'] = node.attrs['coeffC'] + self.operatorRepresentation['log2'] = node.attrs['log2'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return wellFormed From a410763f4745af05bea9491f2293c53c64f1faaf Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:01:26 +0200 Subject: [PATCH 06/54] Fix iRMSNorm and iNoNorm parsers --- Deeploy/Targets/Generic/Parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 8b1ddf8f73..ab12a09d3c 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ 
b/Deeploy/Targets/Generic/Parsers.py @@ -52,7 +52,7 @@ def parseNode(self, node: gs.Node) -> (bool): if ret: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) return ret @@ -848,8 +848,8 @@ def parseNode(self, node: gs.Node) -> bool: if ret: self.operatorRepresentation['D'] = node.attrs['D'] - self.operatorRepresentation['log2D'] = int(np.log2(node.attrs['D'].values).tolist()[0]) - self.operatorRepresentation['mul'] = int(node.attrs['mul'].values.tolist()[0]) + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['D'])) + self.operatorRepresentation['mul'] = node.attrs['mul'] self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return ret From f6027fb72ca6a199732f577fa1ed6db003946f08 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:21:04 +0200 Subject: [PATCH 07/54] Fix ReduceMean type signature --- Deeploy/Targets/Generic/Bindings.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 6bfe805b39..b29e403d55 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -8,7 +8,7 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ - int8_t, int32_t, uint8_t + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, BatchNormalizationTemplate, ConcatTemplate, ConvTemplate, \ @@ -195,13 +195,11 @@ ] BasicReduceMeanBindings = [ - NodeBinding(ReduceMeanChecker([PointerClass(type)], [PointerClass(type)]), ReduceMeanTemplate.referenceTemplate, - BasicTransformer) for type in SignedIntegerDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + ReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in SignedIntegerDataTypes ] + [ - NodeBinding(ReduceMeanChecker([PointerClass(float_type), PointerClass(integer_type)], [PointerClass(float_type)]), - FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) - for integer_type in SignedIntegerDataTypes - for float_type in FloatDataTypes + NodeBinding(ReduceMeanChecker([PointerClass(ty), PointerClass(int64_t)], [PointerClass(ty)]), + FloatReduceMeanTemplate.referenceTemplate, BasicTransformer) for ty in FloatDataTypes ] BasicReduceSumBindings = [ From 475b337cc99b4282529cce8a9d1e213858672687 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:23:49 +0200 Subject: [PATCH 08/54] Fix itamax and itapartialmax parsers --- Deeploy/Targets/Generic/Parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ab12a09d3c..c8ecf9e83e 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -698,7 +698,7 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + 
self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False @@ -725,8 +725,8 @@ def parseNode(self, node: gs.Node) -> bool: ret = all(['group_width' in node.attrs, 'n_levels' in node.attrs]) if ret and wellFormed: - self.operatorRepresentation['group_width'] = int(node.attrs['group_width']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) + self.operatorRepresentation['group_width'] = node.attrs['group_width'] + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] return True return False From c6c310912315be33bb71805a07bcf4889c336a1f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 22:52:17 +0200 Subject: [PATCH 09/54] Fix attr comparison to compare with tuple in neureka --- Deeploy/Targets/Neureka/Parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Deeploy/Targets/Neureka/Parsers.py b/Deeploy/Targets/Neureka/Parsers.py index 3c564c10b2..1d3db0d882 100644 --- a/Deeploy/Targets/Neureka/Parsers.py +++ b/Deeploy/Targets/Neureka/Parsers.py @@ -18,7 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not all([ # No dilation support - self.operatorRepresentation['dilations'] == [1, 1], + self.operatorRepresentation['dilations'] == (1, 1), # Channels have to be last 'channels_first' in self.operatorRepresentation and not self.operatorRepresentation['channels_first'], # Expect "weight_offset" attribute in the node @@ -129,7 +129,7 @@ def parseNode(self, node: gs.Node) -> bool: return False if not all([ - self.operatorRepresentation['kernel_shape'] == [1, 1], + self.operatorRepresentation['kernel_shape'] == (1, 1), self.operatorRepresentation['group'] == 1, ]): return False From cd2270c540f51d92090f438af58f4eae9077c217 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Wed, 24 Sep 2025 23:03:24 +0200 Subject: [PATCH 10/54] Fix keepdims type in fuse mhsa pass --- Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py index 49f317caa4..46bad04cea 100644 --- a/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py +++ b/Deeploy/Targets/MemPool/TopologyOptimizationPasses/Passes.py @@ -289,7 +289,7 @@ def get_constant_input_or_zeros(n: gs.Node, shape): name = name + "_sum", attrs = { 'axes': [1], - "keepdims": "0" + "keepdims": 0 }) mhsa_out[0].shape = [_output.shape[0]] + [int(H)] + _output.shape[1:] From 2e62e8451b4c05e2f580f999b3c2237e5922c9fc Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 11:56:49 +0200 Subject: [PATCH 11/54] Fix old _unpack_const to pass Python literals --- Deeploy/DeeployTypes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 91d22d55ad..aecb112b57 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1292,7 +1292,9 @@ def _unpack_const(attr) -> Union[int, float]: The attributes can either be a numpy scalar value or a Constant tensor. This expects the numpy value to be of size 1. 
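    For example (illustrative): a gs.Constant wrapping np.array([5]) unpacks to
    the Python int 5, while plain Python literals such as 5, 2.0 or True are
    returned unchanged.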
""" - if isinstance(attr, gs.Constant): + if isinstance(attr, (int, float, bool, str)): + return attr + elif isinstance(attr, gs.Constant): value = attr.values elif isinstance(attr, np.ndarray): value = attr From 587d6deea3140243d4910a540bfc17a1577a4a17 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Thu, 25 Sep 2025 11:57:42 +0200 Subject: [PATCH 12/54] Add RequantizedConv desc --- Deeploy/OperatorDescriptor.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f6bd478184..a3fe275366 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -342,9 +342,44 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ], ) + +class RequantizedConvDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if "n_levels_out" in node.attrs and "n_levels" in node.attrs: + # TODO: Change to log + print("[WARNING] RequantizedConv cannot have n_levels_out and n_levels in it's attributes") + return False + + if "n_levels_out" in node.attrs: + node.attrs["n_levels"] = node.attrs["n_levels_out"] + node.attrs.pop("n_levels_out") + + return super().canonicalize(node, opset) + + +requantizedConvDesc = RequantizedConvDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # Conv attrs + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Concat": concatDesc, "Conv": convDesc, + "RequantizedConv": requantizedConvDesc, "iRMSNorm": iRMSNormDesc, "Slice": sliceDesc, "Transpose": transposeDesc, From 0ccd3b8330a3e96cb5d069afc1465131e80c2c31 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 13:50:17 +0200 Subject: [PATCH 13/54] Fix DW parser --- Deeploy/Targets/PULPOpen/Parsers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index e94af6e420..eebe3ad406 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -206,12 +206,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['stride_x'] = int(self.operatorRepresentation['strides'][0]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][1]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret return False From c2f2bb2c0430f6e94cd416608e01e65dd6da8b3c Mon Sep 17 00:00:00 
2001 From: Luka Macan Date: Sun, 28 Sep 2025 13:52:57 +0200 Subject: [PATCH 14/54] Fix pulp 1D conv --- Deeploy/Targets/PULPOpen/Parsers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Parsers.py b/Deeploy/Targets/PULPOpen/Parsers.py index eebe3ad406..51b26ae546 100644 --- a/Deeploy/Targets/PULPOpen/Parsers.py +++ b/Deeploy/Targets/PULPOpen/Parsers.py @@ -133,13 +133,9 @@ def parseNode(self, node: gs.Node) -> (bool): self.operatorRepresentation['padding_y_bottom'] = int(self.operatorRepresentation['pads'][1]) self.operatorRepresentation['stride_y'] = int(self.operatorRepresentation['strides'][0]) - if 'n_levels' in node.attrs: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - else: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels_out'].values) - - self.operatorRepresentation['signed'] = int(node.attrs['signed'].values) - self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'].values)) + self.operatorRepresentation['n_levels'] = node.attrs['n_levels'] + self.operatorRepresentation['signed'] = node.attrs['signed'] + self.operatorRepresentation['log2D'] = int(math.log2(node.attrs['div'])) return ret def parseNodeCtxt(self, From 0b6032972a4a1e971550188bd6b08b4e32ad0651 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:08:36 +0200 Subject: [PATCH 15/54] Sort operator descriptors alphabetically --- Deeploy/OperatorDescriptor.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index a3fe275366..3dc0f5fd7f 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -377,25 +377,25 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ) defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { + "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, - "RequantizedConv": requantizedConvDesc, - "iRMSNorm": iRMSNormDesc, - "Slice": sliceDesc, - "Transpose": transposeDesc, + "Gelu": geluDesc, + "ITAMax": itaMaxDesc, + "ITAPartialMax": itaPartialMaxDesc, "MaxPool": maxPoolDesc, "Pad": padDescOld, - "Add": addDesc, + "Quant": quantDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, + "RequantizedConv": requantizedConvDesc, + "RequantizediGELU": rqsIGeluDesc, + "Slice": sliceDesc, "Softmax": softmaxDesc, - "iSoftmax": iSoftmaxDesc, "SoftmaxGrad": softmaxGradDesc, - "Gelu": geluDesc, - "RequantizediGELU": rqsIGeluDesc, + "Transpose": transposeDesc, "iHardswish": iHardswishDesc, - "Quant": quantDesc, "iNoNorm": iNoNormDesc, - "ITAMax": itaMaxDesc, - "ITAPartialMax": itaPartialMaxDesc, + "iRMSNorm": iRMSNormDesc, + "iSoftmax": iSoftmaxDesc, } From a19f98a080dd8a3d0daf56fe5e32a0304c038630 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:16:45 +0200 Subject: [PATCH 16/54] Add DequantDescriptor --- Deeploy/OperatorDescriptor.py | 12 ++++++++++++ Deeploy/Targets/Generic/Parsers.py | 10 ++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 3dc0f5fd7f..cbb5d723ba 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -376,10 +376,22 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +dequantDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("scale", FloatUnpack), + AttrDesc("zero_point", 
FloatUnpack), + AttrDesc("bit_width", IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, + "Dequant": dequantDesc, "Gelu": geluDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index c8ecf9e83e..b43672d9c9 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2488,12 +2488,10 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - self.operatorRepresentation['scale'] = float(node.attrs['scale']) - self.operatorRepresentation['zero_point'] = float(node.attrs['zero_point']) - self.operatorRepresentation['bit_width'] = int(node.attrs['bit_width']) - - self.operatorRepresentation['signed'] = bool(node.attrs['signed']) - + self.operatorRepresentation['scale'] = node.attrs['scale'] + self.operatorRepresentation['zero_point'] = node.attrs['zero_point'] + self.operatorRepresentation['bit_width'] = node.attrs['bit_width'] + self.operatorRepresentation['signed'] = node.attrs['signed'] return ret def parseNodeCtxt(self, From 4af65525156222cef52333bdfabcabe87c4afe69 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:26:47 +0200 Subject: [PATCH 17/54] Add Div, IntegerDiv, RQIntegerDiv --- Deeploy/OperatorDescriptor.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index cbb5d723ba..ac486f4af9 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -387,12 +387,44 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +divDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["input1", "input2"]), + outputDescriptor = IoDesc("output"), + attrDescriptors = [], +) + +integerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + ], +) + +requantizedIntegerDivDescriptor = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [ + # IntegerDiv attrs + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("eta", IntUnpack), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, "Dequant": dequantDesc, + "Div": divDesc, "Gelu": geluDesc, + "IntegerDiv": integerDivDescriptor, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "MaxPool": maxPoolDesc, @@ -402,6 +434,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, "RequantizediGELU": rqsIGeluDesc, + "RQIntegerDiv": requantizedIntegerDivDescriptor, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From 2e2e3dfbb74c45d7e9cb63624e63ffc48d31ccbb Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:37:24 +0200 Subject: [PATCH 18/54] Add DebugPrint, LayerNormalization, iLayerNorm --- Deeploy/OperatorDescriptor.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py 
b/Deeploy/OperatorDescriptor.py index ac486f4af9..c6f9aa49cd 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -417,29 +417,50 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +debugPrintDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +layerNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("epsilon", FloatUnpack)], +) + +iLayerNormDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight", "bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, "Conv": convDesc, + "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, "Gelu": geluDesc, - "IntegerDiv": integerDivDescriptor, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, + "IntegerDiv": integerDivDescriptor, + "LayerNormalization": layerNormalizationDesc, "MaxPool": maxPoolDesc, "Pad": padDescOld, "Quant": quantDesc, + "RQIntegerDiv": requantizedIntegerDivDescriptor, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, "RequantizediGELU": rqsIGeluDesc, - "RQIntegerDiv": requantizedIntegerDivDescriptor, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, "Transpose": transposeDesc, "iHardswish": iHardswishDesc, + "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, "iRMSNorm": iRMSNormDesc, "iSoftmax": iSoftmaxDesc, From 9ac9a62ae0713220b4f4693f8565c78781ae1539 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:44:13 +0200 Subject: [PATCH 19/54] Add RequantizedOperatorDescriptor --- Deeploy/OperatorDescriptor.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index c6f9aa49cd..bd57ae822c 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -255,12 +255,12 @@ class GeluApprox(str, Enum): AttrDesc("approximate", GeluApprox, default = GeluApprox.none), ]) -rqsIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), - outputDescriptor = IoDesc("data_out"), - attrDescriptors = [ - AttrDesc("b", IntUnpack), - AttrDesc("one", IntUnpack), - ]) +requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ]) iHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), @@ -343,7 +343,7 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ) -class RequantizedConvDescriptor(OperatorDescriptor): +class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: @@ -358,7 +358,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: return super().canonicalize(node, opset) -requantizedConvDesc = RequantizedConvDescriptor( +requantizedConvDesc = RequantizedOperatorDescriptor( inputDescriptor = IoDesc(["data_in", "weight", "mul", "add"], optional = ["shift"]), outputDescriptor = 
IoDesc("data_out"), attrDescriptors = [ @@ -403,7 +403,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) -requantizedIntegerDivDescriptor = OperatorDescriptor( +requantizedIntegerDivDescriptor = RequantizedOperatorDescriptor( inputDescriptor = IoDesc(["A", "B", "requant_mul", "requant_add", "requant_div"]), outputDescriptor = IoDesc("C"), attrDescriptors = [ @@ -454,7 +454,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, - "RequantizediGELU": rqsIGeluDesc, + "RequantizediGELU": requantizedIGeluDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From e01fdb034426ad24ce969525e19b82f9ea9692ad Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:48:48 +0200 Subject: [PATCH 20/54] Add flatten and gather --- Deeploy/OperatorDescriptor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index bd57ae822c..462b4c4fc7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -435,6 +435,18 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("D", IntUnpack), AttrDesc("n_levels", IntUnpack)], ) +flattenDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 1)], +) + +gatherDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "indices"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -442,6 +454,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, + "Flatten": flattenDesc, + "Gather": gatherDesc, "Gelu": geluDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, From 1db3ae7e951be7c41f50d22c0b2b0d933dc808bd Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 14:55:09 +0200 Subject: [PATCH 21/54] Add Squeeze and Unsqueeze --- Deeploy/OperatorDescriptor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 462b4c4fc7..0b5836fa2d 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -447,6 +447,20 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], ) +# Opset <= 11 +unsqueezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + +# Opset <= 11 +squeezeDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -472,7 +486,9 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, + "Squeeze": squeezeDesc, "Transpose": transposeDesc, + "Unsqueeze": unsqueezeDesc, "iHardswish": iHardswishDesc, "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, From fd30dc727c161c2e4d4e63e5083b5ea518d45062 Mon Sep 17 00:00:00 2001 From: Luka 
Macan Date: Sun, 28 Sep 2025 16:46:06 +0200 Subject: [PATCH 22/54] Add Mul --- Deeploy/OperatorDescriptor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 0b5836fa2d..7f36e9a4bf 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -461,6 +461,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], ) +mulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("C"), + attrDescriptors = [], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -476,6 +482,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, "MaxPool": maxPoolDesc, + "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, From a3309edf2fd30eed2bf9a65fc89494fc00ff76a4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 16:53:01 +0200 Subject: [PATCH 23/54] Add MatMul, RQMatMul, MatMulInteger --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 7f36e9a4bf..edbaf6a530 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -467,6 +467,23 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [], ) +matMulDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +rqMatMulDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -481,11 +498,14 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, + "MatMul": matMulDesc, + "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, + "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, From c758fccb89e67ceb676e89bcb89320723b24422f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 17:15:12 +0200 Subject: [PATCH 24/54] Add Gemm and RQGemm --- Deeploy/OperatorDescriptor.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index edbaf6a530..327c7b442d 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -484,6 +484,31 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +gemmDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["A", "B"], optional = ["C"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + ], +) + +rqGemmDesc = 
RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "C", "add", "mul"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -494,6 +519,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Flatten": flattenDesc, "Gather": gatherDesc, "Gelu": geluDesc, + "Gemm": gemmDesc, "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, @@ -504,6 +530,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "Mul": mulDesc, "Pad": padDescOld, "Quant": quantDesc, + "RQGemm": rqGemmDesc, "RQIntegerDiv": requantizedIntegerDivDescriptor, "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, From 7e951d83f68eafd8f0241a6a17fbee6cdfa49516 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:06:59 +0200 Subject: [PATCH 25/54] Add RequantizedGemm --- Deeploy/OperatorDescriptor.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 327c7b442d..e347dad8a6 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -509,6 +509,20 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +requantizedGemmDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["A", "B", "add", "mul"]), # Important diff to RQGemm + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("alpha", FloatUnpack, default = 1.0), + AttrDesc("beta", FloatUnpack, default = 1.0), + AttrDesc("transA", BoolUnpack, default = False), + AttrDesc("transB", BoolUnpack, default = False), + # RequantizedShift attrs + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ]) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -536,6 +550,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "RequantizedConv": requantizedConvDesc, + "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, From 1ab763e2d2f7abb81b8939ecb8741cf65dab402f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 29 Sep 2025 08:24:27 +0200 Subject: [PATCH 26/54] Fix transA and transB being treated like ints --- Deeploy/Targets/Generic/Parsers.py | 47 ++++++++++++------- .../Generic/Templates/FloatGemmTemplate.py | 4 +- .../Targets/Generic/Templates/GemmTemplate.py | 4 +- Deeploy/Targets/Generic/TypeCheckers.py | 6 +-- .../Targets/MemPool/Templates/GemmTemplate.py | 4 +- .../MemPool/Templates/RQGemmTemplate.py | 8 ++-- .../PULPOpen/Templates/FloatGemmTemplate.py | 4 +- .../TileConstraints/MatMulTileConstraint.py | 8 ++-- Deeploy/Targets/Snitch/Parsers.py | 8 +--- 9 files changed, 50 insertions(+), 43 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index b43672d9c9..f6c2ee9784 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1690,27 +1690,40 @@ def parseNodeCtxt(self, 
node.inputs.append(zeroTensor) self.operatorRepresentation['C'] = f'{node.name}_C_Tensor' + buffA = ctxt.lookup(node.inputs[0].name) + assert isinstance(buffA, VariableBuffer) + buffB = ctxt.lookup(node.inputs[1].name) + assert isinstance(buffB, VariableBuffer) + buffOut = ctxt.lookup(node.outputs[0].name) + assert isinstance(buffOut, VariableBuffer) + # Store the input and output shapes in the operator representation - self.operatorRepresentation['size'] = np.prod(ctxt.lookup(node.inputs[0].name).shape) - self.operatorRepresentation['A_shape'] = ctxt.lookup(node.inputs[0].name).shape - self.operatorRepresentation['B_shape'] = ctxt.lookup(node.inputs[1].name).shape - self.operatorRepresentation['data_out_shape'] = ctxt.lookup(node.outputs[0].name).shape + self.operatorRepresentation['size'] = np.prod(buffA.shape) + self.operatorRepresentation['A_shape'] = buffA.shape + self.operatorRepresentation['B_shape'] = buffB.shape + self.operatorRepresentation['data_out_shape'] = buffOut.shape + + if self.operatorRepresentation['transA']: + N_A, M = buffA.shape[-2:] + else: + M, N_A = buffA.shape[-2:] + + if self.operatorRepresentation['transB']: + O, N_B = buffB.shape[-2:] + else: + N_B, O = buffB.shape[-2:] # Store the matrix dimensions in the operator representation - self.operatorRepresentation['M'] = ctxt.lookup( - node.inputs[0].name).shape[(-2 + self.operatorRepresentation['transA'])] - self.operatorRepresentation['N'] = ctxt.lookup( - node.inputs[0].name).shape[(-1 - self.operatorRepresentation['transA'])] - self.operatorRepresentation['O'] = ctxt.lookup( - node.inputs[1].name).shape[(-1 - self.operatorRepresentation['transB'])] + self.operatorRepresentation['M'] = M + self.operatorRepresentation['N'] = N_A + self.operatorRepresentation['O'] = O # SCHEREMO: Assert that reduction dimension is the same on both matrices - ret = ret and (self.operatorRepresentation['N'] == ctxt.lookup( - node.inputs[1].name).shape[-2 + self.operatorRepresentation['transB']]) + ret = ret and N_A == N_B # Check if the batch dimensions are compatible - self.operatorRepresentation['batch_A'] = np.prod(ctxt.lookup(node.inputs[0].name).shape[:-2]) - self.operatorRepresentation['batch_B'] = np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2]) + self.operatorRepresentation['batch_A'] = np.prod(buffA.shape[:-2]) + self.operatorRepresentation['batch_B'] = np.prod(buffB.shape[:-2]) self.operatorRepresentation['batch'] = max(self.operatorRepresentation['batch_A'], self.operatorRepresentation['batch_B']) @@ -1722,10 +1735,10 @@ def parseNodeCtxt(self, ), "Incompatible dimensions for input matrices. Broadcasting not yet supported for dimensions larger than 1 on one of the inputs, or equal dimensions between the 2." 
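The shape handling above is the core of this fix: with transA and transB kept as plain bools, the reduction dimension is read from the correct axis instead of being derived by adding a bool to an index. A minimal standalone sketch of the same shape logic, assuming plain tuple shapes (gemm_dims is an illustrative helper, not part of the patch):

    def gemm_dims(shapeA, shapeB, transA: bool, transB: bool):
        # ONNX Gemm computes A' @ B' with A' = A^T if transA and B' = B^T if transB,
        # so A contributes (M, N) and B contributes (N, O) after the optional transpose.
        if transA:
            N_A, M = shapeA[-2:]
        else:
            M, N_A = shapeA[-2:]
        if transB:
            O, N_B = shapeB[-2:]
        else:
            N_B, O = shapeB[-2:]
        assert N_A == N_B, "reduction dimensions must match"
        return M, N_A, O

    assert gemm_dims((8, 16), (16, 32), False, False) == (8, 16, 32)
    assert gemm_dims((16, 8), (32, 16), True, True) == (8, 16, 32)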
# Create flags for same dimension between each input matrix and the final batch dimension - self.operatorRepresentation['A_batched'] = (self.operatorRepresentation['batch'] == np.prod( - ctxt.lookup(node.inputs[0].name).shape[:-2])) + self.operatorRepresentation['A_batched'] = ( + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_A']) self.operatorRepresentation['W_batched'] = self.operatorRepresentation['B_batched'] = ( - self.operatorRepresentation['batch'] == np.prod(ctxt.lookup(node.inputs[1].name).shape[:-2])) + self.operatorRepresentation['batch'] == self.operatorRepresentation['batch_B']) return ctxt, ret diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index 69bea8484e..ab78e742d0 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -21,8 +21,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); % if A_batched: diff --git a/Deeploy/Targets/Generic/Templates/GemmTemplate.py b/Deeploy/Targets/Generic/Templates/GemmTemplate.py index 62d760d15c..371004a8e7 100644 --- a/Deeploy/Targets/Generic/Templates/GemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/GemmTemplate.py @@ -56,8 +56,8 @@ def alignToContext(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index c2c8d436f8..1907a0aea0 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -185,10 +185,8 @@ def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[ def _inferNumLevels(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[int]: - return [ - 2**((self.input_types[0].referencedType.typeWidth) * 2) * - inputs[0].shape[-1 - operatorRepresentation['transA']] - ] + O = inputs[0].shape[-1] if not operatorRepresentation['transA'] else inputs[0].shape[-2] + return [2**((self.input_types[0].referencedType.typeWidth) * 2) * O] def _inferSignedness(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[bool]: diff --git a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py index e5d53bd255..54cc86f6af 100644 --- a/Deeploy/Targets/MemPool/Templates/GemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/GemmTemplate.py @@ -127,8 +127,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${A_offset}, ${B_offset}, ${C_offset}, diff --git a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py index e6a42768e8..f544841acf 100644 --- a/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py +++ b/Deeploy/Targets/MemPool/Templates/RQGemmTemplate.py @@ -145,8 +145,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, @@ -170,8 +170,8 @@ def hoistTransientBuffers(self, ctxt: NetworkContext, ${O}, ${alpha}, ${beta}, - ${transA}, - ${transB}, + ${int(transA)}, + ${int(transB)}, ${mul}, ${add}, ${log2Dstring}, diff --git a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py 
b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py index f4c22b2c22..21044a5eca 100644 --- a/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/FloatGemmTemplate.py @@ -20,8 +20,8 @@ ${M}, ${N}, ${O}, - ${transA}, - ${transB} + ${int(transA)}, + ${int(transB)} ); ref_${data_out}_${A} += ${M} * ${N}; diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index 8b795be88e..a9259a15cf 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -32,13 +32,13 @@ def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: Netw tensorsShapeLen = len(bufferA.shape) AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transA']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) + dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) diff --git a/Deeploy/Targets/Snitch/Parsers.py b/Deeploy/Targets/Snitch/Parsers.py index 0051994686..51b32db210 100644 --- a/Deeploy/Targets/Snitch/Parsers.py +++ b/Deeploy/Targets/Snitch/Parsers.py @@ -18,9 +18,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True @@ -50,9 +48,7 @@ def parseNode(self, node: gs.Node) -> bool: if not ret: return False - if not all([ - self.operatorRepresentation['transA'] == 0, - ]): + if self.operatorRepresentation['transA']: return False return True From 1ec6cde925b9da692b80e306d12af590f8fa9446 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:02:10 +0200 Subject: [PATCH 27/54] Add LinearAttention --- Deeploy/OperatorDescriptor.py | 37 ++++++++++++++++++++++ Deeploy/Targets/Generic/Parsers.py | 51 ++++++++++++------------------ 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index e347dad8a6..93ad2643d7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -523,6 +523,42 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: AttrDesc("div", IntUnpack), ]) +linearAttentionDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleUnpack), + AttrDesc("preattn_requant_div", IntTupleUnpack), + AttrDesc("normalizer_requant_mul", IntTupleUnpack), + AttrDesc("normalizer_requant_shift", IntTupleUnpack), + 
AttrDesc("normalizer_requant_div", IntTupleUnpack), + AttrDesc("postattn_requant_mul", IntTupleUnpack), + AttrDesc("postattn_requant_shift", IntTupleUnpack), + AttrDesc("postattn_requant_div", IntTupleUnpack), + AttrDesc("wo_requant_mul", IntTupleUnpack), + AttrDesc("wo_requant_shift", IntTupleUnpack), + AttrDesc("wo_requant_div", IntTupleUnpack), + AttrDesc("wq_requant_mul", IntTupleUnpack), + AttrDesc("wq_requant_shift", IntTupleUnpack), + AttrDesc("wq_requant_div", IntTupleUnpack), + AttrDesc("wk_requant_mul", IntTupleUnpack), + AttrDesc("wk_requant_shift", IntTupleUnpack), + AttrDesc("wk_requant_div", IntTupleUnpack), + AttrDesc("wv_requant_mul", IntTupleUnpack), + AttrDesc("wv_requant_shift", IntTupleUnpack), + AttrDesc("wv_requant_div", IntTupleUnpack), + AttrDesc("Delta", IntUnpack), + AttrDesc("eps", IntUnpack), + AttrDesc("act_type", IntUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "Concat": concatDesc, @@ -538,6 +574,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, "LayerNormalization": layerNormalizationDesc, + "LinearAttention": linearAttentionDesc, "MatMul": matMulDesc, "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index f6c2ee9784..9389034969 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1472,37 +1472,26 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = int(node.attrs['preattn_requant_mul'].values) - self.operatorRepresentation['preattn_requant_shift'] = int(node.attrs['preattn_requant_shift'].values) - self.operatorRepresentation['preattn_requant_div'] = int( - math.log2(int(node.attrs['preattn_requant_div'].values))) - self.operatorRepresentation['normalizer_requant_mul'] = int(node.attrs['normalizer_requant_mul'].values) - self.operatorRepresentation['normalizer_requant_shift'] = int(node.attrs['normalizer_requant_shift'].values) - self.operatorRepresentation['normalizer_requant_div'] = int( - math.log2(int(node.attrs['normalizer_requant_div'].values))) - self.operatorRepresentation['postattn_requant_mul'] = int(node.attrs['postattn_requant_mul'].values) - self.operatorRepresentation['postattn_requant_shift'] = int(node.attrs['postattn_requant_shift'].values) - self.operatorRepresentation['postattn_requant_div'] = int( - math.log2(int(node.attrs['postattn_requant_div'].values))) - self.operatorRepresentation['wo_requant_mul'] = int(node.attrs['wo_requant_mul'].values) - self.operatorRepresentation['wo_requant_shift'] = int(node.attrs['wo_requant_shift'].values) - self.operatorRepresentation['wo_requant_div'] = int(math.log2(int(node.attrs['wo_requant_div'].values))) - self.operatorRepresentation['wq_requant_mul'] = int(node.attrs['wq_requant_mul'].values) - self.operatorRepresentation['wq_requant_shift'] = int(node.attrs['wq_requant_shift'].values) - self.operatorRepresentation['wq_requant_div'] = int(math.log2(int(node.attrs['wq_requant_div'].values))) - self.operatorRepresentation['wk_requant_mul'] = int(node.attrs['wk_requant_mul'].values) - self.operatorRepresentation['wk_requant_shift'] = int(node.attrs['wk_requant_shift'].values) - self.operatorRepresentation['wk_requant_div'] = 
int(math.log2(int(node.attrs['wk_requant_div'].values)))
-            self.operatorRepresentation['wv_requant_mul'] = int(node.attrs['wv_requant_mul'].values)
-            self.operatorRepresentation['wv_requant_shift'] = int(node.attrs['wv_requant_shift'].values)
-            self.operatorRepresentation['wv_requant_div'] = int(math.log2(int(node.attrs['wv_requant_div'].values)))
-            self.operatorRepresentation['Delta'] = int(node.attrs['Delta'])
-            self.operatorRepresentation['eps'] = int(node.attrs['eps'])
-            self.operatorRepresentation['act_type'] = int(node.attrs['act_type'])
-            self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values)
-            self.operatorRepresentation['dim'] = int(node.attrs['dim'].values)
-            self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values)
-            self.operatorRepresentation['heads'] = int(node.attrs['heads'].values)
+            self.operatorRepresentation.update(node.attrs)
+
+            # All *_div attrs are stored as their log2
+            log2Attrs = [
+                "preattn_requant_div",
+                "normalizer_requant_div",
+                "postattn_requant_div",
+                "wo_requant_div",
+                "wq_requant_div",
+                "wk_requant_div",
+                "wv_requant_div",
+            ]
+
+            for attr in log2Attrs:
+                value = self.operatorRepresentation[attr]
+                assert isinstance(value, int)
+                self.operatorRepresentation[attr] = int(math.log2(value))
         return ret

From 565cd95b167a41554b66a75a79f558b38f80c1b2 Mon Sep 17 00:00:00 2001
From: Luka Macan
Date: Sun, 28 Sep 2025 22:11:23 +0200
Subject: [PATCH 28/54] Add CLCA

---
 Deeploy/OperatorDescriptor.py      | 23 +++++++++++++++++++++
 Deeploy/Targets/Generic/Parsers.py | 10 +---------
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py
index 93ad2643d7..f25926c1cf 100644
--- a/Deeploy/OperatorDescriptor.py
+++ b/Deeploy/OperatorDescriptor.py
@@ -559,8 +559,31 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool:
     ],
 )
 
+clcaDesc = OperatorDescriptor(
+    inputDescriptor = IoDesc([
+        "q", "k", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wo_weight", "wo_bias", "wq_requant_mul",
+        "wq_requant_add", "wq_requant_div", "wk_requant_mul", "wk_requant_add", "wk_requant_div", "wv_requant_mul",
+        "wv_requant_add", "wv_requant_div", "kdiv_requant_mul", "kdiv_requant_add", "kdiv_requant_div",
+        "preattn_requant_mul", "preattn_requant_add", "preattn_requant_div", "postattn_requant_mul",
+        "postattn_requant_add", "postattn_requant_div", "wo_requant_mul", "wo_requant_add", "wo_requant_div"
+    ]),
+    outputDescriptor = IoDesc("data_out"),
+    attrDescriptors = [
+        AttrDesc("Delta", IntUnpack),
+        AttrDesc("eps", IntUnpack),
+        AttrDesc("eta", IntUnpack),
+        AttrDesc("act_type", IntUnpack),
+        AttrDesc("n_levels", IntUnpack),
+        AttrDesc("dim", IntUnpack),
+        AttrDesc("dim_head", IntUnpack),
+        AttrDesc("out_dim", IntUnpack),
+        AttrDesc("heads", IntUnpack),
+    ],
+)
+
 defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = {
     "Add": addDesc,
+    "CLCA": clcaDesc,
     "Concat": concatDesc,
     "Conv": convDesc,
     "DebugPrint": debugPrintDesc,
diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py
index 9389034969..d0f499b93e 100644
--- a/Deeploy/Targets/Generic/Parsers.py
+++ b/Deeploy/Targets/Generic/Parsers.py
@@ -1533,15 +1533,7 @@ def parseNode(self, node: gs.Node) -> (bool):
         ])
 
         if ret:
-            self.operatorRepresentation['Delta'] = int(node.attrs['Delta'])
-            self.operatorRepresentation['eps'] = int(node.attrs['eps'])
-            self.operatorRepresentation['eta'] = int(node.attrs['eta'])
-
self.operatorRepresentation['act_type'] = int(node.attrs['act_type']) - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - self.operatorRepresentation['dim'] = int(node.attrs['dim'].values) - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head'].values) - self.operatorRepresentation['out_dim'] = int(node.attrs['out_dim'].values) - self.operatorRepresentation['heads'] = int(node.attrs['heads'].values) + self.operatorRepresentation.update(node.attrs) return ret From 26cf6486a54089b5177522a5e1cfb76920cd1da6 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:13:11 +0200 Subject: [PATCH 29/54] Add IntegerMean --- Deeploy/OperatorDescriptor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f25926c1cf..9818601193 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -596,6 +596,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ITAMax": itaMaxDesc, "ITAPartialMax": itaPartialMaxDesc, "IntegerDiv": integerDivDescriptor, + "IntegerMean": reduceMeanDesc, "LayerNormalization": layerNormalizationDesc, "LinearAttention": linearAttentionDesc, "MatMul": matMulDesc, From 8b00f48d013c141a6c8a9373dff0573c186534f2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:18:31 +0200 Subject: [PATCH 30/54] Add MHSA --- Deeploy/OperatorDescriptor.py | 36 +++++++++++++++++++++++++++++- Deeploy/Targets/Generic/Parsers.py | 18 +-------------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 9818601193..925a1ac0e3 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from enum import Enum, IntEnum -from typing import Any, Dict, Tuple +from typing import Any, Dict, Tuple, Union import numpy as np import onnx_graphsurgeon as gs @@ -51,6 +51,13 @@ def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: return (FloatUnpack(value),) +def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: + try: + return IntUnpack(value) + except: + return IntTupleUnpack(value) + + def attrToTensor(node: gs.Node, attr: str) -> None: values = node.attrs[attr] if isinstance(values, (int, float)): @@ -581,6 +588,32 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +mhsaDesc = OperatorDescriptor( + inputDescriptor = IoDesc( + ["q", "k", "v", "wq_weight", "wq_bias", "wk_weight", "wk_bias", "wv_weight", "wv_bias", "wo_weight", + "wo_bias"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("preattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("preattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("postattn_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wo_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wq_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wk_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_mul", IntTupleIfNotSingleItemUnpack), + AttrDesc("wv_requant_div", IntTupleIfNotSingleItemUnpack), + AttrDesc("n_levels", IntUnpack), + AttrDesc("dim", IntUnpack), + AttrDesc("dim_head", IntUnpack), + AttrDesc("heads", 
IntUnpack), + AttrDesc("signed", BoolUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -599,6 +632,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "IntegerMean": reduceMeanDesc, "LayerNormalization": layerNormalizationDesc, "LinearAttention": linearAttentionDesc, + "MHSA": mhsaDesc, "MatMul": matMulDesc, "MatMulInteger": matMulDesc, "MaxPool": maxPoolDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index d0f499b93e..8a3e0662d6 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1408,23 +1408,7 @@ def parseNode(self, node: gs.Node) -> (bool): ]) if ret: - self.operatorRepresentation['preattn_requant_mul'] = node.attrs['preattn_requant_mul'] - self.operatorRepresentation['preattn_requant_div'] = node.attrs['preattn_requant_div'] - self.operatorRepresentation['postattn_requant_mul'] = node.attrs['postattn_requant_mul'] - self.operatorRepresentation['postattn_requant_div'] = node.attrs['postattn_requant_div'] - self.operatorRepresentation['wo_requant_mul'] = node.attrs['wo_requant_mul'] - self.operatorRepresentation['wo_requant_div'] = node.attrs['wo_requant_div'] - self.operatorRepresentation['wq_requant_mul'] = node.attrs['wq_requant_mul'] - self.operatorRepresentation['wq_requant_div'] = node.attrs['wq_requant_div'] - self.operatorRepresentation['wk_requant_mul'] = node.attrs['wk_requant_mul'] - self.operatorRepresentation['wk_requant_div'] = node.attrs['wk_requant_div'] - self.operatorRepresentation['wv_requant_mul'] = node.attrs['wv_requant_mul'] - self.operatorRepresentation['wv_requant_div'] = node.attrs['wv_requant_div'] - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels']) - self.operatorRepresentation['dim'] = int(node.attrs['dim']) # Sequence Length - self.operatorRepresentation['dim_head'] = int(node.attrs['dim_head']) # Projection Size - self.operatorRepresentation['heads'] = int(node.attrs['heads']) - self.operatorRepresentation['signed'] = int(node.attrs['signed']) + self.operatorRepresentation.update(node.attrs) return ret From 6ecf95db33066d0fec6c32551473c45b27ccebf2 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:28:14 +0200 Subject: [PATCH 31/54] Add Relu, Reshape, RequantShift --- Deeploy/OperatorDescriptor.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 925a1ac0e3..daa6c41f9b 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -614,6 +614,28 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +reluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +reshapeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "shape"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [], +) + +requantShiftDesc = RequantizedOperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "mul", "add"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("n_levels", IntUnpack), + AttrDesc("signed", BoolUnpack), + AttrDesc("div", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -644,9 +666,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RQMatMul": rqMatMulDesc, "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, + "Relu": reluDesc, 
"RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, + "RequantShift": requantShiftDesc, + "Reshape": reshapeDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, From 9a577a39e8c198a38ef8dc1e717a6be216c8df91 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 22:56:46 +0200 Subject: [PATCH 32/54] Add RequantizedAdd --- Deeploy/OperatorDescriptor.py | 47 +++++++++++++++++++++++++++++- Deeploy/Targets/Generic/Parsers.py | 32 ++++---------------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index daa6c41f9b..f9f5532bfa 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -355,7 +355,7 @@ class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: # TODO: Change to log - print("[WARNING] RequantizedConv cannot have n_levels_out and n_levels in it's attributes") + print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") return False if "n_levels_out" in node.attrs: @@ -636,6 +636,50 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) + +class RequantizedAddDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + for tensor in ["rqs1", "rqs2", "rqsOut"]: + n_levels = f"{tensor}_n_levels" + n_levels_out = f"{tensor}_n_levels_out" + if n_levels_out in node.attrs and n_levels in node.attrs: + # TODO: Change to log + print( + f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" + ) + return False + + if n_levels_out in node.attrs: + node.attrs[n_levels] = node.attrs[n_levels_out] + node.attrs.pop(n_levels_out) + + return super().canonicalize(node, opset) + + +requantizedAddDesc = RequantizedAddDescriptor( + inputDescriptor = IoDesc(["data_in_0", "data_in_1"]), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("rqs1_n_levels", IntUnpack), + AttrDesc("rqs1_mul", IntUnpack), + AttrDesc("rqs1_add", IntUnpack), + AttrDesc("rqs1_div", IntUnpack), + AttrDesc("rqs1_signed", BoolUnpack), + AttrDesc("rqs1_n_levels", IntUnpack), + AttrDesc("rqs2_mul", IntUnpack), + AttrDesc("rqs2_add", IntUnpack), + AttrDesc("rqs2_div", IntUnpack), + AttrDesc("rqs2_signed", BoolUnpack), + AttrDesc("rqs2_n_levels", IntUnpack), + AttrDesc("rqsOut_mul", IntUnpack), + AttrDesc("rqsOut_add", IntUnpack), + AttrDesc("rqsOut_div", IntUnpack), + AttrDesc("rqsOut_signed", BoolUnpack), + AttrDesc("rqsOut_n_levels", IntUnpack), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -667,6 +711,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "ReduceMean": reduceMeanDesc, "ReduceSum": reduceSumDesc, "Relu": reluDesc, + "RequantizedAdd": requantizedAddDesc, "RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 8a3e0662d6..ad3bad549d 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2373,32 +2373,12 @@ def parseNode(self, node: gs.Node) -> bool: ]) if ret: - if 'rqs1_n_levels' in node.attrs: - self.operatorRepresentation['rqs1_n_levels'] = 
int(node.attrs['rqs1_n_levels'].values) - else: - self.operatorRepresentation['rqs1_n_levels'] = int(node.attrs['rqs1_n_levels_out'].values) - self.operatorRepresentation['rqs1_mul'] = int(node.attrs['rqs1_mul']) - self.operatorRepresentation['rqs1_add'] = int(node.attrs['rqs1_add']) - self.operatorRepresentation['rqs1_signed'] = int(node.attrs['rqs1_signed'].values) - self.operatorRepresentation['rqs1_log2D'] = int(math.log2(node.attrs['rqs1_div'].values)) - - if 'rqs2_n_levels' in node.attrs: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels'].values) - else: - self.operatorRepresentation['rqs2_n_levels'] = int(node.attrs['rqs2_n_levels_out'].values) - self.operatorRepresentation['rqs2_mul'] = int(node.attrs['rqs2_mul']) - self.operatorRepresentation['rqs2_add'] = int(node.attrs['rqs2_add']) - self.operatorRepresentation['rqs2_signed'] = int(node.attrs['rqs2_signed'].values) - self.operatorRepresentation['rqs2_log2D'] = int(math.log2(node.attrs['rqs2_div'].values)) - - if 'rqsOut_n_levels' in node.attrs: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels'].values) - else: - self.operatorRepresentation['rqsOut_n_levels'] = int(node.attrs['rqsOut_n_levels_out'].values) - self.operatorRepresentation['rqsOut_mul'] = int(node.attrs['rqsOut_mul']) - self.operatorRepresentation['rqsOut_add'] = int(node.attrs['rqsOut_add']) - self.operatorRepresentation['rqsOut_signed'] = int(node.attrs['rqsOut_signed'].values) - self.operatorRepresentation['rqsOut_log2D'] = int(math.log2(node.attrs['rqsOut_div'].values)) + self.operatorRepresentation.update(node.attrs) + + for tensor in ["rqs1", "rqs2", "rqsOut"]: + value = self.operatorRepresentation[f"{tensor}_div"] + assert isinstance(value, int) + self.operatorRepresentation[f"{tensor}_log2D"] = int(math.log2(value)) return ret From 8ae808a26466acd85fc5ed00caf27682719f1c81 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:07:48 +0200 Subject: [PATCH 33/54] Add RequantizediHardswish --- Deeploy/OperatorDescriptor.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index f9f5532bfa..ecf077a480 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -277,6 +277,17 @@ class GeluApprox(str, Enum): AttrDesc("three", IntUnpack), ]) +requantizedIHardswishDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("one_over_six", IntUnpack), + AttrDesc("six", IntUnpack), + AttrDesc("three", IntUnpack), + AttrDesc("mul", IntUnpack), + AttrDesc("add", IntUnpack), + AttrDesc("shift", IntUnpack), + ]) + iNoNormDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "weights", "bias"]), outputDescriptor = IoDesc("data_out"), attrDescriptors = [ @@ -715,6 +726,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RequantizedConv": requantizedConvDesc, "RequantizedGemm": requantizedGemmDesc, "RequantizediGELU": requantizedIGeluDesc, + "RequantizediHardswish": requantizedIHardswishDesc, "RequantShift": requantShiftDesc, "Reshape": reshapeDesc, "Slice": sliceDesc, From 5eece92c257bc1ab50143c446a09361ea61b64a4 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:19:30 +0200 Subject: [PATCH 34/54] Add iGELU --- Deeploy/OperatorDescriptor.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py 
b/Deeploy/OperatorDescriptor.py index ecf077a480..01c5fbeb08 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -256,11 +256,22 @@ class GeluApprox(str, Enum): none = "none" -geluDesc = OperatorDescriptor(inputDescriptor = IoDesc("data_in"), - outputDescriptor = IoDesc("data_out"), - attrDescriptors = [ - AttrDesc("approximate", GeluApprox, default = GeluApprox.none), - ]) +geluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("approximate", GeluApprox, default = GeluApprox.none), + ], +) + +iGeluDesc = OperatorDescriptor( + inputDescriptor = IoDesc("data_in"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("b", IntUnpack), + AttrDesc("one", IntUnpack), + ], +) requantizedIGeluDesc = OperatorDescriptor(inputDescriptor = IoDesc(["data_in", "mul", "add", "shift"]), outputDescriptor = IoDesc("data_out"), @@ -691,6 +702,12 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: ], ) +sgdDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["weight", "grad"]), + outputDescriptor = IoDesc("weight_updated"), + attrDescriptors = [AttrDesc("lr", FloatUnpack)], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -729,12 +746,14 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "RequantizediHardswish": requantizedIHardswishDesc, "RequantShift": requantShiftDesc, "Reshape": reshapeDesc, + "SGD": sgdDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, "SoftmaxGrad": softmaxGradDesc, "Squeeze": squeezeDesc, "Transpose": transposeDesc, "Unsqueeze": unsqueezeDesc, + "iGELU": iGeluDesc, "iHardswish": iHardswishDesc, "iLayerNorm": iLayerNormDesc, "iNoNorm": iNoNormDesc, From 75983038183da7eabe9ba9f76619f77c4f2aa46b Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:23:24 +0200 Subject: [PATCH 35/54] Add SoftmaxCrossEntropyLoss(Grad) --- Deeploy/OperatorDescriptor.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 01c5fbeb08..ff1fbcf3fd 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -708,6 +708,18 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("lr", FloatUnpack)], ) +softmaxCrossEntropyLossDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["logits", "labels"]), + outputDescriptor = IoDesc("log_prob"), + attrDescriptors = [], +) + +softmaxCrossEntropyLossGradDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["log_prob", "labels"]), + outputDescriptor = IoDesc("grad"), + attrDescriptors = [], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, "CLCA": clcaDesc, @@ -749,6 +761,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "SGD": sgdDesc, "Slice": sliceDesc, "Softmax": softmaxDesc, + "SoftmaxCrossEntropyLoss": softmaxCrossEntropyLossDesc, + "SoftmaxCrossEntropyLossGrad": softmaxCrossEntropyLossGradDesc, "SoftmaxGrad": softmaxGradDesc, "Squeeze": squeezeDesc, "Transpose": transposeDesc, From 72c8d21ca66bbfdf61f2c1b734acc98a34915361 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sun, 28 Sep 2025 23:50:41 +0200 Subject: [PATCH 36/54] Add Memcopy for dma tests --- DeeployTest/testUtils/dmaUtils.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/DeeployTest/testUtils/dmaUtils.py b/DeeployTest/testUtils/dmaUtils.py index 
3205275fda..ba2f6e176f 100644 --- a/DeeployTest/testUtils/dmaUtils.py +++ b/DeeployTest/testUtils/dmaUtils.py @@ -10,14 +10,13 @@ from Deeploy.AbstractDataTypes import BaseType, Pointer, PointerClass from Deeploy.CommonExtensions.DataTypes import minimalIntegerType -from Deeploy.DeeployTypes import NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ - ONNXLayer, OperatorRepresentation, VariableBuffer +from Deeploy.DeeployTypes import IoDesc, NetworkContext, NetworkDeployer, NodeParser, NodeTemplate, NodeTypeChecker, \ + ONNXLayer, OperatorDescriptor, OperatorRepresentation, VariableBuffer from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryDeployerWrapper, \ MemoryPlatformWrapper from Deeploy.MemoryLevelExtension.OptimizationPasses.MemoryLevelAnnotationPasses import AnnotateDefaultMemoryLevel, \ AnnotateIOMemoryLevel -from Deeploy.OperatorDescriptor import defaultOperatorDescriptors from Deeploy.Targets.PULPOpen.Deployer import PULPDeployer from Deeploy.Targets.PULPOpen.Platform import MemoryPULPPlatform, PULPOptimizer from Deeploy.Targets.Snitch.Deployer import SnitchDeployer @@ -280,6 +279,17 @@ def defaultScheduler(graph: gs.Graph) -> List[List[gs.Node]]: return [[node] for node in graph.nodes] +memcpyDesc = OperatorDescriptor( + inputDescriptor = IoDesc("src"), + outputDescriptor = IoDesc("dest"), + attrDescriptors = [], +) + +dmaTestOperatorDescriptors = { + "Memcpy": memcpyDesc, +} + + def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, inputTypes: Dict[str, Type[Pointer]], doublebuffer: bool, deeployStateDir: str) -> NetworkDeployer: L3 = MemoryLevel(name = "L3", neighbourNames = ["L2"], size = 64000000) @@ -300,7 +310,7 @@ def setup_pulp_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph, platform, inputTypes, PULPOptimizer, - defaultOperatorDescriptors, + dmaTestOperatorDescriptors, defaultScheduler, default_channels_first = True, deeployStateDir = deeployStateDir) @@ -342,7 +352,7 @@ def setup_snitch_deployer(defaultMemory: str, targetMemory: str, graph: gs.Graph platform, inputTypes, SnitchOptimizer, - defaultOperatorDescriptors, + dmaTestOperatorDescriptors, defaultScheduler, deeployStateDir = deeployStateDir) memoryLevelAnnotationPasses = [AnnotateIOMemoryLevel(defaultMemory), AnnotateDefaultMemoryLevel(memoryHierarchy)] From bff86683b022bbee34b0a75919be874dcbb0c94f Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:30:30 +0100 Subject: [PATCH 37/54] Remove some trailing white space in CHANGELOG.md --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a567305e2b..6b6ee83f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -177,9 +177,9 @@ This release containing major architectural changes, new platform support, enhan ### Added -- BatchNorm kernel -- ConvTranspose kernel -- MaxPool1D kernel +- BatchNorm kernel +- ConvTranspose kernel +- MaxPool1D kernel - Template for 1D Convolution - Support for float32 data type in the previous kernels - Float binding for Pad1D kernel @@ -318,7 +318,7 @@ This release containing major architectural changes, new platform support, enhan ### Changed - FloatConvTemplate file -- Platform.py file +- Platform.py file - Bump the CMake version to 3.24 as required for the chimera-sdk - Bump GVSoC's version and add chimera simulation target - Rename the generic source util to utils to 
avoid name collision with chimera-sdk From 5ac4e316398e333e5573f8cff07bfd64cd5d76a7 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:55:49 +0100 Subject: [PATCH 38/54] Catch canonicalization exceptions and re-raise with node context --- Deeploy/DeeployTypes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index aecb112b57..2bf9452ade 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -2738,7 +2738,10 @@ def _bindLayers(self): assert node.op in self.operatorDescriptors, \ f"[ERROR] Error parsing node {node.name}. There is no descriptor for operator {node.op}." desc = self.operatorDescriptors[node.op] - desc.canonicalize(node, self.graph.opset) + try: + desc.canonicalize(node, self.graph.opset) + except BaseException as e: + raise ValueError(f"[ERROR] Node {node.name} of op {node.op} could not be canonicalized.") from e assert desc.check(node), \ f"[ERROR] Node {node.name} is not a valid instance of {node.op} operator" From 2f871d476ea5019ba9e9884e2626dffe1b286324 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:56:10 +0100 Subject: [PATCH 39/54] Make IntegerDataTypes a tuple --- Deeploy/CommonExtensions/DataTypes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Deeploy/CommonExtensions/DataTypes.py b/Deeploy/CommonExtensions/DataTypes.py index 4f6dba3827..c05ea3b9d9 100644 --- a/Deeploy/CommonExtensions/DataTypes.py +++ b/Deeploy/CommonExtensions/DataTypes.py @@ -87,11 +87,11 @@ class float64_t(FloatImmediate): SignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (int8_t, int16_t, int32_t, int64_t) UnsignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (uint8_t, uint16_t, uint32_t, uint64_t) -IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (sorted(( - *SignedIntegerDataTypes, - *UnsignedIntegerDataTypes, -), - key = lambda _type: _type.typeWidth)) +IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = tuple( + sorted(( + *SignedIntegerDataTypes, + *UnsignedIntegerDataTypes, + ), key = lambda _type: _type.typeWidth)) FloatDataTypes: Tuple[Type[FloatImmediate], ...]
= (bfloat16_t, float16_t, float32_t, float64_t) From 31577c31c95e36decbdd12dfe616521df2795a70 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 13:57:00 +0100 Subject: [PATCH 40/54] Fix reshape bindings (which are used for squeeze/unsqueeze too) to type-map axes to int64_t as per ONNX --- Deeploy/Targets/PULPOpen/Bindings.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 9ff940b2f0..57fdf90a57 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -9,8 +9,8 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureGeneration, MemoryAwareClosureGeneration from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration -from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \ - uint8_t +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ + int8_t, int32_t, int64_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration @@ -153,11 +153,8 @@ ] PULPReshapeBindings = [ - NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int32_t)], [PointerClass(type)]), - ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes -] + [ - NodeBinding(ReshapeChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]), - ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes + NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int64_t)], [PointerClass(type)]), + ReshapeTemplate.referenceTemplate, SkipTransformer) for type in IntegerDataTypes + FloatDataTypes ] PULPRQAddBindings = [ From 90102f5eac819838252446fc5a0ab5513f09903e Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 14:15:48 +0100 Subject: [PATCH 41/54] Canonicalize (un)squeeze operations as pre-opset-13, i.e., put axes into node attributes to avoid creating a buffer for them --- Deeploy/OperatorDescriptor.py | 18 +++++++++++-- Deeploy/Targets/Generic/Parsers.py | 43 +++++++----------------------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index ff1fbcf3fd..a0cb483589 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -476,15 +476,29 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [AttrDesc("axis", IntUnpack, default = 0)], ) + +class SqueezeDescriptor(OperatorDescriptor): + + def canonicalize(self, node: gs.Node, opset: int) -> bool: + if opset >= 13: + assert len(node.inputs) == 2, f"Expected 2 inputs but received {len(node.inputs)}" + axes = node.inputs[1] + assert isinstance(axes, + gs.Constant), f"Expected axes to be a constant but received axes of type {type(axes)}" + node.attrs["axes"] = axes.values + axes.outputs.clear() + return super().canonicalize(node, opset) + + # Opset <= 11 -unsqueezeDesc = OperatorDescriptor( +unsqueezeDesc = SqueezeDescriptor( inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), attrDescriptors = [AttrDesc("axes", IntTupleUnpack)],
) # Opset <= 11 -squeezeDesc = OperatorDescriptor( +squeezeDesc = SqueezeDescriptor( inputDescriptor = IoDesc("data_in"), outputDescriptor = IoDesc("data_out"), attrDescriptors = [AttrDesc("axes", IntTupleUnpack)], diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad3bad549d..ddd08a8551 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -986,48 +986,23 @@ def __init__(self): super().__init__() def parseNode(self, node: gs.Node) -> (bool): + if not all(['axes' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]): + return False - # ONNX v11: 'axes' is a node attribute - if 'axes' in node.attrs: - ret = all(['axes' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]) - # ONNX v13+: 'axes' becomes an input with the data - # Source: https://onnx.ai/onnx/operators/onnx__Unsqueeze.html - else: - ret = all([len(node.inputs) == 2, len(node.outputs) == 1]) - - if ret and 'axes' in node.attrs: - axes_attr = node.attrs['axes'] - self.operatorRepresentation['axes'] = [int(axes_attr)] if isinstance(axes_attr, int) \ - else [int(a) for a in axes_attr] - # For opset 13+, axes will be extracted from the second input in parseNodeCtxt - - return ret + self.operatorRepresentation['axes'] = node.attrs['axes'] + return True def parseNodeCtxt(self, ctxt: NetworkContext, node: gs.Node, channels_first: bool = True) -> Tuple[NetworkContext, bool]: + inputs = ['data_in'] + for idx, inputNode in enumerate(node.inputs): + self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name outputs = ['data_out'] - if len(node.inputs) == 1: - inputs = ['data_in'] - for idx, inputNode in enumerate(node.inputs): - self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name - for idx, outputNode in enumerate(node.outputs): - self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name - else: - data_in = ctxt.lookup(node.inputs[0].name) - data_out = ctxt.lookup(node.outputs[0].name) - self.operatorRepresentation['data_in'] = data_in.name - self.operatorRepresentation['data_out'] = data_out.name - # axes must be a constant; extract values - axes_buf = ctxt.lookup(node.inputs[1].name) - assert hasattr(axes_buf, 'values'), "Unsqueeze: expected constant 'axes' input for opset 13+" - axes_vals = np.array(axes_buf.values).astype(int).flatten().tolist() - self.operatorRepresentation['axes'] = axes_vals - # Do not deploy the axes tensor - axes_buf._live = False - axes_buf._deploy = False + for idx, outputNode in enumerate(node.outputs): + self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name return ctxt, True From 7bd7353df3ca04e1414164f2025ea9a892eff2e7 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:37:50 +0100 Subject: [PATCH 42/54] Add BatchNormalization descriptor --- Deeploy/OperatorDescriptor.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index a0cb483589..9d74a32aec 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -734,8 +734,19 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: attrDescriptors = [], ) +batchNormalizationDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "scale", "bias", "mean", "variance"]), + outputDescriptor = IoDesc(["data_out"], optional = ["running_mean", "running_var"]), + attrDescriptors = [ + AttrDesc("epsilon", FloatUnpack, default = 1e-5), + AttrDesc("momentum", 
FloatUnpack, default = 0.9), + AttrDesc("training_mode", BoolUnpack, default = False), + ], +) + defaultOperatorDescriptors: Dict[str, OperatorDescriptor] = { "Add": addDesc, + "BatchNormalization": batchNormalizationDesc, "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, From 16bc4630c2e4ac4f71f2a798c8c7f5bae25d3023 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:38:11 +0100 Subject: [PATCH 43/54] Add ConvTranspose descriptor --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 9d74a32aec..fbf333cf1f 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -371,6 +371,25 @@ def _padsDefault(node: gs.Node) -> Tuple[int, ...]: ], ) +convTransposeDesc = OperatorDescriptor( + inputDescriptor = IoDesc(["data_in", "weight"], optional = "bias"), + outputDescriptor = IoDesc("data_out"), + attrDescriptors = [ + AttrDesc("auto_pad", AutoPad, default = AutoPad.NOTSET), + AttrDesc("dilations", IntTupleUnpack, default = _dilationsDefault), + AttrDesc("group", IntUnpack, default = 1), + AttrDesc("kernel_shape", IntTupleUnpack, default = _kernelShapeDefault), + # TODO: Add output_shape and output_padding default functions. + # Docs: + # - ONNX: https://onnx.ai/onnx/operators/onnx__ConvTranspose.html + # - PyTorch: https://docs.pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html + # AttrDesc("output_shape", IntTupleUnpack, default = _outputShapeDefault), + # AttrDesc("output_padding", IntTupleUnpack, default = _outputPaddingDefault), + AttrDesc("pads", IntTupleUnpack, default = _padsDefault), + AttrDesc("strides", IntTupleUnpack, default = _stridesDefault), + ], +) + class RequantizedOperatorDescriptor(OperatorDescriptor): @@ -750,6 +769,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: "CLCA": clcaDesc, "Concat": concatDesc, "Conv": convDesc, + "ConvTranspose": convTransposeDesc, "DebugPrint": debugPrintDesc, "Dequant": dequantDesc, "Div": divDesc, From d865898cefe487c83676282f61e7d2740e24f16e Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:50:43 +0100 Subject: [PATCH 44/54] Relax opset check on squeeze operations to a warning --- Deeploy/OperatorDescriptor.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index fbf333cf1f..0e93a07ed4 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -9,6 +9,7 @@ import onnx_graphsurgeon as gs from Deeploy.DeeployTypes import AttrDesc, IoDesc, OperatorDescriptor, VariadicIoDesc +from Deeploy.Logging import DEFAULT_LOGGER as log def IntUnpack(value: Any) -> int: @@ -499,13 +500,24 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: class SqueezeDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: - if opset >= 13: - assert len(node.inputs) == 2, f"Expected 2 inputs but received {len(node.inputs)}" + if len(node.inputs) == 2: axes = node.inputs[1] - assert isinstance(axes, - gs.Constant), f"Expected axes to be a constant but received axes of type {type(axes)}" + assert isinstance(axes, gs.Constant), \ + f"Expected axes to be a constant but received axes of type {type(axes)}" node.attrs["axes"] = axes.values axes.outputs.clear() + + if opset >= 13 and len(node.inputs) != 2: + log.warning( + "Squeeze operation expects 2 inputs for opset >= 13. 
" + f"Received node {node.name} with {len(node.inputs)} input{'s' if len(node.inputs) > 1 else ''} and opset {opset}" + ) + elif opset < 13 and len(node.inputs) != 1: + log.warning( + "Squeeze operation expects 1 input for opset < 13. " + f"Received node {node.name} with {len(node.inputs)} input{'s' if len(node.inputs) > 1 else ''} and opset {opset}" + ) + return super().canonicalize(node, opset) From cd62a695cb523db2dc207b6c876b10424c3b32d3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 16:56:21 +0100 Subject: [PATCH 45/54] Replace prints with logging --- Deeploy/DeeployTypes.py | 13 +++++-------- Deeploy/OperatorDescriptor.py | 9 +++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 2bf9452ade..30c06548c0 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1101,19 +1101,16 @@ def check(self, node: gs.Node) -> bool: valid = True if not self.inputDescriptor.checkTensors(node.inputs): - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") + log.error(f"[OP {node.op}] Invalid input tensors: {[t.name for t in node.inputs]}") valid = False if not self.outputDescriptor.checkTensors(node.outputs): - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") + log.error(f"[OP {node.op}] Invalid output tensors: {[t.name for t in node.outputs]}") valid = False for attrDesc in self.attrDescriptors: if attrDesc.default is None and not attrDesc.name in node.attrs: - # TODO: Change to logging - print(f"[ERROR OP {node.op}] Missing attribute {attrDesc.name}") + log.error(f"[OP {node.op}] Missing attribute {attrDesc.name}") valid = False return valid @@ -1128,7 +1125,7 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: try: node.attrs[desc.name] = desc.unpack(value) except Exception as e: - raise ValueError(f"[ERROR OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e + raise ValueError(f"[OP {node.op}] Error unpacking the attribute {desc.name}. {e}") from e return True def parseTensors(self, ctxt: NetworkContext, tensors: Sequence[gs.Tensor], @@ -1158,7 +1155,7 @@ def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: firstKeySet = set(firstOpRepr.keys()) secondKeySet = set(secondOpRepr.keys()) assert firstKeySet.isdisjoint(secondKeySet), \ - f"[PARSE ERROR] (Node: {node.name}, Op: {node.op}) " \ + f"[OP {node.op}] Encourntered error while parsing node {node.name}. " \ f"Keys from parsing {firstName} clash with the keys from parsing {secondName}. 
"\ f"Overlapping keys: {firstKeySet ^ secondKeySet}" diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 0e93a07ed4..4ebab580a7 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -396,8 +396,7 @@ class RequantizedOperatorDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if "n_levels_out" in node.attrs and "n_levels" in node.attrs: - # TODO: Change to log - print("[WARNING] Requantized operator cannot have n_levels_out and n_levels in its attributes") + log.warning("Requantized operator cannot have n_levels_out and n_levels in its attributes") return False if "n_levels_out" in node.attrs: @@ -711,10 +710,8 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: n_levels = f"{tensor}_n_levels" n_levels_out = f"{tensor}_n_levels_out" if n_levels_out in node.attrs and n_levels in node.attrs: - # TODO: Change to log - print( - f"[WARNING] RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes" - ) + log.warning( + f"RequantizedAdd tensor {tensor} cannot have {n_levels_out} and {n_levels} in its attributes") return False if n_levels_out in node.attrs: From 91bdeb7f4573dbee5a97a650ef464d3b62f60c7c Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:02:59 +0100 Subject: [PATCH 46/54] Add missing itertools import --- Deeploy/DeeployTypes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 30c06548c0..b2afde7410 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -5,6 +5,7 @@ from __future__ import annotations import copy +import itertools import math import os import pickle From 238d3affd422e61cc65774a698bac093a8ce370c Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:03:33 +0100 Subject: [PATCH 47/54] Initialize optional value with None --- Deeploy/DeeployTypes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index b2afde7410..166c91289c 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1023,10 +1023,11 @@ def copy(self) -> NetworkContext: class IoDesc: - def __init__(self, required: Union[str, List[str]], optional: Union[str, List[str]] = []) -> None: + def __init__(self, required: Union[str, List[str]], optional: Optional[Union[str, List[str]]] = None) -> None: if isinstance(required, str): required = [required] self.required = required + optional = optional if optional is not None else [] if isinstance(optional, str): optional = [optional] self.optional = optional From a4198b433517eeb5e0068063e04e8722da203f55 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:04:34 +0100 Subject: [PATCH 48/54] Fix typo --- Deeploy/DeeployTypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index 166c91289c..6b63697d2f 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -1149,7 +1149,7 @@ def parseAttrs(self, node: gs.Node) -> OperatorRepresentation: def parse(self, ctxt: NetworkContext, node: gs.Node) -> OperatorRepresentation: opReprs = { "input tensors": self.parseTensors(ctxt, node.inputs, self.inputDescriptor), - "output tesnors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), + "output tensors": self.parseTensors(ctxt, node.outputs, self.outputDescriptor), "attributes": self.parseAttrs(node), } From e8f1721bcf45c6c05efb921b1b54f65b9c1c5678 Mon Sep 
17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:07:08 +0100 Subject: [PATCH 49/54] Explicit exception coverage --- Deeploy/OperatorDescriptor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 4ebab580a7..3af145cd43 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -55,7 +55,7 @@ def FloatTupleUnpack(value: Any) -> Tuple[float, ...]: def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: try: return IntUnpack(value) - except: + except ValueError: return IntTupleUnpack(value) From f180f85348c3f74c90cc97823069722b7d332d19 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:11:31 +0100 Subject: [PATCH 50/54] Rename attrToTensor to attrToInputTensor and add inputTensorToAttr --- Deeploy/OperatorDescriptor.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 3af145cd43..7f283708c0 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -59,7 +59,7 @@ def IntTupleIfNotSingleItemUnpack(value: Any) -> Union[int, Tuple[int, ...]]: return IntTupleUnpack(value) -def attrToTensor(node: gs.Node, attr: str) -> None: +def attrToInputTensor(node: gs.Node, attr: str) -> None: values = node.attrs[attr] if isinstance(values, (int, float)): values = np.array([values]) @@ -71,6 +71,14 @@ def attrToInputTensor(node: gs.Node, attr: str) -> None: node.attrs.pop(attr) +def inputTensorToAttr(node: gs.Node, tensorIdx: int, attr: str) -> None: + tensor = node.inputs[tensorIdx] + assert isinstance(tensor, gs.Constant), \ + f"Can convert only constant tensors to attributes. Received tensor of type {type(tensor)}" + node.attrs[attr] = tensor.values + tensor.outputs.clear() + + concatDesc = OperatorDescriptor( inputDescriptor = VariadicIoDesc("data_in", minNumTensors = 2), outputDescriptor = IoDesc("data_out"), @@ -91,10 +99,10 @@ class SliceDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if opset < 10: - attrToTensor(node, "starts") - attrToTensor(node, "ends") + attrToInputTensor(node, "starts") + attrToInputTensor(node, "ends") if "axes" in node.attrs: - attrToTensor(node, "axes") + attrToInputTensor(node, "axes") return super().canonicalize(node, opset) @@ -184,7 +192,7 @@ class ReduceMeanDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if opset < 18: if "axes" in node.attrs: - attrToTensor(node, "axes") + attrToInputTensor(node, "axes") return super().canonicalize(node, opset) From bc75e85564ea1401eeae2f4f2ecb1bfab9f82f22 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:17:17 +0100 Subject: [PATCH 51/54] Use inputTensorToAttr in squeeze canonicalization --- Deeploy/OperatorDescriptor.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 7f283708c0..6bf6b6ca30 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -508,11 +508,7 @@ class SqueezeDescriptor(OperatorDescriptor): def canonicalize(self, node: gs.Node, opset: int) -> bool: if len(node.inputs) == 2: - axes = node.inputs[1] - assert isinstance(axes, gs.Constant), \ - f"Expected axes to be a constant but received axes of type {type(axes)}" - node.attrs["axes"] = axes.values - axes.outputs.clear() + inputTensorToAttr(node, tensorIdx = 1, attr = "axes") if opset
>= 13 and len(node.inputs) != 2: log.warning( From 6976c52dc8368620e69164d43ca32eb03b2b851d Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:12:52 +0100 Subject: [PATCH 52/54] Remove duplicate attribute --- Deeploy/OperatorDescriptor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Deeploy/OperatorDescriptor.py b/Deeploy/OperatorDescriptor.py index 6bf6b6ca30..288a9de505 100644 --- a/Deeploy/OperatorDescriptor.py +++ b/Deeploy/OperatorDescriptor.py @@ -729,7 +729,6 @@ def canonicalize(self, node: gs.Node, opset: int) -> bool: inputDescriptor = IoDesc(["data_in_0", "data_in_1"]), outputDescriptor = IoDesc("data_out"), attrDescriptors = [ - AttrDesc("rqs1_n_levels", IntUnpack), AttrDesc("rqs1_mul", IntUnpack), AttrDesc("rqs1_add", IntUnpack), AttrDesc("rqs1_div", IntUnpack), From 97da07c29915bd7e2c3661212e02bb3a4f1a94df Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 17:37:08 +0100 Subject: [PATCH 53/54] Refactor MatMulTileConstraint --- .../TileConstraints/MatMulTileConstraint.py | 87 +++++++++++-------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py index a9259a15cf..c0f3b70461 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/MatMulTileConstraint.py @@ -19,42 +19,50 @@ class MatMulTileConstraint(TileConstraint): @staticmethod def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: - - # Get to-be-tiled tensor's buffers bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - outputBuffer = ctxt.lookup(name = parseDict['data_out']) + bufferOut = ctxt.lookup(name = parseDict['data_out']) # Add I/O dimensions to the model as variables - for _buffer in [bufferA, bufferB, outputBuffer]: - tilerModel.addTensorDimToModel(ctxt, _buffer.name) - - tensorsShapeLen = len(bufferA.shape) - - AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 2) + int(parseDict['transA'])) - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - int(parseDict['transA'])) - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + int(parseDict['transB'])) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - int(parseDict['transB'])) - outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 2)) - outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = (tensorsShapeLen - 1)) - - # Map output dims to inputs dims - for idx in range(tensorsShapeLen - 2): - tilerModel.addConstraint( - tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = idx) == tilerModel.getTensorDimVar( - tensorName = bufferA.name, dimIdx = idx)) - tilerModel.addConstraint( - tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = idx) == tilerModel.getTensorDimVar( - tensorName = bufferB.name, dimIdx = idx)) + for buff in [bufferA, bufferB, bufferOut]: + tilerModel.addTensorDimToModel(ctxt, buff.name) + + rankA = len(bufferA.shape) + if not parseDict['transA']: + firstDimIdxA, secondDimIdxA = rankA - 2, rankA - 1 + else: + firstDimIdxA, secondDimIdxA = rankA - 1, rankA - 2 + AFirstDimVar = 
tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = firstDimIdxA) + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + + rankB = len(bufferB.shape) + if not parseDict['transB']: + firstDimIdxB, secondDimIdxB = rankB - 2, rankB - 1 + else: + firstDimIdxB, secondDimIdxB = rankB - 1, rankB - 2 + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) + BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = secondDimIdxB) + + rankOut = len(bufferOut.shape) + outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 2) + outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = rankOut - 1) + + # Map batch dims between A and output + batchDimsA = rankA - 2 + for dimIdx in range(batchDimsA): + varA = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimIdx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankA) + dimIdx) + tilerModel.addConstraint(varOut == varA) + + # Map batch dims between B and output + batchDimsB = rankB - 2 + for dimIdx in range(batchDimsB): + varB = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimIdx) + varOut = tilerModel.getTensorDimVar(tensorName = bufferOut.name, dimIdx = (rankOut - rankB) + dimIdx) + tilerModel.addConstraint(varOut == varB) tilerModel.addConstraint(outputFirstDimVar == AFirstDimVar) tilerModel.addConstraint(outputSecondDimVar == BSecondDimVar) - - # Add GEMM Geometrical constraints tilerModel.addConstraint(ASecondDimVar == BFirstDimVar) return tilerModel @@ -65,14 +73,19 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo bufferA = ctxt.lookup(name = parseDict['A']) bufferB = ctxt.lookup(name = parseDict['B']) - tensorsShapeLen = len(bufferA.shape) - - ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transA']) - BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 2) + parseDict['transB']) - BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, - dimIdx = (tensorsShapeLen - 1) - parseDict['transB']) + rankA = len(bufferA.shape) + if not parseDict['transA']: + _, secondDimIdxA = rankA - 2, rankA - 1 + else: + _, secondDimIdxA = rankA - 1, rankA - 2 + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = secondDimIdxA) + + rankB = len(bufferB.shape) + if not parseDict['transB']: + firstDimIdxB, _ = rankB - 2, rankB - 1 + else: + firstDimIdxB, _ = rankB - 1, rankB - 2 + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = firstDimIdxB) # VIC: We don't want to deal with intermediate results between kernel calls tilerModel.addConstraint(ASecondDimVar == parseDict['N']) From 0c64a3eb587aeeb703a63512057e32e182b4f0e3 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Mon, 27 Oct 2025 19:16:30 +0100 Subject: [PATCH 54/54] Remove duplicate attributes and check that the value is positive --- Deeploy/Targets/Generic/Parsers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ddd08a8551..edbb2bc917 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -1436,11 +1436,8 @@ def parseNode(self, node: gs.Node) -> (bool): # All *_div attrs are log2d-ified log2Attrs = [ 
"preattn_requant_div", - "preattn_requant_div", - "normalizer_requant_div", "normalizer_requant_div", "postattn_requant_div", - "postattn_requant_div", "wo_requant_div", "wq_requant_div", "wk_requant_div", @@ -1449,7 +1446,8 @@ def parseNode(self, node: gs.Node) -> (bool): for attr in log2Attrs: value = self.operatorRepresentation[attr] - assert isinstance(value, int) + assert isinstance( + value, int) and value > 0, f"Attribute {attr} must be a positive integer. Received value {value}" self.operatorRepresentation[attr] = int(math.log2(value)) return ret